def main():
    """Create file lists from rucio datasets."""
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()
    all_files = []
    if RunOptions.single_out_file and len(RunOptions.out_file_name) == 0:
        logging.error("Please provide a file name if you run with --single-out_file")
        exit(1)

    # Do we have one dataset, or a file with a list of them?
    if os.path.exists(RunOptions.dataset):
        with open(RunOptions.dataset) as dsfile:
            for line in dsfile:
                # Ignore comment lines and empty lines
                if line.startswith('#'):
                    continue
                realline = line.strip()
                if realline.find("_tid") > -1:
                    realline = realline[0:realline.find("_tid")]
                if not realline:
                    continue  # Ignore whitespace
                if not RunOptions.single_out_file:
                    createFileList(realline, RunOptions)
                else:
                    all_files += GetDataSetFiles(realline, RunOptions.RSE, RunOptions.protocols)
    else:
        createFileList(RunOptions.dataset, RunOptions)
    if len(all_files) > 0:
        WriteList(all_files, RunOptions.out_file_name)
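# A hedged illustration of the dataset list file that main() accepts via --dataset
# (the container names below are placeholders, not real datasets):
#
#   # comment lines starting with '#' are skipped
#   user.jdoe.some_dataset.deriv.DAOD_EXAMPLE.e1234_s5678_r9012_p3456_tid01234567_00
#   user.jdoe.another_dataset.deriv.DAOD_EXAMPLE.e1234_s5678_r9012_p3456
#
# The trailing "_tid..." suffix is stripped before the dataset is resolved.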
def __init__(
        self,
        #### Container with all dataset names
        rucio_container,
        ### RSE where the container is stored
        dest_rse,
        #### Download the container to the disk
        download=False,
        #### Merge the datasets to a common file
        merge=False,
        ## Download directory
        download_dir="/tmp/download",
        #### Destination directory
        destination_dir="/tmp",
        #### Cluster engine for a potential merge
        cluster_engine=None,
        #### Maximum size per merged file (B)
        max_merged_size=25 * 1024 * 1024 * 1024,
        ### Logical dataset name (optional)
        logical_name="",
        #### Rucio groupdisk protocol
        protocol="root",
        ## Hold jobs
        hold_jobs=[],
        ### Files per merge job
        files_per_merge_job=20,
):
    self.__container_name = rucio_container
    self.__rse = dest_rse
    self.__download = download
    self.__merge = merge
    self.__download_dir = download_dir
    self.__files_per_merge = files_per_merge_job
    while self.__download_dir.find("//") != -1:
        self.__download_dir = self.__download_dir.replace("//", "/")
    self.__dest_dir = destination_dir
    self.__engine = cluster_engine
    self.__max_file_size = max_merged_size
    self.__logical_name = logical_name

    CheckRemainingProxyTime()
    self.__files_on_rse = [
        f for f in GetDataSetFiles(self.container(), self.rse(), protocol) if self._is_good_file(f)
    ] if len(rucio_container) > 0 and not self.__download else []

    #### List of files to be downloaded on disk
    self.__files_on_disk = []
    if self.__download:
        CreateDirectory(self.ds_download_dir(), False)
        downloadDataSets(InputDatasets=[self.container()], Destination=self.__download_dir, use_singularity=True)
        self.__files_on_disk = [
            "%s/%s" % (self.ds_download_dir(), f)
            for f in os.listdir(self.ds_download_dir())
            if self._is_good_file(self.ds_download_dir() + "/" + f)
        ]
    self.__merge_interfaces = []
    self.__hold_jobs = hold_jobs
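# A minimal usage sketch of the handler above (container and RSE names are placeholders,
# and "my_engine" stands for whatever cluster engine object setup_engine() returns):
#
#   handler = DataSetFileHandler(rucio_container="user.jdoe:user.jdoe.my_container",
#                                dest_rse="SOME-RSE_LOCALGROUPDISK",
#                                merge=True,
#                                cluster_engine=my_engine,
#                                destination_dir="/tmp/merged")
#   handler.submit_job()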
def main(): """Request datasets to RSE location.""" CheckRucioSetup() CheckRemainingProxyTime() RunOptions = getArgumentParser().parse_args() List = ClearFromDuplicates(ReadListFromFile(RunOptions.list)) ### Start replication of the datasets initiateReplication(ListOfDataSets=List, Rucio=RunOptions.rucio, RSE=RunOptions.RSE, lifeTime=RunOptions.lifetime, approve=RunOptions.askapproval, comment=RunOptions.comment)
def getAmiClient():
    global m_AMIClient
    if m_AMIClient:
        return m_AMIClient
    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
    except ImportError:
        print('No AMI setup is found. Please set up AMI using "localSetupPyAMI"')
        sys.exit(1)
    if os.getenv("RUCIO_ACCOUNT") is None:
        print("No RUCIO_ACCOUNT is available. Please define a rucio account")
        exit(1)
    while CheckRemainingProxyTime() < 600:
        print("VOMS-PROXY is running out, renewing...")
    m_AMIClient = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    return m_AMIClient
def getAmiClient():
    global m_AMIClient
    if m_AMIClient:
        return m_AMIClient
    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
    except ImportError:
        logging.error('No AMI setup is found. Please set up AMI using "localSetupPyAMI"')
        sys.exit(1)
    if not RUCIO_ACCOUNT:
        logging.error("No RUCIO_ACCOUNT is available. Please define a rucio account")
        exit(1)
    while CheckRemainingProxyTime() < 600:
        logging.info("VOMS-PROXY is running out, renewing...")
    m_AMIClient = pyAMI.client.Client('atlas')
    AtlasAPI.init()
    return m_AMIClient
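# A hedged usage sketch (the dataset pattern is a placeholder; list_datasets is part of
# pyAMI's atlas API, but the exact fields available may depend on the pyAMI version):
#
#   import pyAMI.atlas.api as AtlasAPI
#   client = getAmiClient()
#   datasets = AtlasAPI.list_datasets(client, patterns=['mc16_13TeV.%.DAOD_TOPQ1.%'], fields=['ldn'])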
def main(): """List datasets located at a RSE location.""" CheckRucioSetup() CheckRemainingProxyTime() RunOptions = getArgumentParser().parse_args() Today = time.strftime("%Y-%m-%d") Patterns = RunOptions.pattern OutDir = RunOptions.OutDir RSE = RunOptions.RSE if ',' in RSE: RSE = RSE.split( ',' )[0] # in case people have more than one RSE in their environment variable for grid submits Prefix = '' if RunOptions.MyRequests: Prefix = 'MyRequestTo_' DS = ListUserRequests(RSE, RunOptions.rucio) else: DS = ListDisk(RSE) ### MetaFile = open("Content_%s.txt"%(RSE), 'w') ### for DataSet, Size in ListDiskWithSize(RSE): ### Owner, ID = GetDataSetInfo(DataSet,RSE) ### line = "%s | %s | %s | %.2f GB"%(ID, Owner,DataSet, Size) ### MetaFile.write("%s\n"%(line)) ### print line ### MetaFile.close() ### exit(0) if len(DS) == 0: logging.warning("Disk is empty.") exit(0) CreateDirectory(OutDir, False) ########### # Define the file list name ########### FileList = "%s%s_%s" % (Prefix, RSE, Today) if len(Patterns) > 0: FileList += "_%s" % ('_'.join(Patterns)) if len(RunOptions.exclude) > 0: FileList += "_exl_%s" % ('_'.join(RunOptions.exclude)) FileList += '.txt' Write = [] for d in sorted(DS): allPatternsFound = True for Pattern in Patterns: if not Pattern in d: allPatternsFound = False break for Pattern in RunOptions.exclude: if Pattern in d: allPatternsFound = False break if allPatternsFound: IsInWrite = False if d.split(".")[-1].isdigit(): d = d[:d.rfind(".")] if d.find("_tid") != -1: d = d[0:d.rfind("_tid")] if len([w for w in Write if w.find(d) != -1]) > 0: continue logging.info("Write dataset %s" % (d)) Write.append(d) if len(Write) == 0: logging.error("No datasets containing given pattern(s) found!") exit(0) WriteList(Write, "%s/%s" % (OutDir, FileList)) logging.info("Datasets written to file %s/%s" % (OutDir, FileList))
        if line.startswith("|"):
            line = line[1:]
        else:
            continue
        Candidate = line.split("|")[0].strip()
        if Candidate in RSE:
            Replicas.append(Candidate)
    return Replicas


def getRSEs():
    Cmd = "rucio list-rses"
    return commands.getoutput(Cmd).split()


if __name__ == '__main__':
    CheckRucioSetup()
    CheckRemainingProxyTime()
    OutDir = os.getcwd()

    parser = argparse.ArgumentParser(
        description='This script lists datasets located at a RSE location. Further patterns to find or exclude can be specified.',
        prog='ListDisk',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-p', '-P', '--pattern',
                        help='specify a pattern which is part of dataset name',
                        nargs='+',
                        default=[])
    parser.add_argument(
def main(): """Merge files from a list using the MergeClass in ClusterEngine.""" RunOptions = getArgumentParser().parse_args() if RunOptions.fileListsFolder != "": if len(RunOptions.fileLists) > 0: logging.warning( 'You gave both a folder containing filelists and separate filelists, will merge both!' ) if not os.path.isdir(RunOptions.fileListsFolder): logging.error(' %s is not a directory, exiting...' % RunOptions.fileListsFolder) sys.exit(1) for l in os.listdir(RunOptions.fileListsFolder): if not os.path.isdir('%s/%s' % (RunOptions.fileListsFolder, l)): RunOptions.fileLists.append('%s/%s' % (RunOptions.fileListsFolder, l)) submit_engine = setup_engine(RunOptions) merging = [ submit_engine.create_merge_interface( out_name=L[L.rfind("/") + 1:L.rfind(".")], files_to_merge=ReadListFromFile(L), files_per_job=RunOptions.nFilesPerJob, hold_jobs=RunOptions.HoldJob, final_split=RunOptions.remainingSplit) for L in RunOptions.fileLists ] ### Rucio lists if len(RunOptions.RucioDSList) > 0: CheckRucioSetup() CheckRemainingProxyTime() #### Check that we can actually obtain the datasets if len(RunOptions.RucioRSE) == 0 and not RunOptions.download: logging.error( "Please specifiy either the RSE on which the datasets are stored via --RucioRSE or activate the download option" ) exit(1) ds_to_merge = ReadListFromFile(RunOptions.RucioDSList) download_dir = submit_engine.tmp_dir() + "TMP_DOWNLOAD/" if RunOptions.download: downloadDataSets(InputDatasets=ds_to_merge, Destination=download_dir, RSE=RunOptions.RucioRSE, use_singularity=False) to_wait = [] hold_jobs = [] for ds in ds_to_merge: ds_name = ds[ds.find(":") + 1:] if RunOptions.batch_size <= 0: merging += [ submit_engine.create_merge_interface( out_name=ds_name, files_to_merge=GetDataSetFiles(dsname=ds, RSE=RunOptions.RucioRSE, protocols="root") if not RunOptions.download else [ download_dir + ds_name + "/" + x for x in os.listdir(download_dir + ds_name) ], files_per_job=RunOptions.nFilesPerJob, hold_jobs=RunOptions.HoldJob + hold_jobs, final_split=RunOptions.remainingSplit) ] else: merging += [ DataSetFileHandler(rucio_container=ds, dest_rse=RunOptions.RucioRSE, download=RunOptions.download, merge=True, download_dir=download_dir, destination_dir=submit_engine.out_dir(), cluster_engine=submit_engine, max_merged_size=RunOptions.batch_size * 1024 * 1024 * 1024, hold_jobs=RunOptions.HoldJob + hold_jobs, files_per_merge_job=2) ] to_wait += [submit_engine.subjob_name(merging[-1].job_name())] if len(to_wait) % 5 == 0: hold_jobs = [w for w in to_wait] to_wait = [] for merge in merging: merge.submit_job() clean_hold = [ submit_engine.subjob_name(merge.job_name()) for merge in merging ] submit_engine.submit_clean_all(clean_hold) submit_engine.finish()