def __get_job_options(self, runNumbers):
    if not self.engine().submit_hook():
        logging.warning("A job with the name {j} has already been submitted.".format(j=self.engine().job_name()))
        return
    CreateDirectory(self.engine().config_dir(), True)
    for r in runNumbers:
        jobFolder = os.path.join(self.__joboptions_dir, "{ddd}xxx".format(ddd=str(r)[:3]))
        if not os.path.isdir(jobFolder):
            logging.warning("Job option folder {f} for DSID {r} does not exist. Skipping {r}...".format(f=jobFolder, r=r))
            continue
        dir_to_copy = os.path.join(jobFolder, str(r))
        if len(dir_to_copy) == 0:
            continue
        shutil.copytree(dir_to_copy, os.path.join(self.engine().config_dir(), str(r)))
        ### Assemble the config files for the job option. Draw one unique integer seed per sub job.
        seeds = []
        while len(seeds) < self.__nJobs:
            s = random.randint(100000, 500000)
            if s not in seeds:
                seeds += [s]
        jo = os.path.join(self.engine().config_dir(), str(r))
        out_dir = os.path.join(self.evgen_dir(), str(r))
        WriteList((ReadListFromFile(self.seed_file()) if os.path.exists(self.seed_file()) else []) + ["%d" % (i) for i in seeds],
                  self.seed_file())
        WriteList((ReadListFromFile(self.run_file()) if os.path.exists(self.run_file()) else []) + ["%d" % (r) for i in range(self.__nJobs)],
                  self.run_file())
        WriteList((ReadListFromFile(self.job_file()) if os.path.exists(self.job_file()) else []) + [jo for i in range(self.__nJobs)],
                  self.job_file())
        WriteList((ReadListFromFile(self.out_file()) if os.path.exists(self.out_file()) else []) + [out_dir for i in range(self.__nJobs)],
                  self.out_file())
        ### Book the sub jobs for the job array
        self.__n_scheduled += self.__nJobs
        logging.info("<__get_job_options> Found %s" % (jo))
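### Illustrative note (an assumption, not taken from the code above): seed_file(), run_file(),
### job_file() and out_file() appear to be parallel lists with one line per scheduled sub job,
### so array task N of the generation job presumably reads line N from each of them, e.g.
###     seed_file: 234561    run_file: 123456    job_file: <config_dir>/123456    out_file: <evgen_dir>/123456
###     seed_file: 310044    run_file: 123456    job_file: <config_dir>/123456    out_file: <evgen_dir>/123456
### The DSID and seed values shown here are hypothetical.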
def submit_job(self):
    if self.__submitted:
        return False
    job_array = WriteList(self.merge_lists(), "%s/%s.txt" % (self.engine().config_dir(), id_generator(31)))
    final_merge_name = WriteList(self.temporary_files(), "%s/%s.txt" % (self.engine().config_dir(), id_generator(30)))
    if not self.engine().submit_array(script="ClusterSubmission/Merge.sh",
                                      sub_job=self.job_name(),
                                      mem=self.engine().merge_mem(),
                                      env_vars=[
                                          ("JobConfigList", job_array),
                                          ("OutFileList", final_merge_name),
                                          ("ALRB_rootVersion", ROOTVERSION),
                                      ],
                                      hold_jobs=self.hold_jobs(),
                                      run_time=self.engine().merge_time(),
                                      array_size=len(self.merge_lists())):
        return False
    self.__submitted = True
    if not self.child():
        return True
    if not self.child().submit_job():
        return False
    return self.engine().submit_clean_job(hold_jobs=[self.engine().subjob_name(self.child().job_name())],
                                          to_clean=self.temporary_files(),
                                          sub_job=self.job_name())
def __extract_seeds(self, run):
    try:
        EVNT_DIR = [os.path.join(self.evgen_dir(), R) for R in os.listdir(self.evgen_dir()) if R.startswith(str(run))][0]
    except:
        ### Nothing has been generated for this run yet
        return
    logging.info("<__extract_seeds> Searching {evntdir} for EVNT files not already processed in derivation format {d}.".format(
        evntdir=EVNT_DIR, d=self.__derivation))
    DERIVATION_DIR = os.path.join(self.aod_dir(), EVNT_DIR[EVNT_DIR.rfind("/") + 1:])
    CreateDirectory(DERIVATION_DIR, False)
    Evnt_Seeds = [int(E[E.find("EVNT") + 5:E.find(".pool")]) for E in os.listdir(EVNT_DIR) if E.endswith(".root")]
    DAOD_Seeds = [int(A.split(".")[-2]) for A in os.listdir(DERIVATION_DIR) if A.find(self.__derivation) != -1 and A.endswith(".root")]
    Non_ProcSeeds = [seed for seed in Evnt_Seeds if seed not in DAOD_Seeds]
    if len(Non_ProcSeeds) == 0:
        return
    logging.info("Extracted seeds for run {r}:".format(r=run))
    logging.info("    +-=- {s}".format(s=", ".join([str(seed) for seed in Non_ProcSeeds])))
    WriteList((ReadListFromFile(self.seed_file()) if os.path.exists(self.seed_file()) else []) + [str(seed) for seed in Non_ProcSeeds],
              self.seed_file())
    WriteList((ReadListFromFile(self.run_file()) if os.path.exists(self.run_file()) else []) + [str(run) for seed in Non_ProcSeeds],
              self.run_file())
    WriteList((ReadListFromFile(self.in_file()) if os.path.exists(self.in_file()) else []) + [EVNT_DIR for seed in Non_ProcSeeds],
              self.in_file())
    self.__n_scheduled += len(Non_ProcSeeds)
def main():
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()
    all_files = []
    if RunOptions.single_out_file and len(RunOptions.out_file_name) == 0:
        logging.error("Please provide a file name if you run with --single-out_file")
        exit(1)

    # Do we have one dataset, or a file with a list of them?
    if os.path.exists(RunOptions.dataset):
        with open(RunOptions.dataset) as dsfile:
            for line in dsfile:
                # Ignore comment lines and empty lines
                if line.startswith('#'):
                    continue
                realline = line.strip()
                if realline.find("_tid") > -1:
                    realline = realline[0:realline.find("_tid")]
                if not realline:
                    continue  # Ignore whitespace
                if not RunOptions.single_out_file:
                    createFileList(realline, RunOptions)
                else:
                    all_files += GetDataSetFiles(realline, RunOptions.RSE, RunOptions.protocols)
    else:
        createFileList(RunOptions.dataset, RunOptions)
    if len(all_files) > 0:
        WriteList(all_files, RunOptions.out_file_name)
def downloadDataSets(InputDatasets, Destination, RSE="", use_singularity=False):
    ### Apparently rucio no longer works in combination with AthAnalysis, so we
    ### execute it from a singularity container instead.
    Cmds = []
    image_to_choose = setupBatchSubmitArgParser().get_default("SingularityImage")
    home_dir = setupBatchSubmitArgParser().get_default("BaseFolder") + "/TMP/.singularity/"
    CreateDirectory(Destination, False)
    if use_singularity:
        CreateDirectory(home_dir, False)
    to_clean = []
    for DS in InputDatasets:
        if not use_singularity:
            Cmds += ["rucio download %s --ndownloader 32 %s --dir %s" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination)]
        else:
            singularity_dir = home_dir + "/" + id_generator(21)
            to_clean += [singularity_dir]
            singularity_script = WriteList([
                "#!/bin/bash",
                "export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase",
                "source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh",
                "lsetup rucio",
                "echo 'rucio download %s --ndownloader 32 %s --dir %s'" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination),
                "rucio download %s --ndownloader 32 %s --dir %s" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination),
            ], "%s/to_exec.sh" % (singularity_dir))
            os.system("chmod 0777 " + singularity_script)
            Cmds += [
                "singularity exec --cleanenv -H %s:/alrb -B %s:/srv %s/%s %s" %
                (singularity_dir, Destination, SINGULARITY_DIR, image_to_choose, singularity_script)
            ]
    ExecuteCommands(ListOfCmds=Cmds, MaxCurrent=8)
    for c in to_clean:
        os.system("rm -rf %s" % (c))
def write_ship_file(self, env_vars):
    ship_file_name = WriteList(
        ["#!/bin/bash"] + ["export %s='%s'" % (var, val) for var, val in env_vars + self.common_env_vars()],
        "%s/%s.sh" % (self.config_dir(), id_generator(74)))
    os.system("chmod 0700 %s" % (ship_file_name))
    return ship_file_name
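### Minimal usage sketch (an assumption, not part of the module): given an engine instance,
### write_ship_file turns a list of (name, value) pairs into a sourceable bash snippet, e.g.
###     ship_file = engine.write_ship_file([("JobConfigList", "/path/to/cfg.txt"), ("NUM_CORES", "4")])
### would produce a script of the form
###     #!/bin/bash
###     export JobConfigList='/path/to/cfg.txt'
###     export NUM_CORES='4'
###     ... plus the engine's common_env_vars()
### which the batch payload sources to reconstruct its environment. The variable names and
### paths above are hypothetical.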
def submit_move_job(self, hold_jobs=[], to_move=[], destination="", source_dir="", sub_job=""):
    move_cfg = ""
    if len(to_move) > 0:
        move_cfg = "%s/Move_%s.txt" % (self.config_dir(), id_generator(35))
        WriteList(to_move, move_cfg)
    elif len(source_dir) > 0:
        move_cfg = source_dir
    else:
        logging.error("<submit_move_job> Nothing to move")
        return False
    if len(destination) == 0:
        logging.error("<submit_move_job> No destination provided to move the files to")
        return False
    return self.submit_job(script="ClusterSubmission/Move.sh",
                           mem=100,
                           env_vars=[
                               ("DestinationDir", destination),
                               ("FromDir", move_cfg),
                           ],
                           hold_jobs=hold_jobs,
                           sub_job="Move%s%s" % ("" if len(sub_job) == 0 else "-", sub_job),
                           run_time="01:00:00")
def updateBlackList(black_list):
    current_black = getPRWblackList()
    if IsListIn(black_list, current_black):
        return
    current_black = ClearFromDuplicates(current_black + black_list)
    current_dir = os.getcwd()
    FileName = os.path.realpath(ResolvePath("XAMPPbase/BlackListedPRWdatasets.txt"))
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    ###############################################################################
    # Find out the current branch to propagate only                              #
    # the updated list to the main repository. Other changes regarding           #
    # side developments of the package should not be propagated yet              #
    ###############################################################################
    upstream = setupGITupstream()
    current_branch = getBranch()
    os.chdir(Pkg_Dir)
    new_branch = "PRW_%s_%s" % (time.strftime("%Y%m%d"), USERNAME)
    if current_branch:
        os.system("git commit -am \"Commit changes of all files in order to push the 'BlackListedPRWdatasets.txt'\"")
    print "INFO: Create new branch %s to update the BlackListedPRWdatasets" % (new_branch)
    os.system("git checkout -b %s %s/master" % (new_branch, upstream))
    WriteList(sorted(current_black), FileName)
    os.system("git add BlackListedPRWdatasets.txt")
    os.system("git commit BlackListedPRWdatasets.txt -m \"Updated the list of black prw files\"")
    os.system("git push %s %s" % (upstream, new_branch))
    if current_branch:
        os.system("git checkout %s" % (current_branch))
    os.chdir(current_dir)
def insertPRWUser(user):
    Users = getUsersSubmittedPRW()
    if user in Users:
        return
    Users += [user]
    current_dir = os.getcwd()
    FileName = os.path.realpath(ResolvePath("XAMPPbase/UsersWhoSubmittedPRW.txt"))
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    ###############################################################################
    # Find out the current branch to propagate only                              #
    # the updated list to the main repository. Other changes regarding           #
    # side developments of the package should not be propagated yet              #
    ###############################################################################
    upstream = setupGITupstream()
    current_branch = getBranch()
    os.chdir(Pkg_Dir)
    new_branch = "PRW_" + user.replace(".", "_")
    if current_branch:
        os.system("git commit -am \"Commit changes of all files in order to push the 'UsersWhoSubmittedPRW.txt'\"")
    print "INFO: Create new branch %s to update the UsersWhoSubmittedPRW" % (new_branch)
    os.system("git checkout -b %s %s/master" % (new_branch, upstream))
    print "INFO: %s submitted prw_config jobs to the grid. Add the user to the common list such that others can download the files" % (user)
    WriteList(sorted(Users), FileName)
    os.system("git add UsersWhoSubmittedPRW.txt")
    os.system("git commit UsersWhoSubmittedPRW.txt -m \"Added %s to the list of users who submitted a prw config creation job\"" % (user))
    os.system("git push %s %s" % (upstream, new_branch))
    if current_branch:
        os.system("git checkout %s" % (current_branch))
    os.chdir(current_dir)
def run(self):
    CreateDirectory(self.download_dir(), False)
    CreateDirectory(self.final_directory(), False)
    #self.__datasets = sorted(self.__datasets, key=lambda x: GetPRW_datasetID(x))
    DownloadList = [
        'rucio download --ndownloader 5 --dir %s %s' % (self.download_dir(), ds) for ds in self.__datasets
        if ds not in self.__already_on_disk
    ]
    ExecuteCommands(ListOfCmds=DownloadList, MaxCurrent=16)
    self.clearFromDuplicates(self.download_dir())
    Files = []
    for dir in os.listdir(self.download_dir()):
        dir_path = "%s/%s" % (self.download_dir(), dir)
        if not os.path.isdir(dir_path):
            continue
        if not self.hasDataset(dir):
            continue
        Files += ["%s/%s" % (dir_path, F) for F in os.listdir(dir_path) if IsROOTFile(F)]
    WriteList(sorted(Files), "%s/temp_in.txt" % (self.download_dir()))
    # Only one entry in the MCPileupReweighting tree per Channel/RunNumber combination is actually needed.
    # Thus, remove all but one of them in order to significantly reduce the file size.
    # This is done by the SlimPRWFile macro in XAMPPbase/utils/
    MergeCmd = "SlimPRWFile --InList %s/temp_in.txt --outFile %s" % (self.download_dir(), self.final_file())
    print MergeCmd
    os.system(MergeCmd)
    print "INFO: Clean up the temporary file"
    os.system("rm %s/temp_in.txt " % (self.download_dir()))
    self.standaloneCheck()
    print "INFO: Done"
def write_status_log(file_location, grid_jobs):
    if len(grid_jobs) > 0:
        max_task_letters = max([len(str(job.jediTaskID)) for job in grid_jobs])
        max_status_letters = max([len(str(job.taskStatus)) for job in grid_jobs])
        max_job_name_letters = max([len(job.jobName) for job in grid_jobs])
        max_progress_letters = max([len(make_progess(job)) for job in grid_jobs])
        log_file = []
        for job in sorted(grid_jobs, key=lambda x: get_progress(x)[0], reverse=True):
            proc_frac, done_jobs, all_jobs = get_progress(job)
            log_file += [
                "https://bigpanda.cern.ch/task/%s/ %s %s %s %s %s %s %s(%.2f%%) %s" %
                (job.jediTaskID, FillWhiteSpaces(max_task_letters - len(str(job.jediTaskID))), job.jobName,
                 FillWhiteSpaces(max_job_name_letters - len(job.jobName)), job.taskStatus,
                 FillWhiteSpaces(max_status_letters - len(job.taskStatus)), make_progess(job),
                 FillWhiteSpaces(max_progress_letters - len(make_progess(job))), proc_frac, job.inDS)
            ]
        WriteList(log_file, file_location)
def submit_copy_job(
        self,
        hold_jobs=[],
        to_copy=[],  ### Give particular files to copy
        destination="",
        source_dir="",  ### Optional
        sub_job=""):
    copy_cfg = ""
    if len(to_copy) > 0:
        copy_cfg = "%s/Copy_%s.txt" % (self.config_dir(), id_generator(35))
        WriteList(to_copy, copy_cfg)
    elif len(source_dir) > 0:
        copy_cfg = source_dir
    else:
        print "<submit_copy_job> Nothing to copy"
        return False
    if len(destination) == 0:
        print "<submit_copy_job> No destination provided to copy the files to"
        return False
    return self.submit_job(script="ClusterSubmission/Copy.sh",
                           mem=100,
                           env_vars=[
                               ("DestinationDir", destination),
                               ("FromDir", copy_cfg),
                           ],
                           hold_jobs=hold_jobs,
                           sub_job="Copy%s%s" % ("" if len(sub_job) == 0 else "-", sub_job),
                           run_time="01:00:00")
def submit_clean_job(self, hold_jobs=[], to_clean=[], sub_job=""):
    clean_cfg = "%s/Clean_%s.txt" % (self.config_dir(), id_generator(35))
    WriteList(to_clean, clean_cfg)
    return self.submit_job(script="ClusterSubmission/Clean.sh",
                           mem=100,
                           env_vars=[("ToClean", clean_cfg)],
                           hold_jobs=hold_jobs,
                           sub_job="Clean%s%s" % ("" if len(sub_job) == 0 else "-", sub_job),
                           run_time="01:00:00")
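### Illustrative sketch (an assumption, not part of the module): the helper jobs above are meant
### to be chained via hold_jobs, e.g. copy the outputs of an analysis job and clean up afterwards:
###     engine.submit_copy_job(hold_jobs=[engine.subjob_name("Analysis")],
###                            to_copy=["/tmp/out1.root", "/tmp/out2.root"],
###                            destination="/eos/user/s/someone/results", sub_job="Results")
###     engine.submit_clean_job(hold_jobs=[engine.subjob_name("Copy-Results")],
###                             to_clean=["/tmp/out1.root", "/tmp/out2.root"], sub_job="Results")
### The sub_job names and paths here are hypothetical.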
def __assemble_merge_list(self, files_to_merge):
    copied_in = [x for x in files_to_merge]
    if self.__shuffle_files:
        shuffle(copied_in)
    merge_lists = []
    merge_in = []
    for i, fi in enumerate(copied_in):
        if i > 0 and i % self.__files_per_job == 0:
            merge_name = "%s/%s.txt" % (self.engine().config_dir(), id_generator(85))
            WriteList(merge_in, merge_name)
            merge_lists += [merge_name]
            merge_in = []
        merge_in += [fi]
    ### Pack the last remnants into a final merge job
    if len(merge_in) > 0:
        merge_name = "%s/%s.txt" % (self.engine().config_dir(), id_generator(85))
        WriteList(merge_in, merge_name)
        merge_lists += [merge_name]
    return merge_lists
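### Standalone sketch of the chunking logic above (illustration only, this helper is an
### assumption and is not used by the class). It splits a flat file list into groups of at
### most files_per_job entries, which is what the per-job merge lists end up containing.
def _chunk_file_list(files, files_per_job):
    """Return a list of lists, each holding at most files_per_job entries."""
    return [files[i:i + files_per_job] for i in range(0, len(files), files_per_job)]

### Example: _chunk_file_list(["a.root", "b.root", "c.root"], 2) -> [["a.root", "b.root"], ["c.root"]]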
def createFileList(dsname, options):
    logging.info('Creating file list for ' + dsname)
    DS = GetDataSetFiles(dsname, options.RSE, options.protocols)
    if len(DS) == 0:
        logging.error("No datasets found")
        return
    if dsname.find(":") > -1:
        dsname = dsname[dsname.find(":") + 1:len(dsname)]
    CreateDirectory(options.OutDir, False)
    filelistname = options.OutDir + "/" + dsname.rstrip('/') + ".txt"
    if os.path.exists(filelistname):
        logging.info("Remove the old FileList")
        os.system("rm " + filelistname)
    WriteList(DS, filelistname)
def createFileList(dsname, options):
    prettyPrint('Creating file list for', dsname)
    DS = GetDataSetFiles(dsname, options.RSE, options.protocols)
    if len(DS) == 0:
        print "No datasets found"
        return
    if dsname.find(":") > -1:
        dsname = dsname[dsname.find(":") + 1:len(dsname)]
    OutDir = options.OutDir
    if not os.path.exists(OutDir):
        print "mkdir -p " + OutDir
        os.system("mkdir -p " + OutDir)
    filelistname = OutDir + "/" + dsname.rstrip('/') + ".txt"
    if os.path.exists(filelistname):
        print "Remove the old FileList"
        os.system("rm " + filelistname)
    WriteList(DS, filelistname)
def pack_environment(self, env_vars, script):
    exec_script = self.link_to_copy_area(script)
    if not exec_script:
        return False
    ship_file = self.write_ship_file(env_vars)
    if self.run_singularity():
        ship_file = self.write_ship_file([
            ("CONTAINER_SCRIPT", exec_script),
            ("CONTAINER_IMAGE", self.singularity_container()),
            ("CONTAINER_SHIPING_FILE", ship_file),
        ])
        exec_script = self.link_to_copy_area(ResolvePath("ClusterSubmission/Singularity.sh"))
    env_script = WriteList(["#!/bin/bash", "source %s" % (ship_file), "source %s" % (exec_script)],
                           "%s/EnvScript_%s.sh" % (self.config_dir(), id_generator(50)))
    os.system("chmod 0700 %s" % (env_script))
    return env_script
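### For orientation (derived from the code above; the file names are hypothetical): the
### environment script handed to the scheduler is a two-line wrapper of the form
###     #!/bin/bash
###     source <config_dir>/<ship_file>.sh      # exports the job's env_vars
###     source <copy_area>/<payload>.sh         # the payload, or Singularity.sh when run_singularity() is set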
def submit_job(self):
    WriteList([D.replace(self.evgen_dir(), self.aod_dir()) for D in ReadListFromFile(self.in_file())], self.out_file())
    extra_args = ""
    if len(self.__preExec) > 0:
        extra_args += ' --preExec "%s" ' % (self.__preExec)
    if len(self.__preInclude) > 0:
        extra_args += ' --preInclude "%s" ' % (self.__preInclude)
    if len(self.__postExec) > 0:
        extra_args += ' --postExec "%s" ' % (self.__postExec)
    if len(self.__postInclude) > 0:
        extra_args += ' --postInclude "%s" ' % (self.__postInclude)
    if not self.engine().submit_array(sub_job=self.__derivation,
                                      script="SubmitMC/batch_derivation.sh",
                                      mem=self.__mem,
                                      env_vars=[
                                          ("SeedFile", self.seed_file()),
                                          ("RunFile", self.run_file()),
                                          ("InFile", self.in_file()),
                                          ("OutFile", self.out_file()),
                                          ("DERIVATION_DIR", self.aod_dir()),
                                          ("DerivationRelease", self.__derivRelease),
                                          ("DerivationCache", self.__derivCache),
                                          ("ReductionConf", self.__derivation),
                                          ("ExtraArgs", extra_args),
                                      ],
                                      hold_jobs=self.hold_jobs(),
                                      run_time=self.__run_time,
                                      array_size=self.n_scheduled()):
        return False
    return True
def submit_build_job(self):
    if self.__submitted_build:
        print "ERROR <submit_build_job>: Build job is already submitted"
        return False
    if not self.submit_hook():
        return False
    if self.send_build_job() and not self.submit_job(script="ClusterSubmission/Build.sh",
                                                     sub_job="Build",
                                                     mem=self.__buildMem,
                                                     env_vars=[("CleanOut", self.out_dir()),
                                                               ("CleanLogs", self.log_dir()),
                                                               ("CleanTmp", self.tmp_dir()),
                                                               ("nCoresToUse", self.__buildCores),
                                                               ("COPYAREA", self.build_dir())],
                                                     run_time=self.__buildTime,
                                                     hold_jobs=self.__holdBuild):
        return False
    elif not self.send_build_job():
        if not CreateDirectory(self.log_dir(), False) or not CreateDirectory(self.out_dir(), False):
            return False
        Dummy_Job = "%s/%s.sh" % (self.config_dir(), id_generator(35))
        WriteList([
            "#!/bin/bash",
            "echo \"I'm a dummy build job. Will wait 120 seconds until everything is scheduled\"",
            "sleep 120",
        ], Dummy_Job)
        if not self.submit_job(script=Dummy_Job,
                               sub_job="Build",
                               mem=100,
                               env_vars=[],
                               run_time="00:05:00",
                               hold_jobs=self.__holdBuild):
            return False
    self.__submitted_build = True
    self.lock_area()
    return True
def finish(self):
    if len(self.__job_dependency_dict) == 0:
        logging.debug("Nothing has been scheduled")
        return False
    dag_content = []
    for job in self.__job_dependency_dict:
        dag_content += job.get_job_config_str()
    dag_content += ["\n\n\n"]
    for job in self.__job_dependency_dict:
        dag_content += job.get_dependency_str()
    dag_dir = self.log_dir() + "/DAG/"
    dag_location = WriteList(dag_content, "%s/%s.dag" % (dag_dir, self.job_name()))
    os.chdir(dag_dir)
    cmd = "condor_submit_dag -verbose -maxidle %d %s %s.dag" % (
        self.max_running_per_array(),
        ("-append '+MyProject = \"%s\"'" % self.accountinggroup() if self.accountinggroup() else ""),
        self.job_name())
    return not os.system(cmd)
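### For orientation (an assumption: get_job_config_str()/get_dependency_str() are taken to emit
### standard HTCondor DAGMan directives), the assembled <job_name>.dag file would look roughly like
###     JOB A <config_dir>/abc.sub
###     JOB B <config_dir>/def.sub
###
###     PARENT A CHILD B
### and is handed to condor_submit_dag, which throttles the number of idle jobs via -maxidle.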
def submit_build_job(self):
    if self.check_submitted_build():
        logging.warning("<submit_build_job>: Build job is already submitted")
        return True
    if not self.submit_hook():
        return False
    ### A few cluster engines go crazy if the log files of their own jobs are deleted.
    ### Make sure that the build job deletes the log dir before submission.
    if not CreateDirectory(self.log_dir(), True):
        return False
    if self.send_build_job() and not self.submit_job(script="ClusterSubmission/Build.sh",
                                                     sub_job="Build",
                                                     mem=self.get_build_mem(),
                                                     n_cores=self.get_build_cores(),
                                                     env_vars=[("CleanOut", self.out_dir()),
                                                               ("CleanTmp", self.tmp_dir()),
                                                               ("nCoresToUse", self.get_build_cores()),
                                                               ("COPYAREA", self.build_dir())],
                                                     run_time=self.get_build_time(),
                                                     hold_jobs=self.get_build_hold_jobs()):
        return False
    elif not self.send_build_job():
        if not CreateDirectory(self.out_dir(), False):
            return False
        Dummy_Job = WriteList([
            "#!/bin/bash",
            "echo \"I'm a dummy build job. Will wait 15 seconds until everything is scheduled\"",
            "sleep 15",
        ], "%s/%s.sh" % (self.config_dir(), id_generator(35)))
        if not self.submit_job(script=Dummy_Job,
                               sub_job="Build",
                               mem=100,
                               run_time="00:05:00",
                               hold_jobs=self.__holdBuild):
            return False
    self.__submitted_build = True
    self.lock_area()
    return True
def download_ci_files(options):
    ### Retrieve the EOS token first
    getEOS_token(options)
    ### Check first whether the CI dir actually exists
    smp_dir = "%s/datasamples/" % (options.ciDir)
    if not os.path.isdir(smp_dir):
        print "ERROR: The path to look up for the data samples %s does not exist. Where is my data?" % (smp_dir)
        exit(1)
    ### Create the directory to store the temporary files in
    ### and clean up the old remnants
    CreateDirectory(options.TEMPdir, True)
    downloaded_smp = []
    for smp in os.listdir(smp_dir):
        smp_name = smp[:smp.rfind(".")]
        print "INFO: Download the files from sample %s" % (smp_name)
        download_to = "%s/%s" % (options.TEMPdir, smp_name)
        CreateDirectory(download_to, False)
        ### Download the files first
        for file_to_load in ReadListFromFile("%s/%s" % (smp_dir, smp)):
            destination_file = "%s/%s" % (download_to, file_to_load[file_to_load.rfind("/") + 1:])
            CopyCmd = "xrdcp %s/%s %s" % (options.EOSpath, file_to_load, destination_file)
            if os.path.exists(destination_file):
                print "INFO: Skipping download of %s" % (file_to_load)
            elif os.system(CopyCmd) != 0:
                print "ERROR: Failed to download %s" % (file_to_load)
                exit(1)
        ### Write the file list for the analysis
        file_list = "%s/FileList_%s.txt" % (options.TEMPdir, smp_name)
        WriteList(["%s/%s" % (download_to, f[f.rfind("/") + 1:]) for f in ReadListFromFile("%s/%s" % (smp_dir, smp))], file_list)
        downloaded_smp += [smp_name]
    return downloaded_smp
def write_broken_log(options, broken_jobs):
    if len(broken_jobs) > 0:
        logging.info("Found %d unhealthy jobs. They are listed below" % (len(broken_jobs)))
        max_task_letters = max([len(str(job.jediTaskID)) for job in broken_jobs])
        max_status_letters = max([len(str(job.taskStatus)) for job in broken_jobs])
        max_job_name_letters = max([len(job.jobName) for job in broken_jobs])
        log_file = []
        for job in sorted(broken_jobs, key=lambda x: x.jobName):
            log_file += [
                "https://bigpanda.cern.ch/task/%s/ %s %s %s %s %s %s" %
                (job.jediTaskID, FillWhiteSpaces(max_task_letters - len(str(job.jediTaskID))), job.taskStatus,
                 FillWhiteSpaces(max_status_letters - len(job.taskStatus)), job.jobName,
                 FillWhiteSpaces(max_job_name_letters - len(job.jobName)), job.inDS)
            ]
            logging.info(log_file[-1])
        WriteList(log_file, options.broken_log_file)
chan_in_old = readPRWchannels("%s/%s" % (RunOptions.oldPRWDir, new))
chan_in_new = readPRWchannels("%s/%s" % (RunOptions.newPRWDir, new))
messages = []
AnythingNew = False
### Compare the prw channels of both files
for c in chan_in_new:
    if c not in chan_in_old:
        messages += ["INFO: Channel %d (%s) has been added through the last iteration to %s" % (c, MyxSecDB.name(c), new)]
        AnythingNew = True
    else:
        chan_in_old.remove(c)
### The old file somehow contains additional channels. We need to double check
if len(chan_in_old) > 0:
    messages += [
        "WARNING: The following channels were merged into the old file %s/%s but are no longer present in %s/%s" %
        (RunOptions.oldPRWDir, new, RunOptions.newPRWDir, new)
    ]
    for c in chan_in_old:
        messages += ["   -=-=- %d (%s)" % (c, MyxSecDB.name(c))]
if not AnythingNew:
    messages += ["INFO: Nothing new has been added to %s/%s w.r.t. %s/%s" % (RunOptions.newPRWDir, new, RunOptions.oldPRWDir, new)]
WriteList(messages, "PRWcheck_%s.log" % (new[:new.rfind(".")]))
if len(chan_in_old) > 0 and AnythingNew and RunOptions.uniteFiles:
    Cmd = "SlimPRWFile --inFile %s/%s --inFile %s/%s --outFile %s/%s --InIsSlimmed" % (
        RunOptions.newPRWDir, new, RunOptions.oldPRWDir, new, RunOptions.newPRWDir, new)
    os.system(Cmd)
def _write_submission_file(self, sub_job, exec_script, env_vars=[], mem=1, run_time='00:00:01', nproc=1, arraylength=-1):
    self.set_cluster_control_module("ClusterSubmission/ClusterControlHTCONDOR.sh")
    if not exec_script:
        logging.error("<_write_submission_file> No exec_script was given!")
        return False
    if mem < 0:
        logging.error("<_write_submission_file> No memory requirement for the job was specified.")
        return False
    job_name = self.subjob_name(sub_job)
    if len([x for x in self.__job_dependency_dict if x.getJobName() == job_name]):
        logging.error("The job %s has already been defined. Please ensure unique job names" % (job_name))
        return False
    log_string = "%s/%s%s" % (self.log_dir(), sub_job if len(sub_job) else job_name,
                              "_$(CONDOR_TASK_ID)" if arraylength > 0 else "")
    exec_script = self.pack_environment(env_vars, exec_script)
    submission_content = []
    submission_content += [
        "universe = vanilla",
        "executable = %s" % (exec_script),
        "output = %s.out" % (log_string),
        "error = %s.err" % (log_string),
        "log = %s.log" % (log_string),
        #"transfer_executable = True",
        "notification = Error",
        "notify_user = %s" % (self.mail_user()),
        "request_memory = %d" % (mem),
        "on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)",
        "request_cpus = %d" % (nproc),
        #### Extra attributes
        "+MaxRuntime = %d" % (TimeToSeconds(run_time)),  ### CERN cloud
        "+RequestRuntime = %d" % (TimeToSeconds(run_time)),  ### DESY cloud
        "+MyProject = %s" % (self.accountinggroup()) if self.accountinggroup() else "",
    ]
    if arraylength > 0:
        submission_content += ["environment = CONDOR_TASK_ID=$(CONDOR_TASK_ID)"]
    submission_content += ["queue"]
    self.__job_dependency_dict += [
        HTCondorJob(job_name=job_name,
                    submission_file=WriteList(submission_content, "%s/%s.sub" % (self.config_dir(), id_generator(25))),
                    arraylength=arraylength,
                    engine=self,
                    abbreviation=self.__assign_abb_letter())
    ]
    self.__submitted_jobs += 1 if arraylength <= 1 else arraylength
    return True
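### For orientation (the layout follows submission_content above; the concrete values are
### hypothetical): a generated <config_dir>/<id>.sub file reads roughly
###     universe = vanilla
###     executable = <config_dir>/EnvScript_abc.sh
###     output = <log_dir>/MyJob.out
###     error = <log_dir>/MyJob.err
###     log = <log_dir>/MyJob.log
###     notification = Error
###     request_memory = 2000
###     on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)
###     request_cpus = 1
###     +MaxRuntime = 86400
###     queue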
def lock_area(self):
    WriteList(["### Hook file to prevent double submission of the same job"], "%s/.job.lck" % (self.tmp_dir()))
while AOD.rfind("_r") != AOD.find("_r"): AOD = AOD[:AOD.rfind("_r")] ### Remote the double e -tag while AOD.rfind(".e") < AOD.rfind("_e"): uscore_pos = AOD.find("_", AOD.rfind(".e")) AOD = AOD[:uscore_pos] + AOD[AOD.find("_", uscore_pos + 1):] return AOD if __name__ == '__main__': OutDir = os.getcwd() parser = argparse.ArgumentParser( description='This script converts DAOD filelists to AOD filelists which then can be used for creating pileup reweighting files.', prog='CreateAODFromDAODList', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--datasets', '-d', '-D', help='DAOD filelist to be converted into AOD', required=True) parser.add_argument('--outFile', help="pipe the output into a script into a file", default='') RunOptions = parser.parse_args() logging.info('The following DAODs are converted into ADOs:\n') DAODsToConvert = [convertToAOD(daod) for daod in ReadListFromFile(RunOptions.datasets)] logging.info('\nThe ADOs are:\n') for daod in DAODsToConvert: logging.info(" --- %s" % (daod)) if len(RunOptions.outFile) > 0: WriteList(DAODsToConvert, RunOptions.outFile)
def main():
    """List datasets located at a RSE location."""
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()
    Today = time.strftime("%Y-%m-%d")
    Patterns = RunOptions.pattern
    OutDir = RunOptions.OutDir
    RSE = RunOptions.RSE
    if ',' in RSE:
        ### In case people have more than one RSE in their environment variable for grid submits
        RSE = RSE.split(',')[0]

    Prefix = ''
    if RunOptions.MyRequests:
        Prefix = 'MyRequestTo_'
        DS = ListUserRequests(RSE, RunOptions.rucio)
    else:
        DS = ListDisk(RSE)

    ### MetaFile = open("Content_%s.txt" % (RSE), 'w')
    ### for DataSet, Size in ListDiskWithSize(RSE):
    ###     Owner, ID = GetDataSetInfo(DataSet, RSE)
    ###     line = "%s | %s | %s | %.2f GB" % (ID, Owner, DataSet, Size)
    ###     MetaFile.write("%s\n" % (line))
    ###     print line
    ### MetaFile.close()
    ### exit(0)

    if len(DS) == 0:
        logging.warning("Disk is empty.")
        exit(0)
    CreateDirectory(OutDir, False)

    ###########
    # Define the file list name
    ###########
    FileList = "%s%s_%s" % (Prefix, RSE, Today)
    if len(Patterns) > 0:
        FileList += "_%s" % ('_'.join(Patterns))
    if len(RunOptions.exclude) > 0:
        FileList += "_exl_%s" % ('_'.join(RunOptions.exclude))
    FileList += '.txt'

    Write = []
    for d in sorted(DS):
        allPatternsFound = True
        for Pattern in Patterns:
            if Pattern not in d:
                allPatternsFound = False
                break
        for Pattern in RunOptions.exclude:
            if Pattern in d:
                allPatternsFound = False
                break
        if allPatternsFound:
            if d.split(".")[-1].isdigit():
                d = d[:d.rfind(".")]
            if d.find("_tid") != -1:
                d = d[0:d.rfind("_tid")]
            if len([w for w in Write if w.find(d) != -1]) > 0:
                continue
            logging.info("Write dataset %s" % (d))
            Write.append(d)
    if len(Write) == 0:
        logging.error("No datasets containing given pattern(s) found!")
        exit(0)
    WriteList(Write, "%s/%s" % (OutDir, FileList))
    logging.info("Datasets written to file %s/%s" % (OutDir, FileList))
OutDir = os.getcwd()
parser = argparse.ArgumentParser(
    description='This script converts DAOD filelists to AOD filelists which then can be used for creating pileup reweighting files.',
    prog='CreateAODFromDAODList',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--datasets', '-d', '-D', help='DAOD filelist to be converted into AOD', required=True)
parser.add_argument('--outFile', help='pipe the output into a file', default='')
RunOptions = parser.parse_args()
print 'The following DAODs are converted into AODs:\n'
DAODsToConvert = [convertToAOD(daod) for daod in ReadListFromFile(RunOptions.datasets)]
print '\nThe AODs are:\n'
for daod in DAODsToConvert:
    print " --- %s" % (daod)
if len(RunOptions.outFile) > 0:
    WriteList(DAODsToConvert, RunOptions.outFile)
def __prepare_input(self, in_ds=""): print "INFO <_prepare_input>: Assemble configuration for %s" % (in_ds) ### Name to be piped to the job out_name = in_ds[in_ds.rfind("/") + 1:in_ds.rfind(".")] if IsTextFile( in_ds) or IsROOTFile(in_ds) else in_ds split_dir = "%s/Datasets/%s" % (self.split_cfg_dir(), out_name) root_files = [] ### Now we need to find the corresponding ROOT files ### 1) The dataset is a root file itself if IsROOTFile(in_ds): root_files += [in_ds] ### 2) The given dataset is a .txt file elif IsTextFile(in_ds): ### Find the root files from there root_files = self.__extract_root_files(in_ds) if len(root_files) == 0: return False ### 3) The given dataset is a directory elif os.path.isdir(in_ds): if in_ds.endswith("/"): in_ds = in_ds[:in_ds.rfind("/")] out_name = in_ds[in_ds.rfind("/") + 1:] split_dir = "%s/Directory/%s" % (self.split_cfg_dir(), out_name) root_files = [ "%s/%s" % (in_ds, F) for F in os.listdir(in_ds) if IsROOTFile(F) ] ### 4) It's a logical dataset stored on d-cache else: root_files = self.__find_on_dcache(in_ds) if len(root_files) == 0: print "ERROR: Could not associate anything to %s" % (in_ds) return False if len(out_name) == 0: print "ERROR: How should the output be called %s" % (in_ds) return False ### Assemble the splitting of the jobs main_list = "%s/AllROOTFiles.main" % (split_dir) files_in_main = ReadListFromFile(main_list) if os.path.exists( main_list) else [] ### The list is unkown or the content of ROOT files has changed ### Redo the splitting again ;-) if len(files_in_main) != len(root_files) or not IsListIn( files_in_main, root_files): print "INFO: Assemble new split for %s" % (in_ds) CreateDirectory(split_dir, True) WriteList(root_files, main_list) os.system("CreateBatchJobSplit -I %s -O %s -EpJ %i" % (main_list, split_dir, self.__events_per_job)) ### Each of the lists contains the ROOT files to process per each sub job split_lists = [ "%s/%s" % (split_dir, F) for F in os.listdir(split_dir) if IsTextFile(F) ] n_jobs = len(split_lists) subjob_outs = [ "%s/%s_%d.root" % (self.engine().tmp_dir(), out_name, d) for d in range(n_jobs) ] assembled_in = [] if not os.path.exists( self.job_input()) else ReadListFromFile(self.job_input()) assembled_out = [] if not os.path.exists( self.job_out_names()) else ReadListFromFile(self.job_out_names()) start_reg = len(assembled_in) ### Write what we've WriteList(assembled_in + split_lists, self.job_input()) WriteList(assembled_out + subjob_outs, self.job_out_names()) #### Submit the merge jobs self.__merge_interfaces += [ self.engine().create_merge_interface( out_name=out_name, files_to_merge=subjob_outs, hold_jobs=[(self.engine().job_name(), [start_reg + i + 1 for i in range(n_jobs)])], files_per_job=self.__files_per_merge_itr, final_split=self.__final_split) ] self.__nsheduled += n_jobs return True