def finish(self):
    CreateDirectory(self.log_dir(), False)
    CreateDirectory(self.tmp_dir(), False)
    executable = [th for th in self.get_threads() if th.is_launchable()]
    running = []
    dead_jobs = []
    cycles = 0
    ### There are still some jobs to execute
    while self.__runned_jobs + len(dead_jobs) < self.n_threads():
        cycles += 1
        running = [th for th in running if th.isAlive()]
        if len(running) < self.max_running_per_array():
            for th in executable:
                if len(running) >= self.max_running_per_array():
                    break
                th.start()
                self.__runned_jobs += 1
                running += [th]
            executable = [th for th in self.get_threads() if th.is_launchable()]
        else:
            ### Parenthesize the dead-or-dead-chain check; without the parentheses
            ### `and` binds more tightly and already-counted dead threads are added again
            dead_jobs += [
                th for th in self.get_threads()
                if (th.is_dead() or th.in_dead_chain()) and th not in dead_jobs
            ]
        time.sleep(1)
        if cycles % 120 == 0:
            self.print_status(running)
    while getRunningThreads(running) > 0:
        time.sleep(0.5)
        cycles += 1
        if cycles % 120 == 0:
            self.print_status(running)
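# A minimal, self-contained sketch of the throttling pattern used by finish() above:
# start at most max_running threads at once and poll until everything has finished.
# The function name and arguments are illustrative, not part of this package
# (Python 2 threading API, hence isAlive()).
import time

def run_throttled(threads, max_running=4, poll_interval=1.0):
    next_th, running = 0, []
    while next_th < len(threads) or len(running) > 0:
        running = [th for th in running if th.isAlive()]  # drop finished threads
        while next_th < len(threads) and len(running) < max_running:
            threads[next_th].start()  # top up the running pool
            running += [threads[next_th]]
            next_th += 1
        time.sleep(poll_interval)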
def run(self):
    CreateDirectory(self.download_dir(), False)
    CreateDirectory(self.final_directory(), False)
    #self.__datasets = sorted(self.__datasets, key=lambda x: GetPRW_datasetID(x))
    DownloadList = [
        'rucio download --ndownloader 5 --dir %s %s' % (self.download_dir(), ds) for ds in self.__datasets
        if ds not in self.__already_on_disk
    ]
    ExecuteCommands(ListOfCmds=DownloadList, MaxCurrent=16)
    self.clearFromDuplicates(self.download_dir())
    Files = []
    for dir in os.listdir(self.download_dir()):
        dir_path = "%s/%s" % (self.download_dir(), dir)
        if not os.path.isdir(dir_path):
            continue
        if not self.hasDataset(dir):
            continue
        Files += ["%s/%s" % (dir_path, F) for F in os.listdir(dir_path) if IsROOTFile(F)]
    WriteList(sorted(Files), "%s/temp_in.txt" % (self.download_dir()))
    # Only one entry in the MCPileupReweighting tree is actually needed per
    # Channel/RunNumber combination. Remove all others but one to significantly
    # reduce the file size! This is done by the SlimPRWFile macro in XAMPPbase/utils/
    MergeCmd = "SlimPRWFile --InList %s/temp_in.txt --outFile %s" % (self.download_dir(), self.final_file())
    print MergeCmd
    os.system(MergeCmd)
    print "INFO: Clean up the temporary file"
    os.system("rm %s/temp_in.txt " % (self.download_dir()))
    self.standaloneCheck()
    print "INFO: Done"
def downloadDataSets(InputDatasets, Destination, RSE="", use_singularity=False):
    ### Apparently rucio no longer works in combination with AthAnalysis,
    ### so execute it from a singularity container instead
    Cmds = []
    image_to_choose = setupBatchSubmitArgParser().get_default("SingularityImage")
    home_dir = setupBatchSubmitArgParser().get_default("BaseFolder") + "/TMP/.singularity/"
    CreateDirectory(Destination, False)
    if use_singularity:
        CreateDirectory(home_dir, False)
    to_clean = []
    for DS in InputDatasets:
        if not use_singularity:
            Cmds += ["rucio download %s --ndownloader 32 %s --dir %s" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination)]
        else:
            singularity_dir = home_dir + "/" + id_generator(21)
            to_clean += [singularity_dir]
            singularity_script = WriteList([
                "#!/bin/bash",
                "export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase",
                "source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh",
                "lsetup rucio",
                "echo 'rucio download %s --ndownloader 32 %s --dir %s'" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination),
                "rucio download %s --ndownloader 32 %s --dir %s" % (DS, "" if len(RSE) == 0 else "--rse %s" % (RSE), Destination),
            ], "%s/to_exec.sh" % (singularity_dir))
            os.system("chmod 0777 " + singularity_script)
            Cmds += [
                "singularity exec --cleanenv -H %s:/alrb -B %s:/srv %s/%s %s" %
                (singularity_dir, Destination, SINGULARITY_DIR, image_to_choose, singularity_script)
            ]
    ExecuteCommands(ListOfCmds=Cmds, MaxCurrent=8)
    for c in to_clean:
        os.system("rm -rf %s" % (c))
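# A hypothetical invocation of downloadDataSets; the dataset name and destination
# are placeholders, and a valid rucio setup plus VOMS proxy are assumed in the
# calling environment. With use_singularity=True the rucio calls run inside the
# configured singularity image instead.
downloadDataSets(InputDatasets=["user.jdoe.my_ntuples_v1"],
                 Destination="/tmp/jdoe/Downloads",
                 RSE="",                 # empty: let rucio pick the source replica
                 use_singularity=False)  # set True inside AthAnalysis environments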
def submit_job(
        self,
        script,
        sub_job="",
        mem=-1,
        env_vars=[],
        hold_jobs=[],
        run_time="",
):
    if not CreateDirectory(self.log_dir(), False):
        return False
    exec_script = self.link_to_copy_area(script)
    pwd = os.getcwd()
    os.chdir(self.log_dir())
    if not exec_script:
        return False
    if mem < 0:
        print "ERROR: Please give a reasonable memory"
        return False
    ### The mail address is passed as an argument, so the format string needs a
    ### placeholder for it; otherwise the string formatting raises a TypeError
    submit_cmd = "sbatch --output=%s/%s.log --mail-type=FAIL --mail-user='%s' --mem=%iM %s %s --job-name='%s' --export=%s %s" % (
        self.log_dir(),
        sub_job if len(sub_job) > 0 else self.job_name(),
        self.mail_user(),
        mem,
        self.__partition(run_time),
        self.__shedule_jobs(self.to_hold(hold_jobs), sub_job),
        self.subjob_name(sub_job),
        ",".join(["%s='%s'" % (var, value) for var, value in env_vars + self.common_env_vars()]),
        exec_script)
    if os.system(submit_cmd):
        return False
    os.chdir(pwd)
    return True
def evaluate_cut_flows(options, Proccessed_Smp=[], analysis="XAMPPbase"):
    for Sample, Out_File in Proccessed_Smp:
        if not os.path.exists(Out_File):
            print "ERROR: No such file or directory %s. Skip sample" % (Out_File)
            continue
        ### Execute the cutflow commands for each region
        cflow_dir = "%s/reference_cutflows/" % (options.ciDir)
        CreateDirectory(cflow_dir, False)
        for region in options.regions:
            CI_file = "%s/%s_%s_%s.txt" % (cflow_dir, Sample, analysis, region)
            Cflow_Cmd = "python %s -i %s -a %s | tee %s" % (
                ResolvePath("XAMPPbase/python/printCutFlow.py"), Out_File, region, CI_file)
            if os.system(Cflow_Cmd) != 0:
                print "ERROR: Could not process cutflow %s in file %s" % (region, Out_File)
                del_cmd = "rm %s" % (CI_file)
                os.system(del_cmd)
            CI_file_weighted = "%s_weighted.txt" % (CI_file[:CI_file.rfind(".")])
            #### Skip data files; they are not added to the weighted cutflow
            if Sample.lower().find("data") != -1:
                continue
            Cflow_Cmd = "python %s -i %s -a %s --weighted | tee %s " % (
                ResolvePath("XAMPPbase/python/printCutFlow.py"), Out_File, region, CI_file_weighted)
            if os.system(Cflow_Cmd) != 0:
                print "ERROR: Could not process cutflow %s in file %s" % (region, Out_File)
                del_cmd = "rm %s" % (CI_file_weighted)
                os.system(del_cmd)
def __get_job_options(self, runNumbers):
    if not self.engine().submit_hook():
        logging.warning("A job with the name {j} has already been submitted.".format(j=self.engine().job_name()))
        return
    CreateDirectory(self.engine().config_dir(), True)
    for r in runNumbers:
        jobFolder = os.path.join(self.__joboptions_dir, "{ddd}xxx".format(ddd=str(r)[:3]))
        if not os.path.isdir(jobFolder):
            logging.warning("Job option folder {f} for DSID {r} does not exist. Skipping {r}...".format(f=jobFolder, r=r))
            continue
        dir_to_copy = os.path.join(jobFolder, str(r))
        # copytree fails if the source is missing
        if not os.path.isdir(dir_to_copy):
            continue
        shutil.copytree(dir_to_copy, os.path.join(self.engine().config_dir(), str(r)))
        # assemble the config file for the job option
        seeds = []
        while len(seeds) < self.__nJobs:
            # the seeds are written with %d below, so draw integers
            s = random.randint(100000, 500000)
            if s not in seeds:
                seeds += [s]
        jo = os.path.join(self.engine().config_dir(), str(r))
        out_dir = os.path.join(self.evgen_dir(), str(r))
        WriteList(
            (ReadListFromFile(self.seed_file()) if os.path.exists(self.seed_file()) else []) + ["%d" % (i) for i in seeds],
            self.seed_file(),
        )
        WriteList(
            (ReadListFromFile(self.run_file()) if os.path.exists(self.run_file()) else []) + ["%d" % (r) for i in range(self.__nJobs)],
            self.run_file(),
        )
        WriteList(
            (ReadListFromFile(self.job_file()) if os.path.exists(self.job_file()) else []) + [jo for i in range(self.__nJobs)],
            self.job_file(),
        )
        WriteList(
            (ReadListFromFile(self.out_file()) if os.path.exists(self.out_file()) else []) + [out_dir for i in range(self.__nJobs)],
            self.out_file(),
        )
        # submit the job array
        self.__n_scheduled += self.__nJobs
        logging.info("<__get_job_options> Found %s" % (jo))
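# How the four bookkeeping files written above are meant to be consumed (a sketch,
# not code from this package): line i of the seed/run/job-option/output lists all
# describe sub-job i of the array, so a worker only needs its array index.
# ReadListFromFile is the package helper used throughout this module.
def read_job_config(task_id, seed_file, run_file, job_file, out_file):
    idx = task_id - 1  # task_id counts from 1, as SLURM_ARRAY_TASK_ID does
    seed = ReadListFromFile(seed_file)[idx]
    run = ReadListFromFile(run_file)[idx]
    job_option = ReadListFromFile(job_file)[idx]
    out_dir = ReadListFromFile(out_file)[idx]
    return seed, run, job_option, out_dir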
def submit_array(self,
                 script,
                 sub_job="",
                 mem=-1,
                 env_vars=[],
                 hold_jobs=[],
                 run_time="",
                 array_size=-1):
    if not CreateDirectory(self.log_dir(), False):
        return False
    pwd = os.getcwd()
    os.chdir(self.log_dir())
    exec_script = self.link_to_copy_area(script)
    if not exec_script:
        return False
    if mem < 0:
        print "ERROR: Please give a reasonable memory"
        return False
    if array_size < 1:
        print "ERROR: Please give a valid array size"
        return False
    ### The mail address needs its own -M placeholder, and the environment variables
    ### have to be passed as a single comma-separated -v argument
    submit_cmd = "qsub -o %s -m a -M '%s' -j y -l h_vmem=%dM -t 1-%d -l h_rt='%s' %s -N '%s' -v \"%s\" -cwd %s" % (
        self.log_dir(),
        self.mail_user(),
        mem,
        array_size,
        run_time,
        self.__shedule_jobs(self.to_hold(hold_jobs), sub_job),
        self.subjob_name(sub_job),
        ",".join(["%s='%s'" % (var, value) for var, value in env_vars + self.common_env_vars()]),
        exec_script)
    ### The command was assembled but never executed
    if os.system(submit_cmd):
        return False
    os.chdir(pwd)
    return True
def submit_job(self,
               script,
               sub_job="",
               mem=-1,
               env_vars=[],
               hold_jobs=[],
               run_time="",
               n_cores=1):
    CreateDirectory(self.log_dir(), False)
    self.set_cluster_control_module("ClusterSubmission/ClusterControlSLURM.sh")
    exec_script = self.pack_environment(env_vars, script)
    pwd = os.getcwd()
    os.chdir(self.config_dir())
    if not exec_script:
        return False
    if mem < 0:
        logging.error("Please give a reasonable memory")
        return False
    if os.getenv("USER"):
        logging.info("Currently %d jobs are scheduled" % (get_num_scheduled(os.getenv("USER"))))
    ### The mail address is passed as an argument, so the format string needs a placeholder for it
    submit_cmd = "sbatch --output=%s/%s.log --mail-type=FAIL --mail-user='%s' --mem=%iM %s %s --job-name='%s' %s %s" % (
        self.log_dir(),
        sub_job if len(sub_job) > 0 else self.job_name(),
        self.mail_user(),
        mem,
        self.__partition(run_time),
        self.__schedule_jobs(self.to_hold(hold_jobs), sub_job),
        self.subjob_name(sub_job),
        "" if len(self.excluded_nodes()) == 0 else "--exclude=" + ",".join(self.excluded_nodes()),
        exec_script)
    if os.system(submit_cmd):
        return False
    os.chdir(pwd)
    return True
def __init__(
        self,
        #### Container with all dataset names
        rucio_container,
        ### RSE where the container is stored
        dest_rse,
        #### Download the container to the disk
        download=False,
        #### Merge the datasets to a common file
        merge=False,
        ## Download directory
        download_dir="/tmp/download",
        #### Destination directory
        destination_dir="/tmp",
        #### Cluster engine for a potential merge
        cluster_engine=None,
        #### Maximum size per merged file (bytes)
        max_merged_size=25 * 1024 * 1024 * 1024,
        ### Logical dataset name (optional)
        logical_name="",
        #### Rucio groupdisk protocol
        protocol="root",
        ## Hold jobs
        hold_jobs=[],
        ### Files per merge job
        files_per_merge_job=20,
):
    self.__container_name = rucio_container
    self.__rse = dest_rse
    self.__download = download
    self.__merge = merge
    self.__download_dir = download_dir
    self.__files_per_merge = files_per_merge_job
    while self.__download_dir.find("//") != -1:
        self.__download_dir = self.__download_dir.replace("//", "/")
    self.__dest_dir = destination_dir
    self.__engine = cluster_engine
    self.__max_file_size = max_merged_size
    self.__logical_name = logical_name
    CheckRemainingProxyTime()
    self.__files_on_rse = [
        f for f in GetDataSetFiles(self.container(), self.rse(), protocol) if self._is_good_file(f)
    ] if len(rucio_container) > 0 and not self.__download else []
    #### List of files to be downloaded on disk
    self.__files_on_disk = []
    if self.__download:
        CreateDirectory(self.ds_download_dir(), False)
        downloadDataSets(InputDatasets=[self.container()], Destination=self.__download_dir, use_singularity=True)
        self.__files_on_disk = [
            "%s/%s" % (self.ds_download_dir(), f) for f in os.listdir(self.ds_download_dir())
            if self._is_good_file(self.ds_download_dir() + "/" + f)
        ]
    self.__merge_interfaces = []
    self.__hold_jobs = hold_jobs
def link_to_copy_area(self, config_file):
    config_path = ResolvePath(config_file)
    if not config_path:
        return None
    ### Create the directory
    CreateDirectory(self.config_dir(), False)
    ### Keep the file extension but rename the file itself to something random
    final_path = "%s/%s.%s" % (self.config_dir(), id_generator(45), config_file[config_file.rfind(".") + 1:])
    os.system("cp %s %s" % (config_path, final_path))
    return final_path
def submit_array(self,
                 script,
                 sub_job="",
                 mem=-1,
                 env_vars=[],
                 hold_jobs=[],
                 run_time="",
                 n_cores=1,
                 array_size=-1):
    CreateDirectory(self.log_dir(), False)
    self.set_cluster_control_module("ClusterSubmission/ClusterControlSLURM.sh")
    pwd = os.getcwd()
    os.chdir(self.config_dir())
    if mem < 0:
        logging.error("Please give a reasonable memory")
        return False
    ArrayStart = 0
    ArrayEnd = min(self.max_array_size(), array_size)
    logging.info(" <submit_array>: Submit array %s with size %d" % (self.subjob_name(sub_job), array_size))
    while ArrayEnd > ArrayStart:
        n_jobs_array = min(array_size - ArrayStart, self.max_array_size())
        exec_script = self.pack_environment(env_vars + [("IdOffSet", str(ArrayStart))], script)
        if not exec_script:
            return False
        if os.getenv("USER"):
            time.sleep(1)
            logging.info("Going to add %d jobs to the currently %d scheduled ones" % (n_jobs_array, get_num_scheduled(os.getenv("USER"))))
        ### The mail address is passed as an argument, so the format string needs a placeholder for it
        submit_cmd = "sbatch --output=%s/%s_%%A_%%a.log --array=1-%i%s --mail-type=FAIL --mail-user='%s' --mem=%iM %s %s --job-name='%s' %s %s" % (
            self.log_dir(),
            sub_job if len(sub_job) > 0 else self.job_name(),
            n_jobs_array,
            "" if n_jobs_array < self.max_running_per_array() else "%%%d" % (self.max_running_per_array()),
            self.mail_user(),
            mem,
            self.__partition(run_time),
            ### Schedule the job after the hold jobs and also after previous array blocks
            self.__schedule_jobs(self.to_hold(hold_jobs) + ([] if ArrayStart == 0 else [self.subjob_name(sub_job)]), sub_job),
            self.subjob_name(sub_job),
            "" if len(self.excluded_nodes()) == 0 else "--exclude=" + ",".join(self.excluded_nodes()),
            exec_script,
        )
        if os.system(submit_cmd):
            return False
        ArrayStart = ArrayEnd
        ArrayEnd = min(ArrayEnd + self.max_array_size(), array_size)
    os.chdir(pwd)
    return True
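# The chunking arithmetic above in isolation: an array of array_size jobs is cut
# into blocks of at most max_array_size, and each block is held behind the previous
# one. An illustrative helper (not part of the class) for sanity-checking the
# block boundaries:
def array_blocks(array_size, max_array_size):
    start, end, blocks = 0, min(max_array_size, array_size), []
    while end > start:
        blocks += [(start, min(array_size - start, max_array_size))]  # (offset, block size)
        start = end
        end = min(end + max_array_size, array_size)
    return blocks

# e.g. array_blocks(2500, 1000) -> [(0, 1000), (1000, 1000), (2000, 500)]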
def submit_build_job(self):
    if self.__submitted_build:
        print "ERROR <submit_build_job>: Build job is already submitted"
        return False
    if not self.submit_hook():
        return False
    if self.send_build_job() and not self.submit_job(
            script="ClusterSubmission/Build.sh",
            sub_job="Build",
            mem=self.__buildMem,
            env_vars=[("CleanOut", self.out_dir()),
                      ("CleanLogs", self.log_dir()),
                      ("CleanTmp", self.tmp_dir()),
                      ("nCoresToUse", self.__buildCores),
                      ("COPYAREA", self.build_dir())],
            run_time=self.__buildTime,
            hold_jobs=self.__holdBuild):
        return False
    elif not self.send_build_job():
        if not CreateDirectory(self.log_dir(), False) or not CreateDirectory(self.out_dir(), False):
            return False
        Dummy_Job = "%s/%s.sh" % (self.config_dir(), id_generator(35))
        ### The echoed message now matches the actual sleep duration
        WriteList([
            "#!/bin/bash",
            "echo \"I'm a dummy build job. Will wait 120 seconds until everything is scheduled\"",
            "sleep 120"
        ], Dummy_Job)
        if not self.submit_job(script=Dummy_Job,
                               sub_job="Build",
                               mem=100,
                               env_vars=[],
                               run_time="00:05:00",
                               hold_jobs=self.__holdBuild):
            return False
    self.__submitted_build = True
    self.lock_area()
    return True
def __init__(self,
             outFileName="",
             files_to_merge=[],
             hold_jobs=[],
             cluster_engine=None,
             files_per_job=5,
             final_split=1,
             shuffle_files=True):
    self.__out_name = outFileName
    self.__shuffle_files = shuffle_files
    self.__cluster_engine = cluster_engine
    self.__hold_jobs = [h for h in hold_jobs]
    self.__files_per_job = files_per_job if files_per_job > 1 else 2
    self.__merge_lists = self.__assemble_merge_list(files_to_merge)
    self.__tmp_out_files = []
    self.__child_job = None
    self.__parent_job = None
    self.__submitted = False
    if len(self.__merge_lists) > final_split:
        self.__tmp_out_files = [
            "%s/%s.root" % (self.engine().tmp_dir(), id_generator(100)) for d in range(len(self.__merge_lists))
        ]
        self.__child_job = self.create_merge_interface(final_split=final_split)
        self.__child_job.set_parent(self)
    elif final_split == 1 or len(self.__merge_lists) == 1:
        CreateDirectory(self.engine().out_dir(), False)
        self.__tmp_out_files = ["%s/%s.root" % (self.engine().out_dir(), self.outFileName())]
    else:
        CreateDirectory(self.engine().out_dir(), False)
        self.__tmp_out_files = [
            "%s/%s_%d.root" % (self.engine().out_dir(), self.outFileName(), i + 1)
            for i in range(min(final_split, len(self.__merge_lists)))
        ]
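# __assemble_merge_list is not shown in this section; a plausible minimal version
# chunks the input files into lists of files_per_job entries (with optional
# shuffling), which matches how __merge_lists is used above. This is purely an
# assumption for illustration, not the package's implementation:
import random

def assemble_merge_list(files_to_merge, files_per_job, shuffle_files=True):
    files = [f for f in files_to_merge]
    if shuffle_files:
        random.shuffle(files)  # balance large and small files across merge jobs
    return [files[i:i + files_per_job] for i in range(0, len(files), files_per_job)]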
def ExecuteAthena(RunOptions, AthenaArgs):
    """
    @brief Execute athena with options specified in run options and athena arguments
    @param RunOptions The run options (these are modified to satisfy athena style)
    @param AthenaArgs The athena arguments (are directly joined to the athena command)
    """
    ExeCmd = "athena.py %s %s" % (BringToAthenaStyle(RunOptions.jobOptions), " ".join(AthenaArgs))
    if RunOptions.outFile.find("/") != -1:
        print("INFO: Will execute Athena in directory " + RunOptions.outFile.rsplit("/", 1)[0])
        CreateDirectory(RunOptions.outFile.rsplit("/", 1)[0], False)
        os.chdir(RunOptions.outFile.rsplit("/", 1)[0])
    if RunOptions.outFile.find("/") == len(RunOptions.outFile) - 1 or not IsROOTFile(RunOptions.outFile):
        print("ERROR: Please give a file to save, not only the directory")
        exit(1)
    # options to run with valgrind
    # ----------------------------------------------------------------------------------------------------
    if RunOptions.valgrind:
        if not any(os.access(os.path.join(path, 'valgrind'), os.X_OK) for path in os.environ["PATH"].split(os.pathsep)):
            print("ERROR: valgrind not available - you should set up an ATLAS release that contains it or install it manually")
            exit(1)
        if RunOptions.valgrind == "callgrind":
            ExeCmd = "valgrind --suppressions=${ROOTSYS}/etc/valgrind-root.supp --tool=callgrind --smc-check=all --num-callers=50 --trace-children=yes " + ExeCmd
            print "INFO: You are running with valgrind's callgrind! Execute command modified to:"
            print ExeCmd
        elif RunOptions.valgrind == "memcheck":
            ExeCmd += " --config-only=rec.pkl --stdcmalloc"
            print "INFO: You are running with valgrind's memcheck. First a pickle file with the Athena configuration is created, then valgrind is executed."
            print "Creating pickle file ..."
            print ExeCmd
            if os.system(ExeCmd):
                print("ERROR: Creating python pickle file with Athena has failed")
            print("Running valgrind and storing output in valgrind.log ...")
            ### Print the valgrind command after it has been assembled, not before
            ExeCmd = "valgrind --suppressions=${ROOTSYS}/etc/valgrind-root.supp --leak-check=yes --trace-children=yes --num-callers=50 --show-reachable=yes --track-origins=yes --smc-check=all `which python` `which athena.py` --stdcmalloc rec.pkl 2>&1 | tee valgrind.log"
            print ExeCmd
            print("Explanation of output: https://twiki.cern.ch/twiki/bin/view/AtlasComputing/UsingValgrind")
    # ----------------------------------------------------------------------------------------------------
    if os.system(ExeCmd):
        print("ERROR: Athena execution failed")
        os.system("rm %s" % (RunOptions.outFile))
        exit(1)
def submit_build_job(self):
    if self.check_submitted_build():
        logging.warning("<submit_build_job>: Build job is already submitted")
        return True
    if not self.submit_hook():
        return False
    ### A few cluster engines go crazy if the log files of their own jobs are deleted.
    ### Make sure that the build job deletes the log dir before submission
    if not CreateDirectory(self.log_dir(), True):
        return False
    if self.send_build_job() and not self.submit_job(
            script="ClusterSubmission/Build.sh",
            sub_job="Build",
            mem=self.get_build_mem(),
            n_cores=self.get_build_cores(),
            env_vars=[("CleanOut", self.out_dir()),
                      ("CleanTmp", self.tmp_dir()),
                      ("nCoresToUse", self.get_build_cores()),
                      ("COPYAREA", self.build_dir())],
            run_time=self.get_build_time(),
            hold_jobs=self.get_build_hold_jobs()):
        return False
    elif not self.send_build_job():
        if not CreateDirectory(self.out_dir(), False):
            return False
        Dummy_Job = WriteList([
            "#!/bin/bash",
            "echo \"I'm a dummy build job. Will wait 15 seconds until everything is scheduled\"",
            "sleep 15"
        ], "%s/%s.sh" % (self.config_dir(), id_generator(35)))
        if not self.submit_job(script=Dummy_Job,
                               sub_job="Build",
                               mem=100,
                               run_time="00:05:00",
                               hold_jobs=self.__holdBuild):
            return False
    self.__submitted_build = True
    self.lock_area()
    return True
def download_ci_files(options):
    ### Retrieve the EOS token first
    getEOS_token(options)
    ### Check whether the CI dir actually exists
    smp_dir = "%s/datasamples/" % (options.ciDir)
    if not os.path.isdir(smp_dir):
        print "ERROR: The path to look up the data samples %s does not exist. Where is my data?" % (smp_dir)
        exit(1)
    ### Create the directory to store the temporary files in,
    ### cleaning up any old remnants first
    CreateDirectory(options.TEMPdir, True)
    downloaded_smp = []
    for smp in os.listdir(smp_dir):
        smp_name = smp[:smp.rfind(".")]
        print "INFO: Download the files from sample %s" % (smp_name)
        download_to = "%s/%s" % (options.TEMPdir, smp_name)
        CreateDirectory(download_to, False)
        ### Download the files first
        for file_to_load in ReadListFromFile("%s/%s" % (smp_dir, smp)):
            destination_file = "%s/%s" % (download_to, file_to_load[file_to_load.rfind("/") + 1:])
            CopyCmd = "xrdcp %s/%s %s" % (options.EOSpath, file_to_load, destination_file)
            if os.path.exists(destination_file):
                print "INFO: Skip download of %s" % (file_to_load)
            elif os.system(CopyCmd) != 0:
                print "ERROR: Failed to download %s" % (file_to_load)
                exit(1)
        ### Write the file list for the analysis
        file_list = "%s/FileList_%s.txt" % (options.TEMPdir, smp_name)
        WriteList([
            "%s/%s" % (download_to, f[f.rfind("/") + 1:]) for f in ReadListFromFile("%s/%s" % (smp_dir, smp))
        ], file_list)
        downloaded_smp += [smp_name]
    return downloaded_smp
def createFileList(dsname, options):
    logging.info('Creating file list for ' + dsname)
    DS = GetDataSetFiles(dsname, options.RSE, options.protocols)
    if len(DS) == 0:
        logging.error("No datasets found")
        return
    if dsname.find(":") > -1:
        dsname = dsname[dsname.find(":") + 1:]
    CreateDirectory(options.OutDir, False)
    filelistname = options.OutDir + "/" + dsname.rstrip('/') + ".txt"
    if os.path.exists(filelistname):
        logging.info("Remove the old FileList")
        os.system("rm " + filelistname)
    WriteList(DS, filelistname)
def __extract_seeds(self, run):
    try:
        EVNT_DIR = [
            os.path.join(self.evgen_dir(), R) for R in os.listdir(self.evgen_dir()) if R.startswith(str(run))
        ][0]
    except (IndexError, OSError):
        ### No EVNT directory for this run, or the evgen dir itself is missing
        return
    logging.info("<__extract_seeds> Searching {evntdir} for EVNT files not already processed in derivation format {d}.".format(
        evntdir=EVNT_DIR, d=self.__derivation))
    DERIVATION_DIR = os.path.join(self.aod_dir(), EVNT_DIR[EVNT_DIR.rfind("/") + 1:])
    CreateDirectory(DERIVATION_DIR, False)
    Evnt_Seeds = [int(E[E.find("EVNT") + 5:E.find(".pool")]) for E in os.listdir(EVNT_DIR) if E.endswith(".root")]
    DAOD_Seeds = [
        int(A.split(".")[-2]) for A in os.listdir(DERIVATION_DIR)
        if A.find(self.__derivation) != -1 and A.endswith(".root")
    ]
    Non_ProcSeeds = [seed for seed in Evnt_Seeds if seed not in DAOD_Seeds]
    if len(Non_ProcSeeds) == 0:
        return
    logging.info("Extracted seeds for run {r}:".format(r=run))
    logging.info(" +-=- {s}".format(s=", ".join([str(seed) for seed in Non_ProcSeeds])))
    WriteList(
        (ReadListFromFile(self.seed_file()) if os.path.exists(self.seed_file()) else []) + [str(seed) for seed in Non_ProcSeeds],
        self.seed_file(),
    )
    WriteList(
        (ReadListFromFile(self.run_file()) if os.path.exists(self.run_file()) else []) + [str(run) for seed in Non_ProcSeeds],
        self.run_file(),
    )
    WriteList(
        (ReadListFromFile(self.in_file()) if os.path.exists(self.in_file()) else []) + [EVNT_DIR for seed in Non_ProcSeeds],
        self.in_file(),
    )
    self.__n_scheduled += len(Non_ProcSeeds)
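# The slicing above assumes EVNT file names of the form <something>.EVNT.<seed>.pool.root;
# the example file name below is hypothetical. A quick check of the slice logic:
def seed_of(evnt_name):
    # skip "EVNT." (5 characters) and read up to ".pool"
    return int(evnt_name[evnt_name.find("EVNT") + 5:evnt_name.find(".pool")])

assert seed_of("mc.123456.EVNT.100042.pool.root") == 100042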
def ExecuteAthena(RunOptions, AthenaArgs):
    if RunOptions.outFile.find("/") != -1:
        print("INFO: Will execute Athena in directory " + RunOptions.outFile.rsplit("/", 1)[0])
        CreateDirectory(RunOptions.outFile.rsplit("/", 1)[0], False)
        os.chdir(RunOptions.outFile.rsplit("/", 1)[0])
    if RunOptions.outFile.find("/") == len(RunOptions.outFile) - 1 or not RunOptions.outFile.endswith(".root"):
        print("ERROR: Please give a file to save, not only the directory")
        exit(1)
    ExeCmd = "athena.py %s %s" % (AssembleConfigArgument(AthenaArgs), BringToAthenaStyle(RunOptions.jobOptions))
    if os.system(ExeCmd):
        print("ERROR: Athena execution failed")
        os.system("rm %s" % (RunOptions.outFile))
        exit(1)
def submit_array(self,
                 script,
                 sub_job="",
                 mem=-1,
                 env_vars=[],
                 hold_jobs=[],
                 run_time="",
                 array_size=-1):
    if not CreateDirectory(self.log_dir(), False):
        return False
    pwd = os.getcwd()
    os.chdir(self.log_dir())
    exec_script = self.link_to_copy_area(script)
    if not exec_script:
        return False
    if mem < 0:
        print "ERROR: Please give a reasonable memory"
        return False
    ArrayStart = 0
    ArrayEnd = min(self.max_array_size(), array_size)
    while ArrayEnd > ArrayStart:
        n_jobs_array = min(array_size - ArrayStart, self.max_array_size())
        ### The mail address is passed as an argument, so the format string needs a placeholder for it
        submit_cmd = "sbatch --output=%s/%s_%%A_%%a.log --array=1-%i%s --mail-type=FAIL --mail-user='%s' --mem=%iM %s %s --job-name='%s' --export=%s %s" % (
            self.log_dir(),
            sub_job if len(sub_job) > 0 else self.job_name(),
            n_jobs_array,
            "" if n_jobs_array < self.max_running_per_array() else "%%%d" % (self.max_running_per_array()),
            self.mail_user(),
            mem,
            self.__partition(run_time),
            self.__shedule_jobs(self.to_hold(hold_jobs), sub_job),
            self.subjob_name(sub_job),
            ",".join([
                "%s='%s'" % (var, value)
                for var, value in ([('IdOffSet', '%i' % (ArrayStart))] + env_vars + self.common_env_vars())
            ]),
            exec_script,
        )
        if os.system(submit_cmd):
            return False
        ArrayStart = ArrayEnd
        ArrayEnd = min(ArrayEnd + self.max_array_size(), array_size)
    os.chdir(pwd)
    return True
def submit_job(self,
               script,
               sub_job="",
               mem=-1,
               env_vars=[],
               hold_jobs=[],
               run_time="",
               n_cores=1):
    if not CreateDirectory(self.log_dir(), False):
        return False
    exec_script = self.link_to_copy_area(script)
    pwd = os.getcwd()
    os.chdir(self.log_dir())
    if not exec_script:
        return False
    if mem < 0:
        logging.error("Please give a reasonable memory")
        return False
    additionalOptions = "-l cput={run_time} -l walltime={run_time} ".format(run_time=run_time)
    additionalOptions += "-l nodes=1:ppn={} ".format(n_cores)
    env_vars.append(("ATLAS_LOCAL_ROOT_BASE", ""))  # Force the scripts to redo asetup
    submit_cmd = "qsub -o {log_dir} -j oe {dependencies} -N '{jobName}' {env_vars} {additionalOptions} {exec_script}".format(
        log_dir=self.log_dir(),
        dependencies=self.__schedule_jobs(self.to_hold(hold_jobs), sub_job),
        jobName=self.subjob_name(sub_job),
        env_vars=" -v \"" + ",".join([
            "%s='%s'" % (var, value) for var, value in (env_vars + self.common_env_vars())
        ]) + "\"",
        additionalOptions=additionalOptions,
        exec_script=exec_script)
    print(submit_cmd)
    if os.system(submit_cmd):
        logging.error("Failed to submit " + submit_cmd)
        return False
    os.chdir(pwd)
    return True
def run_athena_cmds(options, job_options="", extra_args=[]):
    Athena_Cmds = []
    Proccessed_Smp = []
    for smp_name in download_ci_files(options):
        file_list = "%s/FileList_%s.txt" % (options.TEMPdir, smp_name)
        ### now start athena
        athena_outfile = "%s/athena_%s/CI.root" % (options.TEMPdir, smp_name)
        athena_logfile = "%s/athena_%s/CI.log" % (options.TEMPdir, smp_name)
        CreateDirectory("%s/athena_%s/" % (options.TEMPdir, smp_name), False)
        ### Redirect stdout to the log file first, then duplicate stderr into it;
        ### the reversed order would leave stderr on the terminal
        athena_cmd = "python %s %s --parseFilesForPRW --jobOptions %s --evtMax %d %s --filesInput %s --outFile %s > %s 2>&1" % (
            options.athenaArgParser, " ".join(extra_args), job_options, options.evtMax,
            "--noSyst" if options.noSyst else "", file_list, athena_outfile, athena_logfile)
        ### Parallelize the athena commands
        Athena_Cmds += [athena_cmd]
        Proccessed_Smp += [(smp_name, athena_outfile)]
    ExecuteCommands(Athena_Cmds, MaxCurrent=4)
    return Proccessed_Smp
def __init__(self, thread_name="", subthread=-1, thread_engine=None, dependencies=[], script_exec=""):
    threading.Thread.__init__(self)
    self.__engine = thread_engine
    self.__name = thread_name
    self.__sub_num = subthread
    self.__isSuccess = False
    self.__started = False
    self.__dependencies = [d for d in dependencies]
    self.__script_to_exe = script_exec
    self.__tmp_dir = "%s/%s" % (thread_engine.tmp_dir(), id_generator(50))
    CreateDirectory(self.__tmp_dir, True)
    self.__env_vars = [("LOCAL_TASK_ID", "%d" % (self.thread_number())),
                       ("TMPDIR", self.__tmp_dir)]
def submit_merge(self):
    self.prepare_merge()
    ### No merge interfaces were created at all
    if len(self.__merge_interfaces) == 0:
        return True
    ### Make sure that the final directory is empty before merging
    if len(self.container(True)) > 0:
        CreateDirectory(self.ds_final_dir(), True)
    hold_jobs = []
    ### Submit the merge jobs
    for merge in self.__merge_interfaces:
        if not merge.submit_job():
            return False
        hold_jobs += [self.engine().subjob_name("merge-%s" % (merge.outFileName()))]
    if self.__download and not self.engine().submit_clean_job(
            hold_jobs=hold_jobs, to_clean=[self.ds_download_dir()], sub_job=self.logical_name()):
        return False
    return self.engine().submit_move_job(
        hold_jobs=hold_jobs + [self.engine().subjob_name("Clean-%s" % (self.logical_name()))],
        ### Give the particular files to move
        to_move=["%s/%s.root" % (self.engine().out_dir(), merge.outFileName()) for merge in self.__merge_interfaces],
        destination=self.ds_final_dir(),
        sub_job=self.logical_name())
def __init__(
        self,
        cluster_engine=None,
        jobOptions="",
        input_ds=[],
        run_time="19:59:59",
        dcache_dir="",
        alg_opt="",  ### Extra options of the algorithm like noSyst etc.
        vmem=2000,
        events_per_job=100000,
        hold_jobs=[],
        files_per_merge=10,
        final_split=1,
):
    self.__cluster_engine = cluster_engine
    ### Job splitting configuration
    self.__events_per_job = events_per_job
    self.__dcache_dir = dcache_dir
    self.__dcache_loc = ResolvePath(dcache_dir)
    ### Analysis job configuration
    self.__job_options = jobOptions
    self.__alg_opt = alg_opt
    self.__run_time = run_time
    self.__vmem = vmem
    ### Hold jobs
    self.__hold_jobs = [H for H in hold_jobs]
    ### Merging
    self.__merge_interfaces = []
    self.__files_per_merge_itr = files_per_merge
    self.__final_split = final_split
    self.__nsheduled = 0
    for ds in sorted(input_ds):
        if not self.__prepare_input(ds):
            CreateDirectory(self.engine().config_dir(), True)
            self.__nsheduled = 0
            ### __init__ must not return a value; a bare return aborts the setup
            return
def __prepare_input(self, in_ds=""):
    print "INFO <__prepare_input>: Assemble configuration for %s" % (in_ds)
    ### Name to be piped to the job
    out_name = in_ds[in_ds.rfind("/") + 1:in_ds.rfind(".")] if IsTextFile(in_ds) or IsROOTFile(in_ds) else in_ds
    split_dir = "%s/Datasets/%s" % (self.split_cfg_dir(), out_name)
    root_files = []
    ### Now we need to find the corresponding ROOT files
    ### 1) The dataset is a ROOT file itself
    if IsROOTFile(in_ds):
        root_files += [in_ds]
    ### 2) The given dataset is a .txt file
    elif IsTextFile(in_ds):
        ### Find the ROOT files from there
        root_files = self.__extract_root_files(in_ds)
        if len(root_files) == 0:
            return False
    ### 3) The given dataset is a directory
    elif os.path.isdir(in_ds):
        if in_ds.endswith("/"):
            in_ds = in_ds[:in_ds.rfind("/")]
        out_name = in_ds[in_ds.rfind("/") + 1:]
        split_dir = "%s/Directory/%s" % (self.split_cfg_dir(), out_name)
        root_files = ["%s/%s" % (in_ds, F) for F in os.listdir(in_ds) if IsROOTFile(F)]
    ### 4) It's a logical dataset stored on dCache
    else:
        root_files = self.__find_on_dcache(in_ds)
    if len(root_files) == 0:
        print "ERROR: Could not associate anything to %s" % (in_ds)
        return False
    if len(out_name) == 0:
        print "ERROR: How should the output of %s be called?" % (in_ds)
        return False
    ### Assemble the splitting of the jobs
    main_list = "%s/AllROOTFiles.main" % (split_dir)
    files_in_main = ReadListFromFile(main_list) if os.path.exists(main_list) else []
    ### The list is unknown or the content of ROOT files has changed.
    ### Redo the splitting ;-)
    if len(files_in_main) != len(root_files) or not IsListIn(files_in_main, root_files):
        print "INFO: Assemble new split for %s" % (in_ds)
        CreateDirectory(split_dir, True)
        WriteList(root_files, main_list)
        os.system("CreateBatchJobSplit -I %s -O %s -EpJ %i" % (main_list, split_dir, self.__events_per_job))
    ### Each of the lists contains the ROOT files to process per sub job
    split_lists = ["%s/%s" % (split_dir, F) for F in os.listdir(split_dir) if IsTextFile(F)]
    n_jobs = len(split_lists)
    subjob_outs = ["%s/%s_%d.root" % (self.engine().tmp_dir(), out_name, d) for d in range(n_jobs)]
    assembled_in = [] if not os.path.exists(self.job_input()) else ReadListFromFile(self.job_input())
    assembled_out = [] if not os.path.exists(self.job_out_names()) else ReadListFromFile(self.job_out_names())
    start_reg = len(assembled_in)
    ### Write what we've assembled so far
    WriteList(assembled_in + split_lists, self.job_input())
    WriteList(assembled_out + subjob_outs, self.job_out_names())
    #### Submit the merge jobs
    self.__merge_interfaces += [
        self.engine().create_merge_interface(out_name=out_name,
                                             files_to_merge=subjob_outs,
                                             hold_jobs=[(self.engine().job_name(), [start_reg + i + 1 for i in range(n_jobs)])],
                                             files_per_job=self.__files_per_merge_itr,
                                             final_split=self.__final_split)
    ]
    self.__nsheduled += n_jobs
    return True
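# Note the hold_jobs element above: a (job_name, [array indices]) tuple instead of a
# plain job name, so the merge only waits for the sub-jobs of its own dataset. A
# sketch of how an engine might expand both forms into scheduler dependencies; the
# name scheme "<job>_<index>" is an assumption for illustration, not taken from
# this package:
def expand_hold(hold_jobs):
    expanded = []
    for h in hold_jobs:
        if isinstance(h, tuple):
            name, indices = h
            expanded += ["%s_%d" % (name, i) for i in indices]  # hold on single array tasks
        else:
            expanded += [h]  # hold on the entire job
    return expanded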
    ))
print 'INFO: Found the following tags:\n'
for campaign in sortedSamples.iterkeys():
    for stype in sortedSamples[campaign].iterkeys():
        rtagsfound = sorted([r for r in sortedSamples[campaign][stype].iterkeys()])
        print '%s (%s):\t %s' % (campaign, stype, rtagsfound)
if RunOptions.printTagsOnly:
    sys.exit(0)
if len(sortedSamples) == 0:
    print 'INFO: No samples found, exiting...'
    sys.exit(1)
CreateDirectory(RunOptions.outDir, CleanUpOld=False)  # save the final PRW files here
#### Now download the NTUP_PILEUP datasets, merge them and move them to the final output directory
ExecuteThreads(MergingTools, MaxCurrent=2)
#### Everything is merged. Now check whether a consistency check is demanded
if not RunOptions.doConsistencyCheck:
    print "#############################################################################################################"
    print "INFO: Successfully merged %d prw files in %s" % (len(MergingTools), RunOptions.outDir)
    print "INFO: You did not request a check that all datasets appear in 2015-2017 data"
    print "INFO: Attention: if you're running with the lumi-calc files from all three years, it might be"
    print "      that a few of your jobs die because the dsid is only available in mc16a/mc16d."
    print "      We recommend to perform a consistency check of your merged prw files beforehand, where it is checked"
    print "      that all of the dsids in the prw files run with all three lumicalc files. Missing prw ntuples"
    print "      are submitted otherwise, if the dataset is available in both campaigns."
    print "#############################################################################################################"
def main():
    """List datasets located at a RSE location."""
    CheckRucioSetup()
    CheckRemainingProxyTime()

    RunOptions = getArgumentParser().parse_args()
    Today = time.strftime("%Y-%m-%d")
    Patterns = RunOptions.pattern
    OutDir = RunOptions.OutDir
    RSE = RunOptions.RSE
    if ',' in RSE:
        # people may have more than one RSE in their environment variable for grid submits
        RSE = RSE.split(',')[0]
    Prefix = ''
    if RunOptions.MyRequests:
        Prefix = 'MyRequestTo_'
        DS = ListUserRequests(RSE, RunOptions.rucio)
    else:
        DS = ListDisk(RSE)
    ### MetaFile = open("Content_%s.txt"%(RSE), 'w')
    ### for DataSet, Size in ListDiskWithSize(RSE):
    ###     Owner, ID = GetDataSetInfo(DataSet,RSE)
    ###     line = "%s | %s | %s | %.2f GB"%(ID, Owner,DataSet, Size)
    ###     MetaFile.write("%s\n"%(line))
    ###     print line
    ### MetaFile.close()
    ### exit(0)
    if len(DS) == 0:
        logging.warning("Disk is empty.")
        exit(0)
    CreateDirectory(OutDir, False)
    ###########
    # Define the file list name
    ###########
    FileList = "%s%s_%s" % (Prefix, RSE, Today)
    if len(Patterns) > 0:
        FileList += "_%s" % ('_'.join(Patterns))
    if len(RunOptions.exclude) > 0:
        FileList += "_exl_%s" % ('_'.join(RunOptions.exclude))
    FileList += '.txt'
    Write = []
    for d in sorted(DS):
        allPatternsFound = True
        for Pattern in Patterns:
            if Pattern not in d:
                allPatternsFound = False
                break
        for Pattern in RunOptions.exclude:
            if Pattern in d:
                allPatternsFound = False
                break
        if allPatternsFound:
            ### Strip sub-dataset extensions so each container is written only once
            if d.split(".")[-1].isdigit():
                d = d[:d.rfind(".")]
            if d.find("_tid") != -1:
                d = d[0:d.rfind("_tid")]
            if len([w for w in Write if w.find(d) != -1]) > 0:
                continue
            logging.info("Write dataset %s" % (d))
            Write.append(d)
    if len(Write) == 0:
        logging.error("No datasets containing given pattern(s) found!")
        exit(0)
    WriteList(Write, "%s/%s" % (OutDir, FileList))
    logging.info("Datasets written to file %s/%s" % (OutDir, FileList))
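# The trimming logic above in isolation: drop a trailing numeric extension and any
# "_tid..." suffix so different sub-datasets collapse onto one container name.
# The dataset name below is an illustrative placeholder:
def normalize_ds(d):
    if d.split(".")[-1].isdigit():
        d = d[:d.rfind(".")]
    if d.find("_tid") != -1:
        d = d[:d.rfind("_tid")]
    return d

assert normalize_ds("data18.physics_Main.DAOD._tid1234_00") == "data18.physics_Main.DAOD."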