def test_workflow(self):
    """Write three small ROOT files, run a LocalMergeTask over them, and
    check that the task's first output reports that it exists."""
    import ROOT as r

    basepath = "/tmp/{}/metis/localmerge/".format(os.getenv("USER"))

    # Make the base directory
    MutableFile(basepath).touch()

    # Clean up before running
    do_cmd("rm {}/*.root".format(basepath))

    # Each input file holds one default-constructed TH1F
    for index in range(3):
        rootfile = r.TFile("{}/in_{}.root".format(basepath, index), "RECREATE")
        hist = r.TH1F()
        hist.Write()
        rootfile.Close()

    # NOTE(review): this name is assigned but never read in this test
    outname = "/home/users/namin/2017/test/ProjectMetis/testout/out.root"

    merge_inputs = glob.glob(basepath + "/in_*.root")
    merge_output = basepath + "/out.root"
    task = LocalMergeTask(
        input_filenames=merge_inputs,
        output_filename=merge_output,
    )
    task.process()

    first_output = task.get_outputs()[0]
    self.assertEqual(first_output.exists(), True)
def test_workflow(self):
    """Chain three DummyMoveTasks (step0 -> step1 -> step2 -> step3) in a
    concatenated Path and process it until it reports completion."""
    basepath = "/tmp/{}/metis/".format(os.getenv("USER"))

    # Clean up before running
    do_cmd("rm {}/*.root".format(basepath))

    # Make the base directory
    MutableFile(basepath).touch()

    # Four layers of files, three files per layer. Files are constructed
    # in the order step0_i, step1_i, step2_i, step3_i for each i.
    name_templates = (
        "{}/step0_{}.root",
        "{}/step1_{}.root",
        "{}/step2_{}.root",
        "{}/step3_{}.root",
    )
    layers = [[] for _ in name_templates]
    for index in range(3):
        for layer, name_template in zip(layers, name_templates):
            layer.append(MutableFile(name=name_template.format(basepath, index)))
    step0, step1, step2, step3 = layers

    # Only the first layer is touched up front; later layers are produced
    # by the tasks themselves
    for seed_file in step0:
        seed_file.touch()

    # First task maps step0 onto step1; the later tasks are clones of it
    # with their inputs/outputs swapped for the next pair of layers
    first_task = DummyMoveTask(
        inputs=step0,
        outputs=step1,
    )
    second_task = first_task.clone(inputs=step1, outputs=step2)
    third_task = first_task.clone(inputs=step2, outputs=step3)

    # Two paths are built separately only to exercise Path concatenation
    # (the original author notes that this "addition" is not commutative)
    head = Path([first_task, second_task])
    tail = Path([third_task])
    full_path = head + tail

    # Poll: process, then pause briefly, until the whole path is complete
    while not full_path.complete():
        full_path.process()
        time.sleep(0.02)

    self.assertEqual(full_path.complete(), True)
def process(self):
    """
    Move every input file onto its positionally paired output file.

    Inputs and outputs are paired with zip(). If `self.create_inputs` is
    truthy and an input does not exist yet, it is created with `touch`
    and rechecked before the move. After each `mv` the output is
    rechecked and the pair is logged at debug level.
    """
    for source, target in zip(self.get_inputs(), self.get_outputs()):
        must_create = self.create_inputs and not source.exists()
        if must_create:
            notice = "Specified create_inputs=True, so creating input file {}".format(source.get_name())
            self.logger.debug(notice)
            do_cmd("touch {}".format(source.get_name()))
            source.recheck()

        move_command = "mv {} {}".format(source.get_name(), target.get_name())
        do_cmd(move_command)
        target.recheck()

        self.logger.debug("Running on {0} -> {1}".format(source.get_name(), target.get_name()))
def get_globaltag(self):
    """
    Return the global tag for this sample, querying for it if needed.

    A truthy `self.info["gtag"]` is returned as-is. Otherwise the config
    record is fetched, either through the `dasgoclient` command line
    (when `self.dasgoclient` is truthy) or through `self.do_dis_query`,
    and both `self.info["gtag"]` and `self.info["native_cmssw"]` are
    filled from it before the tag is returned.
    """
    cached = self.info.get("gtag", None)
    if cached:
        return cached

    if not self.dasgoclient:
        response = self.do_dis_query(self.info["dataset"], typ="config")
    else:
        query = "dasgoclient -query 'config dataset={} system=dbs3' -json".format(
            self.info["dataset"])
        records = json.loads(do_cmd(query))
        # Only the first config entry of the first record is used
        response = records[0]["config"][0]

    self.info["gtag"] = str(response["global_tag"])
    self.info["native_cmssw"] = str(response["release_version"])
    return self.info["gtag"]
def test_config_parameters(self):
    """Check the crab config produced by CrabManager for two datasets.

    The body is currently switched off by the bare `return` on the first
    statement, so none of the checks below run.
    """
    # Disabled: exits before importing or constructing anything
    return

    from metis.CrabManager import CrabManager

    basepath = "/tmp/{0}/metis/crab_test/".format(os.getenv("USER"))
    do_cmd("mkdir -p {0}".format(basepath))
    do_cmd("touch {0}/pset.py".format(basepath))

    dataset = "/TTZToLL_M-1to10_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-80X_mcRun2_asymptotic_2016_TrancheIV_v6-v1/MINIAODSIM"
    dataset_user = "******"
    request_name = "test_metis_ttzlowmass"
    pset_location = "{0}/pset.py".format(basepath)

    # First manager: built without an explicit plugin name
    default_manager = CrabManager(
        dataset=dataset,
        request_name=request_name,
        pset_location=pset_location,
    )
    default_cfg = default_manager.get_crab_config()

    self.assertEqual(default_cfg.JobType.pluginName, "Analysis")
    self.assertEqual(default_cfg.JobType.psetName, pset_location)
    self.assertEqual(default_cfg.Data.inputDataset, dataset)
    self.assertEqual(default_cfg.Data.splitting, "FileBased")
    self.assertEqual(default_cfg.Data.inputDBS, "global")

    # Second manager: the other dataset string plus an explicit plugin name
    plugin_manager = CrabManager(
        dataset=dataset_user,
        request_name=request_name,
        pset_location=pset_location,
        plugin_name="MyPlugin",
    )
    plugin_cfg = plugin_manager.get_crab_config()

    self.assertEqual(plugin_cfg.Data.inputDataset, dataset_user)
    self.assertEqual(plugin_cfg.JobType.pluginName, "MyPlugin")
    self.assertEqual(plugin_cfg.Data.inputDBS, "phys03")
def get_unique_request_name(self):
    """
    Return the unique request name, recovering it from crab.log if needed.

    A truthy `self.unique_request_name` is returned directly. Otherwise
    `<task_dir>/crab.log` is grepped for a "Task name:" line; when one is
    found, the text after that marker is stored on the instance, logged
    at debug level and returned. Returns None when the log file is
    missing or has no such line.
    """
    # trivial check
    known = self.unique_request_name
    if known:
        return known

    # more robust check: parse the crab log, if there is one
    crablog = "{0}/crab.log".format(self.task_dir)
    if not os.path.isfile(crablog):
        return None

    grep_command = "/bin/grep 'Success' -A 1 -m 1 {0} | /bin/grep 'Task name'".format(crablog)
    taskline = do_cmd(grep_command)
    if "Task name:" not in taskline:
        return None

    self.unique_request_name = taskline.split("Task name:")[1].strip()
    self.logger.debug("found crablog {0} and parsing to find unique_request_name: {1}".format(crablog, self.unique_request_name))
    return self.unique_request_name
def load_from_dasgoclient(self):
    """
    Populate `self.info["files"]` and `self.info["nevts"]` from dasgoclient.

    Runs a `file dataset=...` query for `self.info["dataset"]`, builds one
    FileDBS per returned record (using the first entry of its "file"
    list), and stores the objects sorted by name together with the sum
    of their event counts.

    If the instance has a `selection` attribute, it is called with each
    file name and only files for which it returns a truthy value are
    kept; without that attribute every file is kept.
    """
    cmd = "dasgoclient -query 'file dataset={}' -json".format(
        self.info["dataset"])
    js = json.loads(do_cmd(cmd))

    # Decide once whether a selection is installed on this instance
    has_selection = hasattr(self, "selection")

    fileobjs = []
    for j in js:
        f = j["file"][0]
        # Fix: the original passed the undefined name `fdict` here, which
        # raised NameError as soon as a selection was present.
        if has_selection and not self.selection(f["name"]):
            continue
        fileobjs.append(
            FileDBS(name=f["name"],
                    nevents=f["nevents"],
                    # "size" is scaled by 1e-9 and rounded to 2 decimals
                    filesizeGB=round(f["size"] * 1e-9, 2)))

    fileobjs.sort(key=lambda x: x.get_name())
    self.info["files"] = fileobjs
    self.info["nevts"] = sum(fo.get_nevents() for fo in fileobjs)
cmssw_version = "CMSSW_9_2_8", scram_arch = "slc6_amd64_gcc530", no_load_from_backup = True, ) maker_tasks.append(maker_task) merge_tasks.append(merge_task) for i in range(100): total_summary = {} for maker_task, merge_task in zip(maker_tasks,merge_tasks): maker_task.process() if maker_task.complete(): do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir())) do_cmd("mkdir -p {}/skimmed".format(maker_task.get_outputdir())) merge_task.reset_io_mapping() merge_task.update_mapping() merge_task.process() total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary() total_summary[merge_task.get_sample().get_datasetname()] = merge_task.get_task_summary() # parse the total summary and write out the dashboard StatsParser(data=total_summary, webdir="~/public_html/dump/metis_stopbaby/").do() # 1 hr power nap time.sleep(15.*60) # If it's complete, make a dummy sample out of the output directory
tag=tag, cmssw_version="CMSSW_9_2_8", scram_arch="slc6_amd64_gcc530", no_load_from_backup=True, ) maker_tasks.append(maker_task) merge_tasks.append(merge_task) for i in range(100): total_summary = {} for maker_task, merge_task in zip(maker_tasks, merge_tasks): maker_task.process() if maker_task.complete(): do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir())) do_cmd("mkdir -p {}/skimmed".format( maker_task.get_outputdir())) merge_task.reset_io_mapping() merge_task.update_mapping() merge_task.process() total_summary[maker_task.get_sample().get_datasetname( )] = maker_task.get_task_summary() total_summary[merge_task.get_sample().get_datasetname( )] = merge_task.get_task_summary() # parse the total summary and write out the dashboard StatsParser(data=total_summary, webdir="~/public_html/dump/metis_stopbaby/").do()
def get_global_tag(psetname):
    """
    Run a shell pipeline over `psetname` and return whatever do_cmd returns.

    The pipeline reverses the file with `tac`, keeps lines containing
    `process.GlobalTag.globaltag`, takes the first of those (i.e. the
    last one in the file) and cuts out the second double-quote-delimited
    field.
    """
    pipeline = """tac {} | grep process.GlobalTag.globaltag | head -1 | cut -d '"' -f2""".format(psetname)
    return do_cmd(pipeline)
# print "cd .." # print print "mkdir -p {taskname}".format(taskname=taskname) print "(".format(taskname=taskname) print "cd {taskname} ; cp ../pset.py .".format(taskname=taskname) print "cmsRun -n 1 pset.py inputs={infiles} output={outname} globaltag={globaltag} {psetargs} && ".format(infiles=infiles,outname=outname,globaltag=globaltag,psetargs=psetargs) print "gfal-copy -p -f -t 4200 --verbose file://`pwd`/{outname} gsiftp://gftp.t2.ucsd.edu{outdir}{outname} --checksum ADLER32".format(outname=outname, outdir=outdir) print ") >& {taskname}/log_{index}.txt &".format(taskname=taskname,index=index) print """# condor_rm -const 'taskname=="{taskname}" && jobnum=="{jobnum}"' """.format(taskname=taskname,jobnum=jobnum) print if todownload: # transpose (so first element is the first file for each job, second is second, etc.; pad with None) todownload = map(list,izip_longest(*todownload)) # get single list filtering out nontruthy stuff todownload = filter(None,sum(todownload,[])) print "\n".join(todownload) if __name__ == "__main__": # print_commands("546478.0 546478.9") selstr = "" # selstr = "547165.230 547454.0 547453.0 547553.0 " # selstr = "549298.3" cids = " ".join(do_cmd("condor_q -w -nobatch %s | grep Run201 | awk '{print $1}'" % selstr).split()) # print_commands(cids) print_commands(cids, localcache=True)
maker_tasks.append(child_tasks) merge_tasks.append(merge_task) for i in range(100): total_summary = {} for child_tasks, merge_task in zip(maker_tasks,merge_tasks): all_child_finishes = True for maker_task in child_tasks: maker_task.process() if not maker_task.complete(): all_child_finishes = False if all_child_finishes: do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir())) merge_task.reset_io_mapping() merge_task.update_mapping() merge_task.process() if merge_task.complete(): outfile = '{}/{}_1.root'.format(merge_task.get_outputdir(), merge_task.output_name.split('.')[0]) target = outfile.split('resub')[0] tmp = merge_task.get_sample().get_datasetname().split('_') target += 'stopBaby_{}/{}'.format('_'.join(tmp[1:-2]), merge_task.output_name) # print('cp {} {}'.format(outfile, target)) print('cp {} {}'.format(outfile, target)) do_cmd('cp {} {}'.format(outfile, target)) print(target, "finished!!") total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()
gfal-copy -p -f -t 4200 --verbose file://`pwd`/tmp.txt gsiftp://gftp.t2.ucsd.edu${OUTPUTDIR}/${OUTPUTNAME}_${IFILE}.txt --checksum ADLER32 """) exefile.chmod("u+x") # Make a CondorTask (3 in total, one for each input) task = CondorTask( sample=ds, files_per_output=1, tag="v0", output_name="output.txt", executable=exefile.get_name(), condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"}, no_load_from_backup= True, # for the purpose of the example, don't use a backup ) do_cmd("rm -rf {0}".format(task.get_outputdir())) # Process and sleep until complete is_complete = False for t in [5.0, 5.0, 10.0, 15.0, 20.0]: task.process() print("Sleeping for {0} seconds".format(int(t))) time.sleep(t) is_complete = task.complete() if is_complete: break # If it's complete, make a dummy sample out of the output directory # in order to pick up the files. Then cat out the contents and sum # them up. This should be 3*2*10 = 100 if is_complete: print("Job completed! Checking outputs...")
def get_taskdir(self):
    """
    Return the normalized path `<basedir>/tasks/<unique_name>`.

    If that directory does not exist yet, `mkdir -p` is run for its
    `logs/std_logs/` subdirectory (which also creates the directory
    itself) before the path is returned.
    """
    base = self.get_basedir()
    task_dir = "{0}/tasks/{1}/".format(base, self.unique_name)

    already_present = os.path.exists(task_dir)
    if not already_present:
        do_cmd("mkdir -p {0}/logs/std_logs/".format(task_dir))

    # normpath drops the trailing slash and collapses duplicate separators
    return os.path.normpath(task_dir)
def condor_submit(**kwargs): # pragma: no cover
    """
    Build a condor submit file from keyword arguments and submit it.

    Required kwargs (RuntimeError if any is missing, unless fake=True):
        executable, arguments, inputfiles, logdir

    Optional kwargs read by this function:
        fake             -- if truthy, return (True, -1) immediately
        multiple         -- if truthy, `arguments`, `inputfiles` (and
                            `selection_pairs`, if given) are lists of lists
                            and one `queue` statement is emitted per element
        universe         -- defaults to "Vanilla"; "local" (any case) forces
                            the requirements line to "Requirements = "
        sites            -- defaults to the comma-joined `good_sites` list
        selection_pairs  -- 2-tuples rendered as `+key="value"` lines
        requirements_line-- replaces the default requirements when non-blank
        stream_logs      -- if truthy, adds the stream/transfer log settings
        classads         -- 2-tuples rendered as `+key="value"` lines
        return_template  -- if truthy, return the formatted submit text
                            instead of writing or submitting anything
        schedd           -- if non-empty, passed to condor_submit via -name

    Returns (succeeded:bool, cluster_id:str) after a successful submission
    and raises RuntimeError when the condor_submit output does not contain
    the expected "job(s) submitted to cluster" text.

    NOTE(review): as written, the function prints a message and calls
    sys.exit() right after writing submit.cmd, so the submission code at
    the bottom is not reached. See the FIXME marker below.
    """

    if kwargs.get("fake",False): return True, -1

    # Fail early on any missing mandatory keyword
    for needed in ["executable","arguments","inputfiles","logdir"]:
        if needed not in kwargs:
            raise RuntimeError("To submit a proper condor job, please specify: {0}".format(needed))

    # Values substituted into the submit template at the end
    params = {}

    queue_multiple = kwargs.get("multiple",False)

    params["universe"] = kwargs.get("universe", "Vanilla")
    params["executable"] = kwargs["executable"]
    # params["inputfiles"] = ",".join(kwargs["inputfiles"])
    params["logdir"] = kwargs["logdir"]
    params["proxy"] = get_proxy_file()
    params["timestamp"] = get_timestamp()

    # submit.cmd is written next to the executable; a bare filename
    # (no "/" after normalization) means the current directory
    exe_dir = params["executable"].rsplit("/",1)[0]
    if "/" not in os.path.normpath(params["executable"]):
        exe_dir = "."

    # http://uaf-10.t2.ucsd.edu/~namin/dump/badsites.html
    good_sites = [
        "T2_US_Caltech",
        "T2_US_UCSD",
        "T2_US_MIT",
        "T2_US_Nebraska",
        # "T2_US_Purdue", # Issues with fortran?? even though we're in singularity??
        "T2_US_Vanderbilt",
    ]

    params["sites"] = kwargs.get("sites",",".join(good_sites))

    if queue_multiple:
        # Only the first element is inspected to validate the list-of-lists shape
        if len(kwargs["arguments"]) and (type(kwargs["arguments"][0]) not in [tuple,list]):
            raise RuntimeError("If queueing multiple jobs in one cluster_id, arguments must be a list of lists")
        # One space-joined argument string and one comma-joined input string per job
        # NOTE(review): len() is taken of these map() results further down,
        # which presumably relies on map() returning a list -- confirm the
        # interpreter version this runs under
        params["arguments"] = map(lambda x: " ".join(map(str,x)), kwargs["arguments"])
        params["inputfiles"] = map(lambda x: ",".join(map(str,x)), kwargs["inputfiles"])
        params["extra"] = []
        if "selection_pairs" in kwargs:
            sps = kwargs["selection_pairs"]
            if len(sps) != len(kwargs["arguments"]):
                raise RuntimeError("Selection pairs must match argument list in length")
            # Build one block of `+key="value"` lines per job
            for sel_pairs in sps:
                extra = ""
                for sel_pair in sel_pairs:
                    if len(sel_pair) != 2:
                        raise RuntimeError("This selection pair is not a 2-tuple: {0}".format(str(sel_pair)))
                    extra += '+{0}="{1}"\n'.format(*sel_pair)
                params["extra"].append(extra)
    else:
        # Single job: everything is a plain string
        params["arguments"] = " ".join(map(str,kwargs["arguments"]))
        params["inputfiles"] = ",".join(map(str,kwargs["inputfiles"]))
        params["extra"] = ""
        if "selection_pairs" in kwargs:
            for sel_pair in kwargs["selection_pairs"]:
                if len(sel_pair) != 2:
                    raise RuntimeError("This selection pair is not a 2-tuple: {0}".format(str(sel_pair)))
                params["extra"] += '+{0}="{1}"\n'.format(*sel_pair)

    params["proxyline"] = "x509userproxy={proxy}".format(proxy=params["proxy"])

    # Require singularity+cvmfs unless machine is uaf-*. or uafino.
    # NOTE, double {{ and }} because this gets str.format'ted later on
    # Must have singularity&cvmfs. Or, (it must be uaf or uafino computer AND if a uaf computer must not have too high of slotID number
    # so that we don't take all the cores of a uaf
    requirements_line = 'Requirements = ((HAS_SINGULARITY=?=True) && (HAS_CVMFS_cms_cern_ch =?= true)) || (regexp("(uaf-[0-9]{{1,2}}|uafino)\.", TARGET.Machine) && !(TARGET.SlotID>(TotalSlots<14 ? 3:7) && regexp("uaf-[0-9]", TARGET.machine)))'
    # The local universe gets an empty requirements expression
    if kwargs.get("universe","").strip().lower() in ["local"]:
        kwargs["requirements_line"] = "Requirements = "
    # A non-blank caller-supplied line replaces the default above
    if kwargs.get("requirements_line","").strip():
        requirements_line = kwargs["requirements_line"]

    # Fixed header of the submit file; {placeholders} are filled from
    # `params` by the final template.format(**params) call
    template = """
universe={universe}
+DESIRED_Sites="{sites}"
executable={executable}
transfer_executable=True
transfer_output_files = ""
+Owner = undefined
+project_Name = \"cmssurfandturf\"
log={logdir}/{timestamp}.log
output={logdir}/std_logs/1e.$(Cluster).$(Process).out
error={logdir}/std_logs/1e.$(Cluster).$(Process).err
notification=Never
should_transfer_files = YES
when_to_transfer_output = ON_EXIT
"""
    template += "{0}\n".format(params["proxyline"])
    template += "{0}\n".format(requirements_line)
    if kwargs.get("stream_logs",False):
        template += "StreamOut=True\nstream_error=True\nTransferOut=True\nTransferErr=True\n"
    # Arbitrary extra classads, each rendered as +key="value"
    for ad in kwargs.get("classads",[]):
        if len(ad) != 2:
            raise RuntimeError("This classad pair is not a 2-tuple: {0}".format(str(ad)))
        template += '+{0}="{1}"\n'.format(*ad)
    # Per-job extras are only emitted when there is exactly one per job
    do_extra = len(params["extra"]) == len(params["arguments"])
    if queue_multiple:
        template += "\n"
        # One arguments/transfer_input_files/queue stanza per job
        for ijob,(args,inp) in enumerate(zip(params["arguments"],params["inputfiles"])):
            template += "arguments={0}\n".format(args)
            template += "transfer_input_files={0}\n".format(inp)
            if do_extra:
                template += "{0}\n".format(params["extra"][ijob])
            template += "queue\n"
            template += "\n"
    else:
        template += "arguments={0}\n".format(params["arguments"])
        template += "transfer_input_files={0}\n".format(params["inputfiles"])
        template += "{0}\n".format(params["extra"])
        template += "queue\n"

    # Dry-run mode: hand back the rendered text without touching disk
    if kwargs.get("return_template",False):
        return template.format(**params)

    buff = template.format(**params)

    with open("{0}/submit.cmd".format(exe_dir),"w") as fhout:
        fhout.write(buff)

    extra_cli = ""
    schedd = kwargs.get("schedd","") # see note in condor_q about `schedd`
    if schedd:
        extra_cli += " -name {} ".format(schedd)
    do_cmd("mkdir -p {}/std_logs/ ".format(params["logdir"]))

    # print buff
    # # FIXME
    # NOTE(review): the two statements below are live code. They stop the
    # process after submit.cmd is written, so nothing past this point runs.
    # This looks like a debugging leftover -- confirm before relying on
    # this function to actually submit.
    print "Wrote the file, and now exiting"
    sys.exit()

    out = do_cmd("condor_submit {}/submit.cmd {}".format(exe_dir,extra_cli))

    succeeded = False
    cluster_id = -1
    if "job(s) submitted to cluster" in out:
        succeeded = True
        # Cluster id is the text after "submitted to cluster " up to the first "."
        cluster_id = out.split("submitted to cluster ")[-1].split(".",1)[0].strip()
    else:
        raise RuntimeError("Couldn't submit job to cluster because:\n----\n{0}\n----".format(out))

    return succeeded, cluster_id