def get_status(ds):
    status = "unknown"
    try:
        status = dis_client.query(ds + ",this", typ="mcm")["response"]["payload"]["status"]
    except:
        pass
    sys.stdout.write(".")
    sys.stdout.flush()
    return status
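# Usage sketch (illustrative, not part of the original script): look up the McM
# production status of one dataset. The dataset name is copied from a comment
# further down in this file; any MINIAODSIM path should work. Assumes dis_client
# and sys are imported as in the rest of this file.
# status = get_status("/TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext2-v1/MINIAODSIM")
# prints a "." progress dot and returns e.g. "done", or "unknown" on any failure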
def get_xsec_efact(ds):
    xsec = -1
    efact = 1
    try:
        payload = dis_client.query(ds, typ="mcm")["response"]["payload"]
        xsec = payload["cross_section"]
        efact = payload["filter_efficiency"]
    except:
        pass
    return xsec, efact
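# Usage sketch (illustrative, not part of the original script): fetch the McM
# cross section and filter efficiency; their product gives the effective cross
# section. "example_dataset" is a placeholder for any MINIAODSIM path. On any
# failure the defaults (-1, 1) are returned.
# xsec, efact = get_xsec_efact(example_dataset)
# effective_xsec = xsec * efact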
def get_driver(dataset_name):
    # Get the query data
    query = dataset_name + ",this"
    data = dis_client.query(query, typ="driver")
    # Check the query data status
    if data["response"]["status"] != "success":
        print "ERROR - Query failed!"
        print "Check your dataset_name = %s" % (dataset_name)
        sys.exit(255)
    # Get the driver commands
    cmsDriver = data["response"]["payload"]["cmsDriver"]
    return format_driver(cmsDriver)
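# Usage sketch (illustrative): retrieve the formatted cmsDriver commands used to
# produce a dataset. Note that format_driver() is assumed to be defined elsewhere
# in the original file; it is not shown in this excerpt.
# driver_commands = get_driver("/TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext2-v1/MINIAODSIM")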
typ="mcm")["response"]["payload"]["status"] except: pass sys.stdout.write(".") sys.stdout.flush() return status if __name__ == "__main__": campaigns = ["RunIISpring16MiniAODv1", "RunIISpring16MiniAODv2"] old_fname = "past_samples.txt" all_datasets = [] for campaign in campaigns: output = dis_client.query("/*/*%s*/MINIAODSIM" % campaign) all_datasets.extend(output["response"]["payload"]) print "Found %i total datasets on DAS for campaigns: %s" % ( len(all_datasets), ", ".join(campaigns)) datasets = [] cut_strs = ["RelVal", "Radion", "/GG_", "/RS", "X53", "Grav", "Tstar", "Bstar", "LQ", "Wprime", \ "Zprime", "Bprime", "Tprime", "_FlatPt", "-gun", "DarkMatter", "DM", "ChargedHiggs", \ "DisplacedSUSY", "GGJets", "GluGlu", "NNPDF", "LFV", "ToGG", "WToTauNu_M-", "WToMuNu_M-", \ "WToENu_M-", "XXTo4J", "HToZATo", "SMS-T2bH", "VBFHToTauTau", "VBF_HToMuMu", "VBF_HToZZTo4L" \ "WJetsToQQ", "RAWAODSIM", "RECODEBUG", "BlackHole", "NMSSM", "Qstar", "RPV", "Upsilon"] for dataset in all_datasets: isBad = False for cut_str in cut_strs: if cut_str in dataset:
def fetch_nocache(q, **kwargs):
    print "making request for {}".format(q)
    return dis.query(q, **kwargs)["response"]["payload"]
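# Usage sketch (illustrative; assumes dis_client is imported as "dis", as in the
# __main__ block further down). The "nocache" in the name suggests this always
# hits the server rather than any local cache:
# files = fetch_nocache("/MET/Run2017B-31Mar2018-v1/MINIAOD", typ="files", detail=True)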
# open json and make a runlumi object out of it
with open(fname, "r") as fhin:
    js = json.load(fhin)
all_run_lumis = lu.RunLumis(js)

# print for Twiki copy-paste purposes
print "---++++ !Lumis per era using JSON: %s" % fname
print "<br />%EDITTABLE{}%"
print "| *Era* | *First run* | *Last run* | *Int. lumi [/fb]* | "
for dataset_pattern in dataset_patterns:
    runs = set([])
    # for each pd in the pds above, make an inclusive set of runs
    # it could be the case that the first run is not present in a PD, so we are essentially
    # doing an OR of the few PDs above
    for pd in pds:
        dataset = dataset_pattern.format(pd=pd)
        runs.update(set(dis.query(dataset, typ="runs")["response"]["payload"]))
    # the min and max runs we'll consider as the start and end of that era
    first_run = min(runs)
    last_run = max(runs)
    # get the integrated luminosity in the run range
    int_lumi = all_run_lumis.getIntLumi(first_run=first_run, last_run=last_run)
    era = dataset_pattern.split("/")[2]
    int_lumi_fb = int_lumi / 1000.0
    print "| !%s | %i | %i | %.3f |" % (era, first_run, last_run, int_lumi_fb)
    sys.stdout.flush()
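# Illustrative inputs for the block above (assumptions; the real definitions sit
# earlier in the script): "lu" is the run/lumi helper module, "fname" points at a
# lumi JSON, and the patterns are OR'd over a few primary datasets, e.g.
# pds = ["MET", "JetHT", "SingleMuon"]
# dataset_patterns = ["/{pd}/Run2017B-31Mar2018-v1/MINIAOD",
#                     "/{pd}/Run2017C-31Mar2018-v1/MINIAOD"]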
typ = None  # current "---+++" section heading; stays None until the first one is seen
types = []
d_samples_to_type = {}
d_dataset_to_twikiline = {}
for line in old_raw.split("\n"):
    if "---+++" in line:
        typ = line.split("---+++")[1]
        types.append(typ)
    if line.count("|") != 13 or "Dataset*" in line:
        continue
    dataset = line.split("|")[1].strip()
    if typ:
        d_samples_to_type[dataset] = typ
        d_dataset_to_twikiline[dataset] = line

old_samples = twiki.get_samples(username=twiki_username, page=old_twiki, get_unmade=False, assigned_to="all")
already_new_samples = twiki.get_samples(username=twiki_username, page=new_twiki, get_unmade=False, assigned_to="all")
old_datasets = [s["dataset"] for s in old_samples if "dataset" in s]
new_datasets = dis_client.query("/*/*%s*/MINIAODSIM" % campaign_string)["response"]["payload"]
already_new_datasets = [s["dataset"] for s in already_new_samples if "dataset" in s]
# print new_datasets
# /TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext2-v1/MINIAODSIM
# print [nd for nd in new_datasets if "ttw" in nd.lower()]

# for each old, find the new sample that has the same content between the first two
# slashes (the Primary Dataset) and whose ext number matches
# map old dataset name --> new dataset name
# map new dataset name --> old dataset name
# print [od for od in old_datasets if "QCD_Pt_470to600_TuneCUETP8M1_13TeV_pythia8" in od]
# print [od for od in new_datasets if "QCD_Pt_470to600_TuneCUETP8M1_13TeV_pythia8" in od]
d_old_to_new = {}
for old in old_datasets:
    matches = []
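# Hedged sketch of the matching described above (an assumption, not the original
# implementation, which is truncated here): treat an old and a new dataset as
# matching when they share the primary dataset (the field between the first two
# slashes) and the same "_extN" tag, if any.
import re

def match_old_to_new(old_datasets, new_datasets):
    def key(ds):
        pd = ds.split("/")[1]  # primary dataset
        m = re.search(r"_ext\d+", ds)
        return pd, (m.group(0) if m else "")
    d = {}
    for old in old_datasets:
        matches = [new for new in new_datasets if key(new) == key(old)]
        if matches:
            d[old] = matches[0]
    return d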
("/SingleMuon/Run2018D-PromptReco-v2/MINIAOD", "data_Run2018D_SingleMuon_PromptReco"), ("/EGamma/Run2018D-22Jan2019-v2/MINIAOD", "data_Run2018D_EGamma_22Jan2019"), ], } os.system("mkdir -p configs/{0}".format(TAG)) for year in datasets: print year for ds, short in datasets[year]: if "WH_HtoRhoGammaPhiGamma" in ds: loc = "/hadoop/cms/store/group/snt/run2_mc2018_private/WH_HtoRhoGammaPhiGamma_privateMC_102x_MINIAOD_v1" else: info = dis_client.query(ds, "snt")["response"]["payload"] loc = info[0]["location"] print loc outdir = "/hadoop/cms/store/user/bemarsh/WHphigamma/{0}/{1}/{2}".format( TAG, year, short) fout = open("configs/{0}/config_{1}_{2}.cmd".format(TAG, year, short), 'w') fout.write(""" universe=vanilla when_to_transfer_output = ON_EXIT +DESIRED_Sites="T2_US_UCSD" +remote_DESIRED_Sites="T2_US_UCSD" +Owner = undefined log=logs/condor_submit.log
#!/bin/env python
import dis_client
import sys

response = dis_client.query(q=sys.argv[1], typ="basic", detail=False)
samples = response["response"]["payload"]
samples_nevents = []
for sample in samples:
    response = dis_client.query(q=sample, typ="basic", detail=False)
    nevent = response["response"]["payload"]["nevents"]
    samples_nevents.append((sample, nevent))
samples_nevents.sort(key=lambda x: x[1], reverse=True)
for sample, nevent in samples_nevents:
    print sample, nevent
with open(fname, "r") as fhin: js = json.load(fhin) all_run_lumis = lu.RunLumis(js) # print for Twiki copy-paste purposes print "---++++ !Lumis per era using JSON: %s" % fname print "<br />%EDITTABLE{}%" print "| *Era* | *First run* | *Last run* | *Int. lumi [/fb]* | " for dataset_pattern in dataset_patterns: runs = set([]) # for each pd in the pds above, make an inclusive set of runs # it could be the case that the first run is not present in a PD, so we are essentially # doing an OR of the few PDs above for pd in pds: dataset = dataset_pattern.format(pd=pd) runs.update( set(dis.query(dataset, typ="runs")["response"]["payload"]) ) # the min and max runs we'll consider as the start and end of that era first_run = min(runs) last_run = max(runs) # get the integrated luminosity in the run range int_lumi = all_run_lumis.getIntLumi(first_run=first_run, last_run=last_run) era = dataset_pattern.split("/")[2] int_lumi_fb = int_lumi / 1000.0 print "| !%s | %i | %i | %.3f |" % (era, first_run, last_run, int_lumi_fb) sys.stdout.flush()
if __name__ == "__main__":
    # campaigns = ["RunIIFall17MiniAODv2"]  # , "RunIISpring16MiniAODv2"]
    campaigns = ["RunIIFall17MiniAODv2", "MiniAODv3", "RunIIAutumn"]  # , "RunIISpring16MiniAODv2"]
    old_fname = "past_samples.txt"
    all_datasets = []
    for campaign in campaigns:
        output = dis_client.query("/*/*%s*/MINIAODSIM" % campaign)
        all_datasets.extend(output["response"]["payload"])
    print "Found %i total datasets on DAS for campaigns: %s" % (len(all_datasets), ", ".join(campaigns))

    datasets = []
    for dataset in all_datasets:
        isBad = False
        for cut_str in cut_strs:  # the same veto-substring list defined above
            if cut_str in dataset:
                isBad = True
                break
        if isBad:
            continue
if __name__ == "__main__": todelete = [] toadd = [] badpu = [] for type in samples_2017: for ds in samples_2017[type]: vals = samples_2017[type][ds] if len(vals) > 4 and vals[4]=="DELETE": todelete.append(ds) if len(vals) > 4 and vals[4]=="TOADD": toadd.append(ds) if "Run2017" not in ds and (len(vals) <= 4 or vals[4] != "DELETE"): out = dis_client.query(ds, "parents")["response"]["payload"]["parents"] if len(out) < 1 or "PU2017" not in out[0]: badpu.append(ds) print "Datasets to replace:" for ds in todelete: print ds print "\nDatasets to add:" for ds in toadd: print ds print "\nDatasets with old PU and no replacement:" for ds in badpu: print ds
fs1fb.close()

nsamps = 0
nfiles = 0
nevts = 0
outdir = "{0}/file_lists".format(tag)
os.system("mkdir -p " + outdir)
for type in samples:
    if type not in do:
        continue
    for ds in samples[type]:
        if "RebalanceAndSmear" in tag and type == "data" and "JetHT" not in ds:
            continue
        info = dis_client.query(ds, "snt")["response"]["payload"]
        if len(info) == 0:
            if type == "signal":
                if "2016" in tag:
                    sigdir = sigdir2016
                if "2017" in tag:
                    sigdir = sigdir2017
                sigdir += ds.strip("/").replace("/", "_") + "_CMS4_V10-02-05"
                info = [{
                    "location": sigdir,
                    "cms3tag": "CMS4_V10-02-05",
                    "xsec": 1.0,
                    "kfactor": 1.0,
                    "filter_eff": 1.0,
                    "nevents_out": 1,
                }]
            else:
if line.find("Add") != -1: fullfilename = line.split()[2] ifile = os.path.basename(fullfilename).split("_")[-1].split( ".root")[0] nevents_to_file_dict[str(data["ijob_to_nevents"]["{}".format( ifile)][0])] = fullfilename file_to_nevents_dict[fullfilename] = str( data["ijob_to_nevents"]["{}".format(ifile)][0]) # Get dis_client query pattern dis_query_dataset_pattern = "/" + os.path.basename( dirpath.rsplit("MINIAODSIM")[0] + "MINIAODSIM*") dis_query_dataset_pattern = dis_query_dataset_pattern.replace("-", "_") # Use DIS to parse hadoop path from MINIAOD sample name result = dis_client.query(q=dis_query_dataset_pattern, typ="snt") status = result["response"]["status"] if status == "success": payloads = result["response"]["payload"] for payload in payloads: if payload["cms3tag"].find("CMS3") != -1: continue loc = payload["location"] nevents_out = payload["nevents_out"] dataset_name = payload["dataset_name"] if dataset_name not in skipped_fractions: skipped_fractions[dataset_name] = 0 #print nevents_out #if os.path.normpath(loc) != os.path.normpath(dirpath): # print loc # print dirpath # print "ERROR"
if __name__ == "__main__": executor = concurrent.futures.ProcessPoolExecutor(12) import dis_client as dis # files = ( # dis.query("/MET/Run2018A-17Sep2018-v1/MINIAOD",typ="files",detail=True)["payload"] # + dis.query("/MET/Run2018B-17Sep2018-v1/MINIAOD",typ="files",detail=True)["payload"] # + dis.query("/MET/Run2018C-17Sep2018-v1/MINIAOD",typ="files",detail=True)["payload"] # + dis.query("/MET/Run2018D-PromptReco-v2/MINIAOD",typ="files",detail=True)["payload"] # ) # outdir = "outputs_full2018/" files = (dis.query( "/MET/Run2017B-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"] + dis.query( "/MET/Run2017C-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"] + dis.query( "/MET/Run2017D-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"] + dis.query("/MET/Run2017E-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"] + dis.query("/MET/Run2017F-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"]) outdir = "outputs_full2017/" fnames = [f["name"] for f in files if f["nevents"] > 0] print(len(fnames))
#!/bin/python
import dis_client
import commands
import json

search_list = "275836:75:151381462,275847:360:479181129,275658:250:491930764,276775:1090:1902546029,276525:2453:4196742942,276501:141:232625640,276501:1277:2150454271,276811:502:912019671,277069:189:237076988,277305:372:562512901,278770:390:752183616,278509:106:62909241,278345:314:469092385,278509:623:966895086,279766:671:1247479543,279115:298:424149717"
list_of_events = search_list.split(",")
dataset_prefixes = ["JetHT"]
for prefix in dataset_prefixes:
    data = dis_client.query(q="/%s/Run2016*-23Sep2016*/MINIAOD" % (prefix))
    list_of_datasets = data['response']['payload']
    for dataset in list_of_datasets:
        print dataset
        for event in list_of_events:
            cms3_data = dis_client.query(q="%s,%s" % (dataset, event), typ="pick_cms3")
            if cms3_data['response']['fail_reason'] != '':
                continue
            if not cms3_data['response']['payload']['files']:
                continue
            print dataset
            print "\t%s %s" % (cms3_data['response']['payload']['files'], event.replace(":", ", "))
            if cms3_data['response']['payload']['skim_command']:
                print "\t%s" % (cms3_data['response']['payload']['skim_command'])
                status, output = commands.getstatusoutput("./%s -o %s" % (cms3_data['response']['payload']['skim_command'], cms3_data['response']['payload']['files'][0].split("/")[-1]))