def get_tasks(): pds = [ "MuonEG","SingleElectron","MET","SinglePhoton","SingleMuon","DoubleMuon","JetHT","DoubleEG","HTMHT" ] out = dis.query("/*/Run2016*-17Jul2018*/MINIAOD") all_names = out["response"]["payload"] all_names = sorted([ds for ds in all_names if any("/{}/".format(pd) in ds for pd in pds)]) out = dis.query("/*/Run2016*-17Jul2018*/MINIAOD,cms3tag=*17 | grep dataset_name",typ="snt") already_done = out["response"]["payload"] already_done = sorted([ds for ds in already_done if any("/{}/".format(pd) in ds for pd in pds)]) dataset_names = list(set(all_names)-set(already_done)) # dataset_names = [ds for ds in dataset_names if ds not in ["/MET/Run2016H-17Jul2018-v1/MINIAOD"]] # dataset_names = [] # dataset_names += ["/MET/Run2016B-17Jul2018_ver1-v1/MINIAOD"] # dataset_names += ["/SingleMuon/Run2016B-17Jul2018_ver2-v1/MINIAOD"] # deleted a corruption and want to make sure this gets redone # dataset_names += ["/SinglePhoton/Run2016E-17Jul2018-v1/MINIAOD"] # deleted a corruption and want to make sure this gets redone # dataset_names = [] # dataset_names += ["/JetHT/Run2016B-17Jul2018_ver2-v2/MINIAOD"] # deleted a corruption and want to make sure this gets redone tasks = [] for dsname in dataset_names: cmsswver = "CMSSW_9_4_9" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V09-04-17_949.tar.gz" pset = "psets/pset_data2016_94x_v2.py" scramarch = "slc6_amd64_gcc630" task = CMSSWTask( sample = DBSSample(dataset=dsname), open_dataset = False, events_per_output = 400e3, output_name = "merged_ntuple.root", tag = "CMS4_V09-04-17", pset = pset, pset_args = "data=True prompt=False name=DQM", scram_arch = scramarch, cmssw_version = cmsswver, condor_submit_params = {"use_xrootd":True}, tarfile = tarfile, is_data = True, publish_to_dis = True, snt_dir = True, special_dir = "run2_data2016_94x/", ) tasks.append(task) return tasks
def load_from_dis(self):
    (status, val) = self.check_params_for_dis_query()
    if not status:
        self.logger.error(
            "[Dataset] Failed to load info for dataset %s from DIS because parameter %s is missing."
            % (self.info["dataset"], val))
        return False

    query_str = "status=%s, dataset_name=%s, sample_type=%s" % (
        Constants.VALID_STR, self.info["dataset"], self.info["type"])
    if self.info["type"] != "CMS3":
        query_str += ", analysis=%s" % (self.info["analysis"])
    if self.info["tag"]:
        query_str += ", cms3tag=%s" % (self.info["tag"])

    response = {}
    try:
        response = dis.query(query_str, typ='snt', detail=True)
        response = response["payload"]
        if len(response) == 0:
            self.logger.error(
                " Query found no matching samples for: status = %s, dataset = %s, type = %s analysis = %s"
                % (self.info["status"], self.info["dataset"],
                   self.info["type"], self.info["analysis"]))
            return False

        # If several samples match, sort so the preferred one ends up first
        if len(response) > 1:
            # response = self.sort_query_by_key(response, "timestamp")
            response = self.sort_query_by_key(response, "cms3tag")

        # Optionally drop samples whose tag matches a wildcard exclude pattern
        if hasattr(self, "exclude_tag_pattern") and self.exclude_tag_pattern:
            new_response = []
            for samp in response:
                tag = samp.get("tag", samp.get("cms3tag", ""))
                if fnmatch.fnmatch(tag, self.exclude_tag_pattern):
                    continue
                new_response.append(samp)
            response = new_response

        self.info["gtag"] = response[0]["gtag"]
        self.info["kfact"] = response[0]["kfactor"]
        self.info["xsec"] = response[0]["xsec"]
        self.info["filtname"] = response[0].get("filter_name", "NoFilter")
        self.info["efact"] = response[0]["filter_eff"]
        self.info["analysis"] = response[0].get("analysis", "")
        self.info["tag"] = response[0].get("tag", response[0].get("cms3tag"))
        self.info["version"] = response[0].get("version", "v1.0")
        self.info["nevts_in"] = response[0]["nevents_in"]
        self.info["nevts"] = response[0]["nevents_out"]
        self.info["location"] = response[0]["location"]
        self.info["status"] = response[0].get("status", Constants.VALID_STR)
        self.info["twiki"] = response[0].get("twiki_name", "")
        self.info["files"] = response[0].get("files", [])
        self.info["comments"] = response[0].get("comments", "")
        return True
    except:
        return False

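# Side note (illustration only, not part of the original file): the exclude_tag_pattern
# filter above uses shell-style wildcards via fnmatch, so a pattern like "CMS4_V10-02-*"
# (hypothetical value) would skip every matching cms3tag while keeping everything else.
import fnmatch
print(fnmatch.fnmatch("CMS4_V10-02-05", "CMS4_V10-02-*"))  # True  -> sample skipped
print(fnmatch.fnmatch("CMS4_V09-04-17", "CMS4_V10-02-*"))  # False -> sample kept
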
def do_update_dis(self):
    if hasattr(self, "read_only") and self.read_only:
        self.logger.debug("Not updating DIS since this sample has read_only=True")
        return False

    self.logger.debug("Updating DIS")
    query_str = "dataset_name={},sample_type={},cms3tag={},gtag={},location={},nevents_in={},nevents_out={},xsec={},kfactor={},filter_eff={},timestamp={}".format(
        self.info["dataset"], self.info["tier"], self.info["tag"],
        self.info["gtag"], self.info["location"], self.info["nevents_in"],
        self.info["nevents"], self.info["xsec"], self.info["kfact"],
        self.info["efact"], int(time.time()))
    response = {}
    try:
        succeeded = False
        response = dis.query(query_str, typ='update_snt')
        response = response["payload"]
        if "updated" in response and str(response["updated"]).lower() == "true":
            succeeded = True
            self.logger.debug("Updated DIS")
    except:
        pass
    if not succeeded:
        self.logger.debug(
            "WARNING: failed to update sample using DIS with query_str: {}".format(query_str))
        self.logger.debug("WARNING: got response: {}".format(response))
    return succeeded

def get_file_replicas(dsname):
    rawresponse = dis.query(dsname, typ="sites", detail=True)
    info = rawresponse["payload"]["block"]

    file_replicas = {}
    for block in info:
        for fd in block["file"]:
            filesizeGB = round(fd["bytes"] / (1.0e6), 2)
            fname = fd["name"]
            nodes = []
            for node in fd["replica"]:
                name = str(node["node"])
                if node.get("se", None) and "TAPE" in node["se"]:
                    continue  # no tape
                if "_US_" not in name:
                    continue  # only US
                if "FNAL" in name:
                    # can't run directly at fnal, but purdue is basically next to fnal
                    name = "T2_US_Purdue"
                # though if it's already at purdue anyway, no need to duplicate the node name
                if name in nodes:
                    continue
                nodes.append(name)
            file_replicas[fname] = {
                "name": fname,
                "nodes": nodes,
                "filesizeGB": filesizeGB,
            }
    return file_replicas

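# Usage sketch (illustrative, not from the original source): tally how much data each US
# disk site hosts for a dataset, using the dict returned by get_file_replicas(). The
# dataset name below is just an example borrowed from elsewhere in this file.
if __name__ == "__main__":
    replicas = get_file_replicas("/MET/Run2016H-17Jul2018-v1/MINIAOD")
    per_node = {}
    for fname, fdict in replicas.items():
        for node in fdict["nodes"]:
            per_node[node] = per_node.get(node, 0.0) + fdict["filesizeGB"]
    # Print sites in descending order of hosted volume (same units as filesizeGB above)
    for node, tot in sorted(per_node.items(), key=lambda x: -x[1]):
        print("{}: {:.1f}".format(node, tot))
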
def get_file_replicas_uncached(dsname, dasgoclient=False):
    if os.getenv("USEDASGOCLIENT", False):
        dasgoclient = True

    if dasgoclient:
        url = "https://cmsweb.cern.ch/phedex/datasvc/json/prod/fileReplicas?dataset={}".format(dsname)
        response = urllib.urlopen(url).read()
        info = json.loads(response)["phedex"]["block"]
    else:
        rawresponse = dis.query(dsname, typ="sites", detail=True)
        info = rawresponse["payload"]["block"]

    file_replicas = {}
    for block in info:
        for fd in block["file"]:
            filesizeGB = round(fd["bytes"] / (1.0e6), 2)
            fname = fd["name"]
            nodes = []
            for node in fd["replica"]:
                name = str(node["node"])
                if node.get("se", None) and "TAPE" in node["se"]:
                    continue  # no tape
                if "_US_" not in name:
                    continue  # only US
                if "FNAL" in name:
                    # can't run directly at fnal, but purdue is basically next to fnal
                    name = "T2_US_Purdue"
                # though if it's already at purdue anyway, no need to duplicate the node name
                if name in nodes:
                    continue
                nodes.append(name)
            file_replicas[fname] = {
                "name": fname,
                "nodes": nodes,
                "filesizeGB": filesizeGB,
            }
    return file_replicas

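# Usage sketch (assumed, not from the original source): setting the USEDASGOCLIENT
# environment variable forces the first branch above, which (despite the flag's name)
# reads file replicas straight from the PhEDEx datasvc JSON endpoint instead of going
# through dis.query(). The dataset name is illustrative.
import os
os.environ["USEDASGOCLIENT"] = "1"
replicas = get_file_replicas_uncached("/MET/Run2016H-17Jul2018-v1/MINIAOD")
print("{} files with usable US disk replicas".format(len(replicas)))
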
def do_dis_query(self, ds, typ="files"):
    self.logger.debug("Doing DIS query of type {0} for {1}".format(typ, ds))
    rawresponse = dis.query(ds, typ=typ, detail=True)
    response = rawresponse["payload"]
    if not len(response):
        self.logger.error("Query failed with response:" + str(rawresponse))
    return response

def load_from_dis(self):
    (status, val) = self.check_params_for_dis_query()
    if not status:
        self.logger.error(
            "[Dataset] Failed to load info for dataset %s from DIS because parameter %s is missing."
            % (self.info["dataset"], val))
        return False

    query_str = "status=%s, dataset_name=%s, sample_type=%s" % (
        Constants.VALID_STR, self.info["dataset"], self.info["type"])
    if self.info["type"] != "CMS3":
        query_str += ", analysis=%s" % (self.info["analysis"])

    response = {}
    try:
        dis_status = False
        response = dis.query(query_str, typ='snt')
        response = response["response"]["payload"]
        if len(response) == 0:
            self.logger.error(
                " Query found no matching samples for: status = %s, dataset = %s, type = %s analysis = %s"
                % (self.info["status"], self.info["dataset"],
                   self.info["type"], self.info["analysis"]))
            return False
        if len(response) > 1:
            response = self.sort_query_by_timestamp(response)

        self.info["gtag"] = response[0]["gtag"]
        self.info["kfact"] = response[0]["kfactor"]
        self.info["xsec"] = response[0]["xsec"]
        self.info["filtname"] = response[0]["filter_name"]
        self.info["efilt"] = response[0]["filter_eff"]
        self.info["analysis"] = response[0]["analysis"]
        self.info["tag"] = response[0].get("tag", response[0].get("cms3tag"))
        self.info["version"] = response[0].get("version", "v1.0")
        self.info["nevts_in"] = response[0]["nevents_in"]
        self.info["nevts"] = response[0]["nevents_out"]
        self.info["location"] = response[0]["location"]
        self.info["creator"] = response[0]["assigned_to"]
        self.info["status"] = response[0].get("status", Constants.VALID_STR)
        self.info["twiki"] = response[0]["twiki_name"]
        self.info["siblings"] = response[0].get("siblings", [])
        self.info["files"] = response[0].get("files", [])
        self.info["comments"] = response[0]["comments"]
        return True
    except:
        return False

def get_tasks(): pds = [ "SingleElectron", "MET", "SinglePhoton", "SingleMuon", "DoubleMuon", "JetHT", "DoubleEG", "HTMHT" ] out = dis.query("/*/Run2016*-17Jul2018*/MINIAOD") dataset_names = out["response"]["payload"] dataset_names = sorted([ ds for ds in dataset_names if any("/{}/".format(pd) in ds for pd in pds) ]) tasks = [] for dsname in dataset_names: cmsswver = "CMSSW_9_4_9" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V09-04-17_949.tar.gz" pset = "psets_cms4/main_pset_V09-04-17.py" scramarch = "slc6_amd64_gcc630" task = CMSSWTask( sample=DBSSample(dataset=dsname), open_dataset=False, events_per_output=400e3, output_name="merged_ntuple.root", tag="CMS4_V09-04-17", pset=pset, pset_args="data=True prompt=False name=DQM", scram_arch=scramarch, cmssw_version=cmsswver, condor_submit_params={"use_xrootd": True}, tarfile=tarfile, is_data=True, publish_to_dis=True, snt_dir=True, special_dir="run2_data2016_94x/", ) tasks.append(task) return tasks
def get_tasks(): pds = ["MuonEG", "DoubleMuon", "EGamma", "JetHT", "MET", "SingleMuon"] # pds = ["MuonEG","DoubleMuon","EGamma"] #, "JetHT", "MET", "SingleMuon"] # pds = ["EGamma"] proc_vers = [ ("Run2018A", "v1"), ("Run2018A", "v2"), ("Run2018A", "v3"), ("Run2018B", "v1"), ("Run2018B", "v2"), ("Run2018C", "v1"), ("Run2018C", "v2"), ("Run2018C", "v3"), # ("Run2018C","v1"), # ("Run2018D","v1"), # very short, not in golden json, most PDs are missing on DAS # ("Run2018D","v2"), ] dataset_names = [ "/{0}/{1}-17Sep2018-{2}/MINIAOD".format(x[0], x[1][0], x[1][1]) for x in itertools.product(pds, proc_vers) ] # skip datasets that aren't on das # out = dis.query("/*/Run2018*-17Sep2018*/MINIAOD") # NOTE Screw it, just get all the datasets and pretend they are open. Comp/ops people allow production->valid flag if # the dataset is 99% complete to not "block their distribution" even though it's stupid. # See https://hypernews.cern.ch/HyperNews/CMS/get/physics-validation/3267/1/1/1/1/2/2.html out = dis.query("/*/Run2018*-17Sep2018*/MINIAOD,all") dis_names = out["response"]["payload"] dis_names = [ ds for ds in dis_names if any("/{}/".format(pd) in ds for pd in pds) ] dataset_names = list(set(dataset_names) & set(dis_names)) # print dataset_names # blah tasks = [] for dsname in dataset_names: scram_arch = "slc6_amd64_gcc700" cmsswver = "CMSSW_10_2_4_patch1" pset = "psets/pset_data2018_102x.py" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10-02-01_1024p1.tar.xz" task = CMSSWTask( sample=DBSSample(dataset=dsname), open_dataset=True, # flush = True, # flush = ((i+1)%48==0), # flush = ((i)%48==0), events_per_output=300e3, output_name="merged_ntuple.root", tag="CMS4_V10-02-01", scram_arch=scram_arch, pset=pset, pset_args= "data=True prompt=True", # NOTE, this isn't actually prompt. but just read the NtupleMaker 10x branch readme, ok? cmssw_version=cmsswver, # condor_submit_params = {"use_xrootd":True}, tarfile=tarfile, is_data=True, publish_to_dis=True, snt_dir=True, special_dir="run2_data2018/", ) tasks.append(task) return tasks
def get_tasks(): pds = ["MuonEG", "DoubleMuon", "EGamma", "JetHT", "MET", "SingleMuon"] proc_vers = [ ("Run2018A", "v1"), ("Run2018A", "v2"), ("Run2018A", "v3"), ("Run2018B", "v1"), ("Run2018B", "v2"), ("Run2018C", "v1"), ("Run2018C", "v2"), ("Run2018C", "v3"), ("Run2018D", "v1"), # very short, not in golden json, most PDs are missing on DAS ("Run2018D", "v2"), ] dataset_names = [ "/{0}/{1}-PromptReco-{2}/MINIAOD".format(x[0], x[1][0], x[1][1]) for x in itertools.product(pds, proc_vers) ] # skip datasets that aren't on das out = dis.query("/*/Run2018*-PromptReco*/MINIAOD") dis_names = out["response"]["payload"] dis_names = [ ds for ds in dis_names if any("/{}/".format(pd) in ds for pd in pds) ] dataset_names = list(set(dataset_names) & set(dis_names)) tasks = [] for dsname in dataset_names: cmsswver, tarfile = None, None scram_arch = "slc6_amd64_gcc630" pset = "psets/pset_prompt10x_data_1.py" if "Run2018A-PromptReco-v1" in dsname: cmsswver = "CMSSW_10_1_2_patch2" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1012p2.tar.gz" elif "Run2018A-PromptReco-v2" in dsname: cmsswver = "CMSSW_10_1_5" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz" elif "Run2018A-PromptReco-v3" in dsname: cmsswver = "CMSSW_10_1_5" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz" elif "Run2018B-PromptReco-v1" in dsname: cmsswver = "CMSSW_10_1_5" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1015.tar.gz" elif "Run2018B-PromptReco-v2" in dsname: cmsswver = "CMSSW_10_1_7" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1017.tar.gz" elif ("Run2018C-PromptReco-" in dsname) or ("Run2018D-PromptReco-" in dsname): cmsswver = "CMSSW_10_2_1" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10_01_00_1021_nodeepak8.tar.gz" scram_arch = "slc6_amd64_gcc700" pset = "psets/pset_prompt10x_data_2.py" task = CMSSWTask( sample=DBSSample(dataset=dsname), open_dataset=False, # flush = ((i+1)%48==0), # flush = ((i)%48==0), events_per_output=350e3, output_name="merged_ntuple.root", recopy_inputs=False, tag="CMS4_V10-01-00", scram_arch=scram_arch, # global_tag = "", # if global tag blank, one from DBS is used pset=pset, pset_args="data=True prompt=True", cmssw_version=cmsswver, # condor_submit_params = {"use_xrootd":True}, tarfile=tarfile, is_data=True, publish_to_dis=True, snt_dir=True, special_dir="run2_data2018_prompt/", ) tasks.append(task) return tasks
"/WWW_4F_TuneCUETP8M1_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM", "/WWZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM", "/WW_TuneCUETP8M1_13TeV-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3_ext1-v2/MINIAODSIM", "/WZTo3LNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM", "/WZZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM", "/WZ_TuneCUETP8M1_13TeV-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3_ext1-v2/MINIAODSIM", "/WpWpJJ_EWK-QCD_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM", "/WpWpJJ_EWK_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM", "/WpWpJJ_QCD_TuneCUETP8M1_13TeV-madgraph-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM", "/ZGTo2LG_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3_ext1-v1/MINIAODSIM", "/ZZTo4L_13TeV_powheg_pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM", "/ZZZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM", "/tZq_ll_4f_13TeV-amcatnlo-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3_ext1-v1/MINIAODSIM", "/ttHToNonbb_M125_TuneCUETP8M2_ttHtranche3_13TeV-powheg-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v2/MINIAODSIM", "/ttZJets_13TeV_madgraphMLM-pythia8/RunIISummer16MiniAODv3-PUMoriond17_94X_mcRun2_asymptotic_v3-v1/MINIAODSIM", ] # Now query SNT samples with the same PD, get the latest, then fetch the xsecs to print out # dsname|xsec|kfactor|efactor # which is the format we use in all the metis submission scripts import scripts.dis_client as dis for ds in dsnames: q = "/{}/*80X*/MINIAODSIM".format(ds.split("/")[1]) ret = sorted(dis.query(q,typ="snt")["response"]["payload"],key=lambda x:-x.get("timestamp",-1)) if not ret: print "ERROR with {}".format(ds) continue ret = ret[0] print "{}|{}|{}|{}".format(ds, ret["xsec"], ret["kfactor"], ret["filter_eff"])
def get_tasks(): pds = ["MuonEG","DoubleMuon","EGamma", "JetHT", "MET", "SingleMuon"] # pds = ["MuonEG","DoubleMuon","EGamma"] #, "JetHT", "MET", "SingleMuon"] # pds = ["EGamma"] proc_vers = [ ("Run2018A","v1"), ("Run2018A","v2"), ("Run2018A","v3"), ("Run2018B","v1"), ("Run2018B","v2"), ("Run2018C","v1"), ("Run2018C","v2"), ("Run2018C","v3"), # ("Run2018C","v1"), # ("Run2018D","v1"), # very short, not in golden json, most PDs are missing on DAS # ("Run2018D","v2"), ] dataset_names = ["/{0}/{1}-17Sep2018-{2}/MINIAOD".format(x[0],x[1][0],x[1][1]) for x in itertools.product(pds,proc_vers)] # skip datasets that aren't on das # out = dis.query("/*/Run2018*-17Sep2018*/MINIAOD") # NOTE Screw it, just get all the datasets and pretend they are open. Comp/ops people allow production->valid flag if # the dataset is 99% complete to not "block their distribution" even though it's stupid. # See https://hypernews.cern.ch/HyperNews/CMS/get/physics-validation/3267/1/1/1/1/2/2.html out = dis.query("/*/Run2018*-17Sep2018*/MINIAOD,all") dis_names = out["response"]["payload"] dis_names = [ds for ds in dis_names if any("/{}/".format(pd) in ds for pd in pds)] dataset_names = list(set(dataset_names) & set(dis_names)) # print dataset_names # blah tasks = [] for dsname in dataset_names: scram_arch = "slc6_amd64_gcc700" cmsswver = "CMSSW_10_2_4_patch1" pset = "psets/pset_data2018_102x.py" tarfile = "/nfs-7/userdata/libCMS3/lib_CMS4_V10-02-01_1024p1.tar.xz" task = CMSSWTask( sample = DBSSample(dataset=dsname), open_dataset = True, # flush = True, # flush = ((i+1)%48==0), # flush = ((i)%48==0), events_per_output = 300e3, output_name = "merged_ntuple.root", tag = "CMS4_V10-02-01", scram_arch = scram_arch, pset = pset, pset_args = "data=True prompt=True", # NOTE, this isn't actually prompt. but just read the NtupleMaker 10x branch readme, ok? cmssw_version = cmsswver, # condor_submit_params = {"use_xrootd":True}, tarfile = tarfile, is_data = True, publish_to_dis = True, snt_dir = True, special_dir = "run2_data2018/", ) tasks.append(task) return tasks
for ds in sorted(dsnew & dsold, key=old.get): oldloc = old[ds] newloc = new[ds] if not os.path.exists(oldloc): continue nmiss = num_missing_files(newloc) if nmiss > 0: print "# Can't delete {} because there's {} missing files still".format(oldloc,nmiss) continue print get_cmd(oldloc) print if __name__ == "__main__": # 2017 MC old = {x["dataset_name"]:x["location"] for x in dis.query("*Fall17MiniAOD*,cms3tag=CMS4_V10-02-04 | grep dataset_name,location",typ="snt")["response"]["payload"]} new = {x["dataset_name"]:x["location"] for x in dis.query("*Fall17MiniAOD*,cms3tag=CMS4_V10-02-05 | grep dataset_name,location",typ="snt")["response"]["payload"]} dsold, dsnew = set(old), set(new) print_cmds(old,new,label="2017 MC") # 2017 Data old = {x["dataset_name"]:x["location"] for x in dis.query("/*/*Run2017*/MINIAOD,cms3tag=CMS4_V10-02-04 | grep dataset_name,location",typ="snt")["response"]["payload"]} new = {x["dataset_name"]:x["location"] for x in dis.query("/*/*Run2017*/MINIAOD,cms3tag=CMS4_V10-02-05 | grep dataset_name,location",typ="snt")["response"]["payload"]} dsold, dsnew = set(old), set(new) print_cmds(old,new,label="2017 data") # 2016 Data old = {x["dataset_name"]:x["location"] for x in dis.query("/*/*Run2016*/MINIAOD,cms3tag=CMS4_V10-02-04 | grep dataset_name,location",typ="snt")["response"]["payload"]} new = {x["dataset_name"]:x["location"] for x in dis.query("/*/*Run2016*/MINIAOD,cms3tag=CMS4_V10-02-05 | grep dataset_name,location",typ="snt")["response"]["payload"]} dsold, dsnew = set(old), set(new) print_cmds(old,new,label="2016 94X data")
# Check write permissions
print BLUE, "Checking write permissions to UCSD...", ENDC
out = crabCommand('checkwrite', site="T2_US_UCSD", **proxy_file_dict)
print "Done. Status: %s" % out["status"]
print

# Take first dataset name in instructions.txt
print BLUE, "Taking the first sample in instructions.txt. If it's not a FullSim MC sample, then you're going to have a bad time!", ENDC
sample = u.read_samples()[0]
dataset_name = sample["dataset"]
gtag = sample["gtag"]
print " --> %s" % dataset_name
print

# Find the smallest MINIAOD file
filelist = dis.query(dataset_name, detail=True, typ="files")
filelist = filelist["response"]["payload"]
filelist = sorted(filelist, key=lambda x: x.get("sizeGB", 999.0))
smallest_filename = filelist[0]["name"]
print BLUE, "Smallest file", ENDC
print " --> %s" % smallest_filename
print

# Use xrootd to get that file
ntuple_name = "input.root"
print BLUE, "Using xrootd to download the file", ENDC
os.system("xrdcp -f root://xrootd.unl.edu/%s %s" % (smallest_filename, ntuple_name))
if os.path.isfile(ntuple_name):
    print "Success!"
else:
    print "ERROR: failed to download using xrootd"
print

def do_dis_query(self, typ="files"):
    ds = self.info["dataset"]
    self.logger.debug("Doing DIS query of type {0} for {1}".format(typ, ds))
    if not ds:
        self.logger.error("No dataset name declared!")
        return False

    response = {}
    do_test = False
    if do_test:
        if typ in ["files"]:
            response = [
                {u'nevents': 95999, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/C243580A-534C-E711-97A2-02163E01A1FE.root', u'sizeGB': 2.1000000000000001},
                {u'nevents': 104460, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/36C47789-564C-E711-B555-02163E019C8A.root', u'sizeGB': 2.3500000000000001},
                {u'nevents': 140691, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/685FF878-554C-E711-8E35-02163E01A4E3.root', u'sizeGB': 3.1200000000000001},
                {u'nevents': 107552, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/C2147D89-5B4C-E711-843F-02163E01415B.root', u'sizeGB': 2.4100000000000001},
                {u'nevents': 119678, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/0E34AC81-5A4C-E711-9961-02163E01A549.root', u'sizeGB': 2.6800000000000002},
                {u'nevents': 182253, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/6ED7F30F-594C-E711-8DD3-02163E01A2A9.root', u'sizeGB': 4.0499999999999998},
                {u'nevents': 120161, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/68E6D0F8-5C4C-E711-A50F-02163E019DD2.root', u'sizeGB': 2.6699999999999999},
                {u'nevents': 75886, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/172/00000/4AAC3A09-634C-E711-B5C4-02163E019CCE.root', u'sizeGB': 1.1899999999999999},
                {u'nevents': 188508, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/106C6FC9-604C-E711-904C-02163E019C2C.root', u'sizeGB': 4.1500000000000004},
                {u'nevents': 174713, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/164397BB-5F4C-E711-869C-02163E01A3B3.root', u'sizeGB': 4.1500000000000004},
                {u'nevents': 91384, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/36AD29F3-6F4C-E711-90D1-02163E01A491.root', u'sizeGB': 2.1400000000000001},
                {u'nevents': 117960, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/174/00000/5A4A5050-804C-E711-BF0C-02163E01A270.root', u'sizeGB': 1.8400000000000001},
                {u'nevents': 123173, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/84BB76F4-654C-E711-86E5-02163E01A676.root', u'sizeGB': 2.8399999999999999},
                {u'nevents': 178903, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/173/00000/987DAF51-634C-E711-A339-02163E01415B.root', u'sizeGB': 4.1600000000000001},
                {u'nevents': 48567, u'name': u'/store/data/Run2017A/MET/MINIAOD/PromptReco-v2/000/296/168/00000/98029456-6D4C-E711-911F-02163E019E8D.root', u'sizeGB': 1.1299999999999999},
            ]
        if typ in ["config"]:
            response = {
                u'app_name': u'cmsRun',
                u'output_module_label': u'Merged',
                u'create_by': u'*****@*****.**',
                u'pset_hash': u'GIBBERISH',
                u'creation_date': u'2017-06-08 06:02:28',
                u'release_version': u'CMSSW_9_2_1',
                u'global_tag': u'92X_dataRun2_Prompt_v4',
                u'pset_name': None,
            }
    else:
        rawresponse = dis.query(ds, typ=typ, detail=True)
        response = rawresponse["response"]["payload"]
        if not len(response):
            self.logger.error("Query failed with response:" + str(rawresponse["response"]))
    return response