Example #1
import sys
import dis_client

def get_status(ds):
    # Ask McM for the dataset's status; fall back to "unknown" on failure
    status = "unknown"
    try: status = dis_client.query(ds+",this", typ="mcm")["response"]["payload"]["status"]
    except Exception: pass
    # Emit a progress dot so long loops show signs of life
    sys.stdout.write(".")
    sys.stdout.flush()
    return status
Example #2
import dis_client

def get_xsec_efact(ds):
    # Sentinel defaults in case the McM lookup fails
    xsec = -1
    efact = 1
    try:
        payload = dis_client.query(ds, typ="mcm")["response"]["payload"]
        xsec = payload["cross_section"]
        efact = payload["filter_efficiency"]
    except Exception:
        pass
    return xsec, efact
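The two values are typically multiplied to get the effective cross section of a filtered sample. A hypothetical usage sketch; the dataset name below is a placeholder:

# hypothetical call; the dataset name is a placeholder
xsec, efact = get_xsec_efact("/SomeSample/SomeCampaign-v1/MINIAODSIM")
if xsec > 0:
    # effective cross section, assuming McM's usual pb units
    effective_xsec = xsec * efact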
Example #3
import sys
import dis_client

def get_status(ds):
    status = "unknown"
    try:
        status = dis_client.query(ds + ",this",
                                  typ="mcm")["response"]["payload"]["status"]
    except Exception:
        pass
    sys.stdout.write(".")
    sys.stdout.flush()
    return status
Example #4
import sys
import dis_client

def get_driver(dataset_name):

    # Get the query data
    query = dataset_name + ",this"
    data = dis_client.query(query, typ="driver")

    # Check the query data status
    if data["response"]["status"] != "success":
        print "ERROR - Query failed!"
        print "Check your dataset_name = %s" % (dataset_name)
        sys.exit(255)

    # Get the driver commands
    cmsDriver = data["response"]["payload"]["cmsDriver"]

    return format_driver(cmsDriver)
Example #5
                                  typ="mcm")["response"]["payload"]["status"]
    except:
        pass
    sys.stdout.write(".")
    sys.stdout.flush()
    return status


if __name__ == "__main__":

    campaigns = ["RunIISpring16MiniAODv1", "RunIISpring16MiniAODv2"]
    old_fname = "past_samples.txt"

    all_datasets = []
    for campaign in campaigns:
        output = dis_client.query("/*/*%s*/MINIAODSIM" % campaign)
        all_datasets.extend(output["response"]["payload"])

    print "Found %i total datasets on DAS for campaigns: %s" % (
        len(all_datasets), ", ".join(campaigns))

    datasets = []
    cut_strs = ["RelVal", "Radion", "/GG_", "/RS", "X53", "Grav", "Tstar", "Bstar", "LQ", "Wprime", \
                "Zprime", "Bprime", "Tprime", "_FlatPt", "-gun", "DarkMatter", "DM", "ChargedHiggs", \
                "DisplacedSUSY", "GGJets", "GluGlu", "NNPDF", "LFV", "ToGG", "WToTauNu_M-", "WToMuNu_M-", \
                "WToENu_M-", "XXTo4J", "HToZATo", "SMS-T2bH", "VBFHToTauTau", "VBF_HToMuMu", "VBF_HToZZTo4L" \
                "WJetsToQQ", "RAWAODSIM", "RECODEBUG", "BlackHole", "NMSSM", "Qstar", "RPV", "Upsilon"]
    for dataset in all_datasets:
        isBad = False
        for cut_str in cut_strs:
            if cut_str in dataset:
                isBad = True
                break

        if isBad: continue
Example #6
import dis_client as dis

def fetch_nocache(q, **kwargs):
    # Always hit the server; no caching in front of the query
    print "making request for {}".format(q)
    return dis.query(q, **kwargs)["response"]["payload"]
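The _nocache suffix implies a cached sibling elsewhere in the project; a minimal sketch of what that could look like, with a plain dict as a hypothetical in-memory cache:

import dis_client as dis

_cache = {}  # hypothetical in-memory cache keyed by the query string

def fetch(q, **kwargs):
    # Memoize: repeated identical query strings skip the server round-trip.
    # Note the key ignores kwargs, so this only works if typ etc. are fixed.
    if q not in _cache:
        _cache[q] = dis.query(q, **kwargs)["response"]["payload"]
    return _cache[q]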
Example #7
import json
import sys

import dis_client as dis
# note: "lu" below is a project-specific luminosity helper providing RunLumis

# open json and make a runlumi object out of it
with open(fname, "r") as fhin:
    js = json.load(fhin)
all_run_lumis = lu.RunLumis(js)

# print for Twiki copy-paste purposes
print "---++++ !Lumis per era using JSON: %s" % fname
print "<br />%EDITTABLE{}%"
print "| *Era* | *First run* | *Last run* | *Int. lumi [/fb]* | "

for dataset_pattern in dataset_patterns:
    runs = set([])
    # for each pd in the pds above, make an inclusive set of runs
    # it could be the case that the first run is not present in a PD, so we are essentially
    # doing an OR of the few PDs above
    for pd in pds:
        dataset = dataset_pattern.format(pd=pd)
        runs.update(set(dis.query(dataset, typ="runs")["response"]["payload"]))

    # the min and max runs we'll consider as the start and end of that era
    first_run = min(runs)
    last_run = max(runs)

    # get the integrated luminosity in the run range
    int_lumi = all_run_lumis.getIntLumi(first_run=first_run, last_run=last_run)
    era = dataset_pattern.split("/")[2]
    int_lumi_fb = int_lumi / 1000.0
    print "| !%s | %i | %i | %.3f |" % (era, first_run, last_run, int_lumi_fb)

    sys.stdout.flush()
Example #8
#!/usr/bin/env python

import dis_client
import commands
import json

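# comma-separated run:lumi:event identifiers for the events to pick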
search_list = "275836:75:151381462,275847:360:479181129,275658:250:491930764,276775:1090:1902546029,276525:2453:4196742942,276501:141:232625640,276501:1277:2150454271,276811:502:912019671,277069:189:237076988,277305:372:562512901,278770:390:752183616,278509:106:62909241,278345:314:469092385,278509:623:966895086,279766:671:1247479543,279115:298:424149717"
list_of_events = search_list.split(",")

dataset_prefixes = ["JetHT"]

for prefix in dataset_prefixes:
    data = dis_client.query(q="/%s/Run2016*-23Sep2016*/MINIAOD" % (prefix))
    list_of_datasets = data['response']['payload']
    for dataset in list_of_datasets:
        print dataset
        for event in list_of_events:
            cms3_data = dis_client.query(q="%s,%s" % (dataset, event),
                                         typ="pick_cms3")
            if not cms3_data['response']['fail_reason'] == '':
                continue
            if not cms3_data['response']['payload']['files']:
                continue
            print dataset
            print "\t%s %s" % (cms3_data['response']['payload']['files'],
                               event.replace(":", ", "))
            if cms3_data['response']['payload']['skim_command']:
                print "\t%s" % (
                    cms3_data['response']['payload']['skim_command'])
                status, output = commands.getstatusoutput(
                    "./%s -o %s" %
                    (cms3_data['response']['payload']['skim_command'],
                     cms3_data['response']['payload']['files'][0].split("/")[-1]))
Example #9
types = []
typ = None  # track the most recent "---+++" section header
for line in old_raw.split("\n"):
    if "---+++" in line:
        typ = line.split("---+++")[1]
        types.append(typ)
    # keep only well-formed table rows (13 pipe characters), skipping the header row
    if line.count("|") != 13 or "Dataset*" in line: continue
    dataset = line.split("|")[1].strip()
    if typ:
        d_samples_to_type[dataset] = typ
        d_dataset_to_twikiline[dataset] = line

old_samples = twiki.get_samples(username=twiki_username, page=old_twiki, get_unmade=False, assigned_to="all")
already_new_samples = twiki.get_samples(username=twiki_username, page=new_twiki, get_unmade=False, assigned_to="all")

old_datasets = [s["dataset"] for s in old_samples if "dataset" in s]
new_datasets = dis_client.query("/*/*%s*/MINIAODSIM" % campaign_string)["response"]["payload"]
already_new_datasets = [s["dataset"] for s in already_new_samples if "dataset" in s]

# print new_datasets
# /TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext2-v1/MINIAODSIM
# print [nd for nd in new_datasets if "ttw" in nd.lower()]

# for each old, find new sample that has same content between first two slashes (the Primary Dataset) and ext number matches
# map old dataset name --> new dataset name
# map new dataset name --> old dataset name
# print [od for od in old_datasets if "QCD_Pt_470to600_TuneCUETP8M1_13TeV_pythia8" in od]
# print [od for od in new_datasets if "QCD_Pt_470to600_TuneCUETP8M1_13TeV_pythia8" in od]

d_old_to_new = {}
for old in old_datasets:
    matches = []
Example #10
        ("/SingleMuon/Run2018D-PromptReco-v2/MINIAOD",
         "data_Run2018D_SingleMuon_PromptReco"),
        ("/EGamma/Run2018D-22Jan2019-v2/MINIAOD",
         "data_Run2018D_EGamma_22Jan2019"),
    ],
}

os.system("mkdir -p configs/{0}".format(TAG))

for year in datasets:
    print year
    for ds, short in datasets[year]:
        if "WH_HtoRhoGammaPhiGamma" in ds:
            loc = "/hadoop/cms/store/group/snt/run2_mc2018_private/WH_HtoRhoGammaPhiGamma_privateMC_102x_MINIAOD_v1"
        else:
            info = dis_client.query(ds, "snt")["response"]["payload"]
            loc = info[0]["location"]
        print loc

        outdir = "/hadoop/cms/store/user/bemarsh/WHphigamma/{0}/{1}/{2}".format(
            TAG, year, short)

        fout = open("configs/{0}/config_{1}_{2}.cmd".format(TAG, year, short),
                    'w')
        fout.write("""
universe=vanilla
when_to_transfer_output = ON_EXIT
+DESIRED_Sites="T2_US_UCSD"
+remote_DESIRED_Sites="T2_US_UCSD"
+Owner = undefined
log=logs/condor_submit.log
Example #11
#!/usr/bin/env python

import dis_client
import sys
response = dis_client.query(q=sys.argv[1], typ="basic", detail=False)
samples = response["response"]["payload"]
samples_nevents = []
for sample in samples:
    response = dis_client.query(q=sample, typ="basic", detail=False)
    nevent = response["response"]["payload"]["nevents"]
    samples_nevents.append((sample, nevent))
samples_nevents.sort(key=lambda x: x[1], reverse=True)
for sample, nevent in samples_nevents:
    print sample, nevent
Example #12
with open(fname, "r") as fhin:
    js = json.load(fhin)
all_run_lumis = lu.RunLumis(js)

# print for Twiki copy-paste purposes
print "---++++ !Lumis per era using JSON: %s" % fname
print "<br />%EDITTABLE{}%"
print "| *Era* | *First run* | *Last run* | *Int. lumi [/fb]* | "

for dataset_pattern in dataset_patterns:
    runs = set([])
    # for each pd in the pds above, make an inclusive set of runs
    # it could be the case that the first run is not present in a PD, so we are essentially
    # doing an OR of the few PDs above
    for pd in pds:
        dataset = dataset_pattern.format(pd=pd)
        runs.update( set(dis.query(dataset, typ="runs")["response"]["payload"]) )

    # the min and max runs we'll consider as the start and end of that era
    first_run = min(runs)
    last_run = max(runs)

    # get the integrated luminosity in the run range
    int_lumi = all_run_lumis.getIntLumi(first_run=first_run, last_run=last_run)
    era = dataset_pattern.split("/")[2]
    int_lumi_fb = int_lumi / 1000.0
    print "| !%s | %i | %i | %.3f |" % (era, first_run, last_run, int_lumi_fb)

    sys.stdout.flush()

Example #13
import sys
import dis_client

def get_status(ds):
    status = "unknown"
    try: status = dis_client.query(ds+",this", typ="mcm")["response"]["payload"]["status"]
    except Exception: pass
    sys.stdout.write(".")
    sys.stdout.flush()
    return status

if __name__ == "__main__":

    # campaigns = ["RunIIFall17MiniAODv2"] #, "RunIISpring16MiniAODv2"]
    campaigns = ["RunIIFall17MiniAODv2","MiniAODv3","RunIIAutumn"] #, "RunIISpring16MiniAODv2"]
    old_fname = "past_samples.txt"

    all_datasets = []
    for campaign in campaigns:
        output = dis_client.query("/*/*%s*/MINIAODSIM" % campaign)
        all_datasets.extend( output["response"]["payload"] )

    print "Found %i total datasets on DAS for campaigns: %s" % (len(all_datasets), ", ".join(campaigns))


    datasets = []
    for dataset in all_datasets:
        isBad = False
        for cut_str in cut_strs:
            if cut_str in dataset:
                isBad = True
                break

        if isBad: continue
Example #14
if __name__ == "__main__":

    todelete = []
    toadd = []
    badpu = []
    for type in samples_2017:
        for ds in samples_2017[type]:
            vals = samples_2017[type][ds]
            if len(vals) > 4 and vals[4]=="DELETE":
                todelete.append(ds)
            if len(vals) > 4 and vals[4]=="TOADD":
                toadd.append(ds)
                
            if "Run2017" not in ds and (len(vals) <= 4 or vals[4] != "DELETE"):
                out = dis_client.query(ds, "parents")["response"]["payload"]["parents"]
                if len(out) < 1 or "PU2017" not in out[0]:
                    badpu.append(ds)

    print "Datasets to replace:"
    for ds in todelete:
        print ds

    print "\nDatasets to add:"
    for ds in toadd:
        print ds

    print "\nDatasets with old PU and no replacement:"
    for ds in badpu:
        print ds
Example #15
    fs1fb.close()

nsamps = 0
nfiles = 0
nevts = 0

outdir = "{0}/file_lists".format(tag)
os.system("mkdir -p "+outdir)

for type in samples:
    if type not in do:
        continue
    for ds in samples[type]:
        if "RebalanceAndSmear" in tag and type=="data" and "JetHT" not in ds:
            continue
        info = dis_client.query(ds, "snt")["response"]["payload"]
        if len(info)==0:
            if type=="signal":
                if "2016" in tag:
                    sigdir = sigdir2016
                if "2017" in tag:
                    sigdir = sigdir2017
                sigdir += ds.strip("/").replace("/","_")+"_CMS4_V10-02-05"
                info = [{ "location" : sigdir,
                          "cms3tag"  : "CMS4_V10-02-05",
                          "xsec" : 1.0,
                          "kfactor" : 1.0,
                          "filter_eff" : 1.0,
                          "nevents_out" : 1,
                          }]
            else:
Example #16
            if line.find("Add") != -1:
                fullfilename = line.split()[2]
                ifile = os.path.basename(fullfilename).split("_")[-1].split(
                    ".root")[0]
                nevents_to_file_dict[str(data["ijob_to_nevents"]["{}".format(
                    ifile)][0])] = fullfilename
                file_to_nevents_dict[fullfilename] = str(
                    data["ijob_to_nevents"]["{}".format(ifile)][0])

        # Get dis_client query pattern
        dis_query_dataset_pattern = "/" + os.path.basename(
            dirpath.rsplit("MINIAODSIM")[0] + "MINIAODSIM*")
        dis_query_dataset_pattern = dis_query_dataset_pattern.replace("-", "_")

        # Use DIS to parse hadoop path from MINIAOD sample name
        result = dis_client.query(q=dis_query_dataset_pattern, typ="snt")
        status = result["response"]["status"]
        if status == "success":
            payloads = result["response"]["payload"]
            for payload in payloads:
                if payload["cms3tag"].find("CMS3") != -1: continue
                loc = payload["location"]
                nevents_out = payload["nevents_out"]
                dataset_name = payload["dataset_name"]
                if dataset_name not in skipped_fractions:
                    skipped_fractions[dataset_name] = 0
                #print nevents_out
                #if os.path.normpath(loc) != os.path.normpath(dirpath):
                #    print loc
                #    print dirpath
                #    print "ERROR"
Example #17
File: scrape.py  Project: aminnj/scouting
if __name__ == "__main__":

    executor = concurrent.futures.ProcessPoolExecutor(12)

    import dis_client as dis

    # files = (
    #         dis.query("/MET/Run2018A-17Sep2018-v1/MINIAOD",typ="files",detail=True)["payload"]
    #         + dis.query("/MET/Run2018B-17Sep2018-v1/MINIAOD",typ="files",detail=True)["payload"]
    #         + dis.query("/MET/Run2018C-17Sep2018-v1/MINIAOD",typ="files",detail=True)["payload"]
    #         + dis.query("/MET/Run2018D-PromptReco-v2/MINIAOD",typ="files",detail=True)["payload"]
    #         )
    # outdir = "outputs_full2018/"

    files = (
        dis.query("/MET/Run2017B-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"]
        + dis.query("/MET/Run2017C-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"]
        + dis.query("/MET/Run2017D-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"]
        + dis.query("/MET/Run2017E-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"]
        + dis.query("/MET/Run2017F-31Mar2018-v1/MINIAOD", typ="files", detail=True)["payload"]
    )
    outdir = "outputs_full2017/"

    fnames = [f["name"] for f in files if f["nevents"] > 0]
    print(len(fnames))
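The excerpt builds the file list and a 12-worker pool but is cut off before any work is dispatched; a minimal self-contained sketch of the usual executor.map pattern (the worker and inputs below are placeholders, not from the project):

import concurrent.futures

def process_file(fname):
    # placeholder worker; the real job presumably skims each file into outdir
    return fname

if __name__ == "__main__":
    executor = concurrent.futures.ProcessPoolExecutor(12)
    fnames = ["file1.root", "file2.root"]  # placeholder inputs
    results = list(executor.map(process_file, fnames))
    print(len(results))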
Example #18
#!/usr/bin/env python

import dis_client
import commands
import json

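# comma-separated run:lumi:event identifiers for the events to pick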
search_list = "275836:75:151381462,275847:360:479181129,275658:250:491930764,276775:1090:1902546029,276525:2453:4196742942,276501:141:232625640,276501:1277:2150454271,276811:502:912019671,277069:189:237076988,277305:372:562512901,278770:390:752183616,278509:106:62909241,278345:314:469092385,278509:623:966895086,279766:671:1247479543,279115:298:424149717"
list_of_events = search_list.split(",")

dataset_prefixes = ["JetHT"]

for prefix in dataset_prefixes:
    data = dis_client.query(q="/%s/Run2016*-23Sep2016*/MINIAOD" % (prefix))
    list_of_datasets = data['response']['payload']
    for dataset in list_of_datasets:
        print dataset
        for event in list_of_events:
            cms3_data = dis_client.query(q="%s,%s" % (dataset, event), typ="pick_cms3")
            if not cms3_data['response']['fail_reason'] == '':
                continue
            if not cms3_data['response']['payload']['files']:
                continue
            print dataset
            print "\t%s %s" % (cms3_data['response']['payload']['files'], event.replace(":", ", "))
            if cms3_data['response']['payload']['skim_command']:
                print "\t%s" % (cms3_data['response']['payload']['skim_command'])
                status, output = commands.getstatusoutput(
                    "./%s -o %s" % (cms3_data['response']['payload']['skim_command'],
                                    cms3_data['response']['payload']['files'][0].split("/")[-1]))