Example #1
import os
import time

# Import paths follow ProjectMetis's own examples; adjust if your checkout
# differs (MutableFile in particular is assumed to live in metis.File).
from metis.Sample import DirectorySample
from metis.CondorTask import CondorTask
from metis.File import MutableFile


def test2():
    dataset_names = [
        # "/TT_TuneCUETP8M2T4_13TeV-powheg-pythia8/RunIISummer17MiniAOD-92X_upgrade2017_realistic_v10_ext1-v1/MINIAODSIM",
        "/Dummy_test_StopBabyMaker_v25/CMS4",
    ]

    # Make a base directory
    basedir = "/hadoop/cms/store/user/{0}/metis_test/example/".format(
        os.getenv("USER"))
    MutableFile(basedir).touch()

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
    ds = DirectorySample(location=basedir,
                         dataset="/TEST/Examplev1/TEST",
                         globber="*.txt")

    # Make a CondorTask (3 in total, one for each input)
    task = CondorTask(
        sample=ds,
        files_per_output=1,
        tag="v0",
        output_name="ttbar_powheg_pythia8_92X.root",
        executable="condor_executable.sh",
        cmssw_version="CMSSW_9_3_1",
        scram_arch="slc6_amd64_gcc700",
        arguments="testarg1",
        tarfile="input.tar.gz",
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        no_load_from_backup=True,  # for the purpose of the example, don't use a backup
    )
    # do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete: break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
        outsamp = DirectorySample(location=task.get_outputdir(),
                                  dataset="/Blah/blah/BLAH",
                                  globber="*.txt")
        tot = 0
        for f in outsamp.get_files():
            mf = MutableFile(f.get_name())
            tot += int(mf.cat())
        print("It looks like we found 3*2*10 = {0}".format(tot))
Example #2
    def test_set_files(self):
        dirsamp = DirectorySample(dataset="/blah/blah/BLAH/",
                                  location="/dummy/dir/")
        fnames = [
            "/hadoop/cms/store/user/blah/file_1.root",
            "/hadoop/cms/store/user/blah/file_2.root"
        ]
        dirsamp.set_files(fnames)
        self.assertEqual(
            list(map(lambda x: x.get_name(), dirsamp.get_files())), fnames)
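set_files injects an explicit file list, which is why the dummy /dummy/dir/ location never has to exist. Without it, DirectorySample picks up files by globbing the location with the globber pattern, which defaults to *.root (as noted in the first example). A minimal sketch of that default path, assuming the directory really contains ROOT files:

# Sketch: with no set_files call, DirectorySample globs the location
# (default pattern "*.root") to build its file list.
samp = DirectorySample(dataset="/blah/blah/BLAH/", location="/some/real/dir/")
print([f.get_name() for f in samp.get_files()])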
Example #3
def getDict(sample):
    sample_dict = {}

    #print ("Will get info now.")

    # First, get the name
    name = getName(sample[0])
    print("Started with: %s" % name)

    year, era, isData, isFastSim = getYearFromDAS(sample[0])

    # local/private sample? (nonzero when the path lives under /hadoop or /home)
    local = (sample[0].count('hadoop') + sample[0].count('home'))

    if local:
        sample_dict['path'] = sample[0]
        metis_sample = DirectorySample(dataset=name, location=sample[0])

    else:
        sample_dict['path'] = None
        metis_sample = DBSSample(dataset=sample[0])

    allFiles = [f.name for f in metis_sample.get_files()]

    split_factor = getSplitFactor(metis_sample, target=1e6)
    sample_dict['files'] = len(allFiles)

    if not isData:
        nEvents, sumw, sumw2 = getSampleNorm(allFiles,
                                             local=local,
                                             redirector=redirector_fnal)
    else:
        nEvents, sumw, sumw2 = metis_sample.get_nevents(), 0, 0

    sample_dict.update({
        'sumWeight': float(sumw),
        'nEvents': int(nEvents),
        'xsec': float(sample[1]),
        'name': name,
        'split': split_factor
    })

    print("Done with: %s" % name)

    return sample_dict
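getDict takes a (dataset, cross section) pair: it reads sample[0] as the dataset path and sample[1] as the cross section. A minimal call might look like the sketch below; the dataset name and cross-section value are placeholders, and the helper's globals (getName, getYearFromDAS, getSampleNorm, redirector_fnal, ...) must already be in scope:

# Hypothetical usage; the dataset and cross section are placeholders.
sample = ("/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/RunIIAutumn18MiniAOD-102X_upgrade2018_realistic_v15-v1/MINIAODSIM", 88.3)
info = getDict(sample)
print(info['name'], info['nEvents'], info['split'])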
Example #4
total_summary = {}
while True:
    allcomplete = True
    for dataset, info in datasets.items():
        if args.data_only and not info["isData"]:
            continue
        if "VHToGG" in dataset:
            print "Skipping VH for now"
            continue
        #if not ("Run2016" in dataset or "Run2017" in dataset or "Run2018" in dataset):
        #  continue
        if "ttHiggs" in dataset:
            print("Submitting jobs for: ", dataset)
        sample = DirectorySample(dataset=dataset, location=info["input_loc"])
        files = [f.name for f in sample.get_files()]
        print(len(files))
        task = CondorTask(sample=sample,
                          open_dataset=False,
                          flush=True,
                          files_per_output=info["fpo"],
                          output_name="merged_ntuple.root",
                          tag=job_tag,
                          cmssw_version=cmssw_ver,
                          executable=exec_path,
                          tarfile=tar_path,
                          condor_submit_params={"sites": "T2_US_UCSD"},
                          special_dir=hadoop_path,
                          arguments=info["meta_conditions"])
        task.process()
        if not task.complete():
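The snippet is cut off above. In full Metis submission scripts, this kind of while-loop usually ends by marking incomplete tasks, collecting per-task summaries, publishing a status page, and sleeping before the next pass. A sketch of that common tail, assuming the usual ProjectMetis helpers (task.get_task_summary() and StatsParser from metis.StatsParser) and a placeholder webdir path:

        # Sketch of the typical loop tail (not this file's original code).
        if not task.complete():
            allcomplete = False
        total_summary[dataset] = task.get_task_summary()
    # After looping over all datasets, publish a status page and poll again.
    StatsParser(data=total_summary, webdir="~/public_html/dump/").do()
    if allcomplete:
        print("All tasks complete!")
        break
    time.sleep(600)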
Example #5
  else:
    dslocs.append(["/" + name + "/", base_path + "/" + name + "/", nFilesPerOutput])


print(dslocs)
#time.sleep(300)


total_summary = {}
while True:
    allcomplete = True
    for ds,loc,fpo in dslocs:
        print(loc)
        sample = DirectorySample( dataset=ds, location=loc )
        files = [f.name for f in sample.get_files()]
        # drop one known-bad file before building the task
        sample.set_files([f.name for f in sample.get_files() if "/cms/store/user/bemarsh/flashgg/MicroAOD/forHualin_2017/GJet_Pt-40toInf_DoubleEMEnriched_MGG-80toInf_TuneCP5_13TeV_Pythia8_RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v2_MINIAODSIM_forHualin_2017/test_skim_630.root" not in f.name])
        task = CondorTask(
            sample=sample,
            open_dataset=False,
            flush=True,
            files_per_output=fpo,
            output_name="merged_ntuple.root",
            tag=job_tag,
            cmssw_version=cmssw_ver,  # doesn't do anything
            executable=exec_path,
            tarfile=tar_path,
            condor_submit_params={"sites": "T2_US_UCSD"},
            special_dir=hadoop_path,
        )
Example #6
print("Will run over the following samples:")
print(sample_list)
print()

for s in sample_list:
    if samples[s]['path'] is not None:
        sample = DirectorySample(dataset=samples[s]['name'],
                                 location=samples[s]['path'])
    else:
        sample = DBSSample(dataset=s)  # should we make use of the files??

    year, era, isData, isFastSim = getYearFromDAS(s)

    print("Now working on sample: %s" % s)
    print("- has %s files" % len(sample.get_files()))
    print("- is %s, corresponding to year %s. %s simulation is used." %
          ('Data' if isData else 'MC', year, 'Fast' if isFastSim else 'Full'))
    if isData:
        print("The era is: %s" % era)
    # merge three files into one for all MC samples except ones where we expect a high efficiency of the skim
    signal_string = re.compile("TTW.*EWK")
    is_signal = bool(samples[s]['name'].count('tW_scattering')
                     or re.search(signal_string, samples[s]['name']))
    # cap at 4 files per job because we prefetch, except for signal samples
    mergeFactor = samples[s]['split'] if is_signal else min(4, samples[s]['split'])
    print("- using merge factor: %s" % mergeFactor)

    #lumiWeightString = 1000*samples[s]['xsec']/samples[s]['sumWeight'] if not isData else 1
    lumiWeightString = 1 if (
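The truncated line above presumably finishes the data/MC switch spelled out by the commented formula: MC events get 1000 * xsec / sumWeight while data gets 1. Assuming xsec is in pb (the conventional CMS unit), the factor 1000 converts pb to fb, normalizing MC yields to events per fb^-1 of integrated luminosity. A sketch of that logic, under those assumptions:

    # Sketch of the lumi-weight logic implied by the commented formula above
    # (assumes xsec in pb; 1000x converts pb -> fb, so the weight normalizes
    # MC to events per fb^-1 of integrated luminosity).
    lumiWeight = 1 if isData else 1000 * samples[s]['xsec'] / samples[s]['sumWeight']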
Example #7
    #    [ "/TTJets_DiLept_ext1", "/hadoop/cms/store/group/snt/run2_moriond17/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/V08-00-16/" ] ,
    #    [ "/TTJets_HT-600to800", "/hadoop/cms/store/group/snt/run2_moriond17/TTJets_HT-600to800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/V08-00-16/" ] ,
    #    [ "/TTJets_HT-800to1200", "/hadoop/cms/store/group/snt/run2_moriond17/TTJets_HT-800to1200_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/V08-00-16/" ] ,
    #    [ "/TTJets_HT-1200to2500", "/hadoop/cms/store/group/snt/run2_moriond17/TTJets_HT-1200to2500_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/V08-00-16/" ] ,
    #    [ "/TTJets_HT-2500toInf", "/hadoop/cms/store/group/snt/run2_moriond17/TTJets_HT-2500toInf_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/V08-00-16/" ] ,
    #    [ "/TTTo2L2Nu", "/hadoop/cms/store/group/snt/run2_moriond17/TTTo2L2Nu_TuneCUETP8M2_ttHtranche3_13TeV-powheg-pythia8_RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6-v1/V08-00-16/" ] ,
]

total_summary = {}
while True:
    allcomplete = True
    for ds, loc in dslocs:
        sample = DirectorySample(dataset=ds, location=loc)
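        # loc[7:] presumably strips the leading "/hadoop" (7 characters) from the path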
        corrupt_files = corrupt.find_corrupt_files(numpy.array([loc[7:]]))
        files = [
            f.name for f in sample.get_files() if f.name not in corrupt_files
        ]
        sample.set_files(files)
        task = CondorTask(
            sample=sample,
            open_dataset=False,
            flush=True,
            files_per_output=1,
            output_name="merged_ntuple.root",
            tag=job_tag,
            cmssw_version="CMSSW_9_2_1",  # doesn't do anything
            executable=exec_path,
            tarfile=tar_path,
            condor_submit_params={"sites": "T2_US_UCSD"},
            special_dir=hadoop_path)
        task.process()
Example #8
        output_name="output.txt",
        executable=exefile.get_name(),
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        no_load_from_backup=True,  # for the purpose of the example, don't use a backup
    )
    do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete: break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
        outsamp = DirectorySample(location=task.get_outputdir(),
                                  dataset="/Blah/blah/BLAH",
                                  globber="*.txt")
        tot = 0
        for f in outsamp.get_files():
            mf = MutableFile(f.get_name())
            tot += int(mf.cat())
        print("It looks like we found 3*2*10 = {0}".format(tot))
Example #9
    def test_instantiation(self):
        dsname = "/blah/blah/BLAH/"
        dirsamp = DirectorySample(dataset=dsname, location="/dummy/dir/")
        self.assertEqual(len(dirsamp.get_files()), 0)