Example #1
def test2():
    dataset_names = [
        # "/TT_TuneCUETP8M2T4_13TeV-powheg-pythia8/RunIISummer17MiniAOD-92X_upgrade2017_realistic_v10_ext1-v1/MINIAODSIM",
        "/Dummy_test_StopBabyMaker_v25/CMS4",
    ]

    # Make a base directory
    basedir = "/hadoop/cms/store/user/{0}/metis_test/example/".format(os.getenv("USER"))
    MutableFile(basedir).touch()

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
    ds = DirectorySample(location=basedir, dataset="/TEST/Examplev1/TEST", globber="*.txt")

    # Make a CondorTask (3 in total, one for each input)
    task = CondorTask(
            sample = ds,
            files_per_output = 1,
            tag = "v0",
            output_name = "ttbar_powheg_pythia8_92X.root",
            executable = "condor_executable.sh",
            cmssw_version = "CMSSW_9_3_1",
            scram_arch = "slc6_amd64_gcc700",
            arguments = "testarg1",
            tarfile = "input.tar.gz",
            condor_submit_params = {"sites": "UAF,T2_US_UCSD,UCSB"},
            no_load_from_backup = True, # for the purpose of the example, don't use a backup
    )
    # do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete: break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
        outsamp = DirectorySample(location=task.get_outputdir(), dataset="/Blah/blah/BLAH", globber="*.txt")
        tot = 0
        for f in outsamp.get_files():
            mf = MutableFile(f.get_name())
            tot += int(mf.cat())
        print("It looks like we found 3*2*10 = {0}".format(tot))
Example #3
    def test_full(self):
        """
        Touch a root file ("input")
        Submit condor jobs to touch output files for each input file
        and copy them to hadoop
        Jobs get submitted to local universe for speed reasons
        Check output to make sure job completed
        """

        njobs = 2
        cmssw = "CMSSW_8_0_21"
        basedir = "/tmp/{0}/metis/condortask_testfull/".format(
            os.getenv("USER"))
        Utils.do_cmd("mkdir -p {0}".format(basedir))
        tag = "vfull"
        for i in range(1, njobs + 1):
            Utils.do_cmd("touch {0}/input_{1}.root".format(basedir, i))

        logging.getLogger("logger_metis").disabled = True
        dummy = CondorTask(
            sample=DirectorySample(
                location=basedir,
                globber="*.root",
                dataset="/test/test/TEST",
            ),
            open_dataset=False,
            files_per_output=1,
            cmssw_version=cmssw,
            executable=Utils.metis_base() + "metis/executables/condor_test_exe.sh",
            tag=tag,
            condor_submit_params={"universe": "local"},
            no_load_from_backup=True,
        )

        # clean up previous directory
        Utils.do_cmd("rm -rf {0}".format(dummy.get_outputdir()))

        is_complete = False
        for t in [1.0, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0]:
            dummy.process()
            time.sleep(t)
            is_complete = dummy.complete()
            if is_complete: break

        self.assertEqual(is_complete, True)
        self.assertEqual(njobs, len(glob.glob(dummy.get_outputdir() + "/*")))
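The self.assert* calls imply that test_full() lives inside a unittest.TestCase subclass. A minimal, assumed harness for running it (the class and module names below are not from the original source) could look like this:

# Assumed test harness; only the method body comes from Example #3.
import unittest

class CondorTaskTest(unittest.TestCase):
    # paste test_full() from Example #3 here, together with its imports
    # (os, time, glob, logging, CondorTask, DirectorySample, Utils)
    pass

if __name__ == "__main__":
    unittest.main()
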
Example #4
    def test_completion_fraction(self):
        # Make dummy task with no inputs
        # and require min completion fraction to be 0
        logging.getLogger("logger_metis").disabled = True
        dummy = CondorTask(
            sample=DirectorySample(
                location=".",
                globber="*.fake",
                dataset="/testprocess/testprocess/TEST",
            ),
            open_dataset=False,
            files_per_output=1,
            cmssw_version="CMSSW_8_0_20",
            tag="vtest",
            no_load_from_backup=True,
            min_completion_fraction=0.,
        )
        dummy.process()
        self.assertEqual(dummy.complete(), True)
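min_completion_fraction controls when complete() reports True even if some outputs are missing, and complete(return_fraction=True) (used in Examples #8 and #10 below) returns the completed fraction directly. The helper below is a hedged sketch of gating a merge step on such a threshold; the function name and the 0.9 default are assumptions, while the method calls all appear elsewhere on this page.

def maybe_merge(maker_task, merge_task, threshold=0.9):
    # Sketch only: fire the merge once enough maker outputs exist.
    # The 0.9 default is an assumption; the calls below mirror Examples #8/#10/#18.
    frac = maker_task.complete(return_fraction=True)
    if frac >= threshold:
        merge_task.reset_io_mapping()   # re-discover the maker outputs
        merge_task.update_mapping()
        merge_task.process()
    return frac
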
Example #5
                    condorpath),  # your tarfile with assorted goodies here
                special_dir="VVVAnalysis/{}/{}".format(
                    tag, args.year
                ),  # output files into /hadoop/cms/store/<user>/<special_dir>
                min_completion_fraction=0.50 if skip_tail else 1.0,
                # max_jobs = 10,
            )
            # When babymaking task finishes, fire off a task that takes outputs and merges them locally (hadd)
            # into a file that ends up on nfs (specified by `merged_dir` above)
            merge_task = LocalMergeTask(
                input_filenames=task.get_outputs(),
                output_filename="{}/{}.root".format(merged_dir, shortname),
                ignore_bad=skip_tail,
            )
            # Straightforward logic
            if not task.complete():
                task.process()
            else:
                if not merge_task.complete():
                    merge_task.process()

            # Aggregate whether all tasks are complete
            all_tasks_complete = all_tasks_complete and task.complete()

            # Set task summary
            task_summary[task.get_sample().get_datasetname()] = task.get_task_summary()

        # Parse the summary and make a summary.txt that will be used to display a pretty status of the jobs
        StatsParser(data=task_summary, webdir="~/public_html/VVVNanoLooperDashboard").do()
Example #6
            # check_expectedevents = True,
            tag=tag,
            cmssw_version="CMSSW_9_2_8",
            scram_arch="slc6_amd64_gcc530",
            no_load_from_backup=True,
        )
        maker_tasks.append(maker_task)
        merge_tasks.append(merge_task)

    for i in range(100):
        total_summary = {}

        for maker_task, merge_task in zip(maker_tasks, merge_tasks):
            maker_task.process()

            if maker_task.complete():
                do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir()))
                do_cmd("mkdir -p {}/skimmed".format(
                    maker_task.get_outputdir()))
                merge_task.reset_io_mapping()
                merge_task.update_mapping()
                merge_task.process()

            total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()
            total_summary[merge_task.get_sample().get_datasetname()] = merge_task.get_task_summary()

        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary, webdir="~/public_html/dump/metis_stopbaby/").do()
Example #7
def submit_metis(job_tag, samples_map, sample_list=[], arguments_map="", exec_script="metis.sh", tar_files=[], hadoop_dirname="testjobs", files_per_output=1, globber="*.root", sites="T2_US_UCSD"):

    import time
    import json
    import metis

    from time import sleep

    from metis.Sample import DirectorySample
    from metis.CondorTask import CondorTask

    from metis.StatsParser import StatsParser

    import os
    import glob
    import subprocess


    # file/dir paths
    main_dir             = os.getcwd()
    metis_path           = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path             = os.path.join(metis_path, "package.tar")
    tar_gz_path          = tar_path + ".gz"
    metis_dashboard_path = os.path.join(metis_path, "dashboard")
    exec_path            = os.path.join(main_dir, exec_script)
    hadoop_path          = "metis/{}/{}".format(hadoop_dirname, job_tag) # The output goes to /hadoop/cms/store/user/$USER/"hadoop_path"

    # Create tarball
    os.chdir(main_dir)
    print os.getcwd()
    print "tar -chzf {} {}".format(tar_gz_path, " ".join(tar_files))
    os.system("tar -chzf {} {}".format(tar_gz_path, " ".join(tar_files)))

    # Change directory to metis
    os.chdir(metis_path)

    total_summary = {}

    # if no sample_list is provided then we form it via the keys of the samples_map
    # (build a fresh list rather than appending to the mutable default argument)
    if len(sample_list) == 0:
        sample_list = list(samples_map.keys())

    samples_to_run = []
    for key in sample_list:
        samples_to_run.append(
                DirectorySample(
                    dataset=key,
                    location=samples_map[key],
                    globber=globber,
                    )
                )

    files_per_output_config_list = []
    if isinstance(files_per_output, dict):
        for key in sample_list:
            files_per_output_config_list.append(files_per_output[key])
    else:
        for key in sample_list:
            files_per_output_config_list.append(files_per_output)

    # Loop over datasets to submit
    while True:

        all_tasks_complete = True

        #for sample in sorted(samples):
        for index, sample in enumerate(samples_to_run):

            # define the task
            maker_task = CondorTask(
                    sample               = sample,
                    tag                  = job_tag,
                    arguments            = arguments_map[sample.get_datasetname()] if arguments_map else "",
                    executable           = exec_path,
                    tarfile              = tar_gz_path,
                    special_dir          = hadoop_path,
                    output_name          = "output.root",
                    files_per_output     = files_per_output_config_list[index],
                    condor_submit_params = {"sites" : sites},
                    open_dataset         = False,
                    flush                = True,
                    #no_load_from_backup  = True,
                    )

            # process the job (either submits, checks for resubmit, or finishes etc.)
            maker_task.process()

            # save some information for the dashboard
            total_summary["["+job_tag+"] "+maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()

            # Aggregate whether all tasks are complete
            all_tasks_complete = all_tasks_complete and maker_task.complete()


        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary, webdir=metis_dashboard_path).do()

        # Print msummary table so I don't have to load up website
        os.system("msummary -r -p {} | tee summary.txt".format(job_tag))
        os.system("chmod -R 755 {}".format(metis_dashboard_path))
        os.system("chmod 644 {}/images/*".format(metis_dashboard_path))

        # If all done exit the loop
        if all_tasks_complete:
            print ""
            print "Job={} finished".format(job_tag)
            print ""
            break

        # Neat trick to not exit the script for force updating
        print 'Press Ctrl-C to force update, otherwise will sleep for 60 seconds'
        try:
            for i in range(0,60):
                sleep(1) # could use a backward counter to be pretty :)
        except KeyboardInterrupt:
            raw_input("Press Enter to force update, or Ctrl-C to quit.")
            print "Force updating..."

    os.chdir(main_dir)
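A hypothetical call to submit_metis() is sketched below; the dataset label, input directory, and tarball contents are placeholders, chosen only to match the parameter names in the signature above.

# Hypothetical invocation of submit_metis(); every value below is a placeholder.
submit_metis(
    job_tag="test_v1",
    samples_map={"/PLACEHOLDER/Sample/TEST": "/path/to/input/ntuples"},
    exec_script="metis.sh",
    tar_files=["a.out", "setup.sh"],   # whatever the executable needs at run time
    hadoop_dirname="testjobs",
    files_per_output=1,
    globber="*.root",
    sites="T2_US_UCSD",
)
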
Example #8
            scram_arch="slc6_amd64_gcc700",
            min_completion_fraction=1.00,
        )

        tasks.append(task)

if submit:
    for i in range(100):
        total_summary = {}
        fracs = []

        for task in tasks:
            #for maker_task in maker_tasks:
            task.process()

            frac = task.complete(return_fraction=True)

            total_summary[task.get_sample().get_datasetname()] = task.get_task_summary()

            fracs.append(frac)

        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary,
                    webdir="~/public_html/dump/metis_%s/" % model).do()

        print "%s/%s jobs are completed." % (sum(fracs), len(fracs))

        if sum(fracs) / len(fracs) == 1:
            print "Done."
            break
Example #9
            files_per_output=6,
            output_name="merged.root",
            tag=job_tag,
            #cmssw_version = "CMSSW_9_2_1", # doesn't do anything
            arguments=args,
            executable=exec_path,
            tarfile=tar_path,
            special_dir=hadoop_path,
            #condor_submit_params = {"universe" : "local"}
            condor_submit_params={"sites": "UAF"})

        task.process()

        # save some information for the dashboard
        total_summary["WWW_v0_1_%s_%s" %
                      (baby_version, job_tag)] = task.get_task_summary()

        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary,
                    webdir="~/public_html/tasutil/Metis_WWW/").do()

        if task.complete():
            print ""
            print "Job=%s finished" % job_tag
            print ""
            break

        time.sleep(2 * 60)

#eof
Example #10
        total_summary = {}

        if not os.path.isdir(maker_task.get_outputdir()):
            os.makedirs(maker_task.get_outputdir())
        if not os.path.isdir(maker_task.get_outputdir() + '/merged'):
            os.makedirs(maker_task.get_outputdir() + '/merged')
            os.makedirs(maker_task.get_outputdir() + '/skimmed')
        #do_cmd("mkdir -p {}".format(maker_task.get_outputdir()))
        #do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir()))
        #do_cmd("mkdir -p {}/skimmed".format(maker_task.get_outputdir()))

        for maker_task, merge_task in zip(maker_tasks, merge_tasks):
            #for maker_task in maker_tasks:
            maker_task.process()

            frac = maker_task.complete(return_fraction=True)
            if frac >= maker_task.min_completion_fraction:
                # if maker_task.complete():
                #    do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir()))
                #    do_cmd("mkdir -p {}/skimmed".format(maker_task.get_outputdir()))
                merge_task.reset_io_mapping()
                merge_task.update_mapping()
                merge_task.process()

        total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()
        total_summary[merge_task.get_sample().get_datasetname()] = merge_task.get_task_summary()

        print(frac)
Example #11
def get_tasks(samples_dictionary, year, baby_type, baby_version_tag, dotestrun=False, files_per_output_func=UNITY):

    job_tag = "{}{}_{}".format(baby_type, year, baby_version_tag)

    # file/dir paths
    main_dir             = os.path.dirname(os.path.abspath(__file__))
    metis_path           = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path             = os.path.join(metis_path, "package_{}.tar".format(job_tag))
    tar_gz_path          = tar_path + ".gz"
    exec_path            = os.path.join(main_dir, "metis.sh")
    merge_exec_path      = os.path.join(main_dir, "merge.sh")
    hadoop_path          = "metis/{}/{}".format(hadoop_dirname, job_tag) # The output goes to /hadoop/cms/store/user/$USER/"hadoop_path"

    # Extra arguments that will be passed on to ./processBaby to specify which baby to create from the babymaker binary executable
    args = ""
    if job_tag.find("WVZ") != -1:
        args = "1"
    if job_tag.find("Dilep") != -1:
        args = "2"
    if job_tag.find("Trilep") != -1:
        args = "3"
    if job_tag.find("WVZMVA") != -1:
        args = "4"
    if job_tag.find("Truth") != -1:
        args = "5"
    if job_tag.find("WVZAll") != -1:
        args = "6"

    # Change directory to metis
    os.chdir(metis_path)

    tasks = []

    # BEGIN Sample Loop -------->
    # loop over the samples
    for sample in sorted(samples_dictionary.iterkeys()):

        #
        # Job 1 : Creating baby
        #

        # define the task
        maker_task = CondorTask(
                sample               = SNTSample(dataset=sample,
                                                 # exclude_tag_pattern="CMS4_V08-*", # ignore new samples by sicheng for 2016 
                                                 exclude_tag_pattern="*v516*", # ignore new samples by sicheng for 2016 
                                                 ),
                tag                  = job_tag,
                arguments            = args,
                executable           = exec_path,
                tarfile              = tar_gz_path,
                special_dir          = hadoop_path,
                output_name          = "output.root",
                files_per_output     = files_per_output_func(sample),
                # files_per_output     = 1,
                condor_submit_params = {"sites" : "T2_US_UCSD"},
                # condor_submit_params = {"sites" : "UAF,T2_US_Wisconsin,T2_US_Florida,T2_US_Nebraska,T2_US_Caltech,T2_US_MIT,T2_US_Purdue"},
                # condor_submit_params = {"sites" : "UAF,T2_US_Wisconsin,T2_US_Florida,T2_US_Nebraska,T2_US_Caltech,T2_US_MIT"},
                # condor_submit_params = {"sites" : "UAF"},
                open_dataset         = False,
                flush                = True,
                max_jobs             = 5 if dotestrun else 0,
                # min_completion_fraction = 1.0 if "Run201" in sample else 0.9,
                # min_completion_fraction = 0.9,
                #no_load_from_backup  = True,
                )

        print sample, job_tag

        tasks.append(maker_task)

        #
        # Job 2 : Merging baby outputs
        #

        if maker_task.complete() and not dotestrun:

            merge_sample_name = "/MERGE_"+sample[1:]

            #merge_task = CondorTask(
            #        sample                 = DirectorySample(dataset=merge_sample_name, location=maker_task.get_outputdir()),
            #        # open_dataset         = True, flush = True,
            #        executable             = merge_exec_path,
            #        tarfile                = tar_gz_path,
            #        files_per_output       = 1,
            #        output_dir             = maker_task.get_outputdir() + "/merged",
            #        output_name            = samples_dictionary[sample] + ".root",
            #        condor_submit_params   = {"sites":"T2_US_UCSD"},
            #        output_is_tree         = True,
            #        # check_expectedevents = True,
            #        tag                    = job_tag,
            #        cmssw_version          = "CMSSW_9_2_0",
            #        scram_arch             = "slc6_amd64_gcc530",
            #        #no_load_from_backup    = True,
            #        max_jobs               = 1,
            #        )
            # merge_task.reset_io_mapping()
            # merge_task.update_mapping()
            # tasks.append(merge_task)

            # merge_task = LocalMergeTask(
            #         input_filenames=maker_task.get_outputs(),
            #         output_filename="{}/{}.root".format(maker_task.get_outputdir() + "/merged", samples_dictionary[sample]),
            #         ignore_bad = False,
            #         )

            input_arg = maker_task.get_outputs()[0].name.replace("_1.root", "")
            hadd_command = "sh ../rooutil/addHistos.sh /tmp/{}_1 {}".format(samples_dictionary[sample], input_arg)
            hadoop_output = "{}/{}_1.root".format(maker_task.get_outputdir() + "/merged", samples_dictionary[sample])
            cp_command = "cp /tmp/{}_1.root {}".format(samples_dictionary[sample], hadoop_output)

            print ""
            print ""
            print ""
            print ""
            print ""

            print hadoop_output

            if not os.path.exists(hadoop_output):

                print hadd_command
                # os.system(hadd_command)
                print cp_command
                # os.system(cp_command)

            print ""
            print ""
            print ""
            print ""

            # if not merge_task.complete():
            #     merge_task.process()


    # <------ END Sample Loop

    return tasks
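get_tasks() only builds the task list; driving the tasks to completion is left to the caller. Below is a hedged sketch of such a driver, reusing the process()/StatsParser/sleep pattern from Examples #7 and #15. The samples dictionary and web directory are placeholders, and the sketch assumes get_tasks() and the globals the fragment relies on (for instance hadoop_dirname and UNITY) are defined in the same module.

# Hypothetical driver loop for get_tasks(); placeholder sample and webdir.
import time

from metis.StatsParser import StatsParser

samples = {"/PLACEHOLDER/Sample/MINIAODSIM": "placeholder_short_name"}

while True:
    tasks = get_tasks(samples, year=2017, baby_type="WVZ", baby_version_tag="v1")
    total_summary = {}
    for task in tasks:
        task.process()
        total_summary[task.get_sample().get_datasetname()] = task.get_task_summary()
    StatsParser(data=total_summary, webdir="~/public_html/dump/").do()
    if all(task.complete() for task in tasks):
        break
    time.sleep(300)
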
Example #12
            output_name="test_rawreco.root",
            tag=job_tag,
            cmssw_version=cmssw_ver,
            executable=exec_path,
            tarfile="./package.tar.gz",
            condor_submit_params={
                "sites": "T2_US_UCSD",
                "container": "/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel7-m20201010"
            },
            #condor_submit_params = {"sites" : "T2_US_UCSD,T2_US_CALTECH,T2_US_MIT,T2_US_WISCONSIN,T2_US_Nebraska,T2_US_Purdue,T2_US_Vanderbilt,T2_US_Florida"},
            special_dir=hadoop_path,
            arguments=args.replace(" ", "|"))
        task.process()
        allcomplete = allcomplete and task.complete()
        # save some information for the dashboard
        total_summary[ds] = task.get_task_summary()
        with open("summary.json", "w") as f_out:
            json.dump(total_summary, f_out, indent=4, sort_keys=True)

    # Loop through local samples
    #for ds,loc,fpo,args in local_sets[:]:
    #    sample = DirectorySample( dataset = ds, location = loc )
    #    files = [f.name for f in sample.get_files()]
    #    print "For sample %s in directory %s, there are %d input files" % (ds, loc, len(files))
    #    #for file in files:
    #    #    print file

    #    task = CondorTask(
    #            sample = sample,
        )
        # print(merge_task.get_sample().get_datasetname())
        # print(merge_task.get_sample().info["location"])

        maker_tasks.append(child_tasks)
        merge_tasks.append(merge_task)
        

    for i in range(100):
        total_summary = {}
        
        for child_tasks, merge_task in zip(maker_tasks,merge_tasks):
            all_child_finishes = True
            for maker_task in child_tasks:
                maker_task.process()
                if not maker_task.complete(): 
                    all_child_finishes = False

            if all_child_finishes:
                do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir()))
                merge_task.reset_io_mapping()
                merge_task.update_mapping()
                merge_task.process()

            if merge_task.complete():
                outfile = '{}/{}_1.root'.format(merge_task.get_outputdir(), merge_task.output_name.split('.')[0])
                target = outfile.split('resub')[0]
                tmp = merge_task.get_sample().get_datasetname().split('_')
                target += 'stopBaby_{}/{}'.format('_'.join(tmp[1:-2]), merge_task.output_name)
                # print('cp {} {}'.format(outfile, target))
                print('cp {} {}'.format(outfile, target))
def get_tasks(samples_dictionary, year, baby_type, baby_version_tag, dotestrun=False):

    job_tag = "{}{}_{}".format(baby_type, year, baby_version_tag)

    # file/dir paths
    main_dir             = os.path.dirname(os.path.abspath(__file__))
    metis_path           = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path             = os.path.join(metis_path, "package.tar")
    tar_gz_path          = tar_path + ".gz"
    exec_path            = os.path.join(main_dir, "metis.sh")
    merge_exec_path      = os.path.join(main_dir, "merge.sh")
    hadoop_path          = "metis/wwwbaby/{}".format(job_tag) # The output goes to /hadoop/cms/store/user/$USER/"hadoop_path"

    # Extra arguments that will be passed on to ./processBaby to specify which baby to create from the babymaker binary executable
    if job_tag.find("WWW") != -1:
        args = "0" # WWWBaby
    elif job_tag.find("FR") != -1:
        args = "1" # FRBaby
    elif job_tag.find("OS") != -1:
        args = "2" # OSBaby
    elif job_tag.find("TnP") != -1:
        args = "3" # TnPBaby
    elif job_tag.find("All") != -1:
        args = "4" # AllBaby
    elif job_tag.find("POG") != -1:
        args = "5" # POGBaby
    elif job_tag.find("Loose") != -1:
        args = "6" # LooseBaby

    # Change directory to metis
    os.chdir(metis_path)

    tasks = []

    # BEGIN Sample Loop -------->
    # loop over the samples
    for sample in sorted(samples_dictionary.iterkeys()):

        #
        # Job 1 : Creating baby
        #

        # define the task
        maker_task = CondorTask(
                sample               = SNTSample(dataset=sample,
                                                 exclude_tag_pattern="CMS4_V08-*", # ignore new samples by sicheng for 2016 
                                                 ),
                tag                  = job_tag,
                arguments            = args,
                executable           = exec_path,
                tarfile              = tar_gz_path,
                special_dir          = hadoop_path,
                output_name          = "output.root",
                files_per_output     = 4,
                condor_submit_params = {"sites" : "T2_US_UCSD"},
                open_dataset         = False,
                flush                = True,
                max_jobs             = 5 if dotestrun else 0
                #no_load_from_backup  = True,
                )

        print sample, job_tag

        tasks.append(maker_task)

        #
        # Job 2 : Merging baby outputs
        #

        if maker_task.complete() and not dotestrun:

            merge_sample_name = "/MERGE_"+sample[1:]

            merge_task = CondorTask(
                    sample                 = DirectorySample(dataset=merge_sample_name, location=maker_task.get_outputdir()),
                    # open_dataset         = True, flush = True,
                    executable             = merge_exec_path,
                    tarfile                = tar_gz_path,
                    files_per_output       = 100000,
                    output_dir             = maker_task.get_outputdir() + "/merged",
                    output_name            = samples_dictionary[sample] + ".root",
                    condor_submit_params   = {"sites":"T2_US_UCSD"},
                    output_is_tree         = True,
                    # check_expectedevents = True,
                    tag                    = job_tag,
                    cmssw_version          = "CMSSW_9_2_0",
                    scram_arch             = "slc6_amd64_gcc530",
                    #no_load_from_backup    = True,
                    )
            merge_task.reset_io_mapping()
            merge_task.update_mapping()

            tasks.append(merge_task)

    # <------ END Sample Loop

    return tasks
Example #15
def main():

    # file/dir paths
    main_dir = os.path.dirname(os.path.abspath(__file__))
    metis_path = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path = os.path.join(metis_path, "package.tar")
    tar_gz_path = tar_path + ".gz"
    metis_dashboard_path = os.path.join(metis_path, "dashboard")
    exec_path = os.path.join(main_dir, "metis.sh")
    hadoop_path = "metis/baby/{}".format(
        job_tag
    )  # The output goes to /hadoop/cms/store/user/$USER/"hadoop_path"

    # Create tarball
    os.chdir(main_dir)
    os.system("tar -chzf {} a.out setup.sh *.xml".format(tar_gz_path))

    # Change directory to metis
    os.chdir(metis_path)

    total_summary = {}

    samples_to_run = [
        DirectorySample(
            dataset=
            "/GluGluHToWWToLNuQQ_M125_13TeV_powheg_JHUGenV628_pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber="GluGluHToWWToLNuQQ_M125_13TeV_powheg_JHUGenV628*.root",
        ),
        DirectorySample(
            dataset=
            "/VBFHToWWToLNuQQ_M125_13TeV_powheg_JHUGenV628_pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber="VBFHToWWToLNuQQ_M125_13TeV_powheg_JHUGenV628*.root",
        ),
        DirectorySample(
            dataset=
            "/TT_TuneCUETP8M2T4_13TeV-powheg-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber="TT_TuneCUETP8M2T4_13TeV-powheg*.root",
        ),
        DirectorySample(
            dataset=
            "/WJetsToLNu_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber="WJetsToLNu_TuneCUETP8M1_13TeV-madgraphMLM*.root",
        ),
        DirectorySample(
            dataset=
            "/WJetsToLNu_HT-400To600_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber=
            "WJetsToLNu_HT-400To600_TuneCUETP8M1_13TeV-madgraphMLM_ext1*.root",
        ),
        DirectorySample(
            dataset=
            "/WJetsToLNu_HT-600To800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber=
            "WJetsToLNu_HT-600To800_TuneCUETP8M1_13TeV-madgraphMLM_ext1*.root",
        ),
        DirectorySample(
            dataset=
            "/WJetsToLNu_HT-800To1200_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber=
            "WJetsToLNu_HT-800To1200_TuneCUETP8M1_13TeV-madgraphMLM_ext1*.root",
        ),
        DirectorySample(
            dataset=
            "/WJetsToLNu_HT-1200To2500_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber=
            "WJetsToLNu_HT-1200To2500_TuneCUETP8M1_13TeV-madgraphMLM*.root",
        ),
        DirectorySample(
            dataset=
            "/WJetsToLNu_HT-2500ToInf_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v1/MINIAODSIM",
            location=
            "/hadoop/cms/store/user/phchang/metis/baby/HWW2016_v1.15.1/merged4",
            globber=
            "WJetsToLNu_HT-2500ToInf_TuneCUETP8M1_13TeV-madgraphMLM_ext1*.root",
        ),
    ]

    # Loop over datasets to submit
    while True:

        all_tasks_complete = True

        #for sample in sorted(samples):
        for sample in samples_to_run:

            # define the task
            maker_task = CondorTask(
                sample=sample,
                tag=job_tag,
                arguments="",
                executable=exec_path,
                tarfile=tar_gz_path,
                special_dir=hadoop_path,
                output_name="output.root",
                files_per_output=1,
                condor_submit_params={"sites": "T2_US_UCSD"},
                open_dataset=False,
                flush=True,
                #no_load_from_backup  = True,
            )

            # process the job (either submits, checks for resubmit, or finishes etc.)
            maker_task.process()

            # save some information for the dashboard
            total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()

            # Aggregate whether all tasks are complete
            all_tasks_complete = all_tasks_complete and maker_task.complete()

        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary, webdir=metis_dashboard_path).do()

        # Print msummary table so I don't have to load up website
        os.system("msummary -r | tee summary.txt")
        os.system("chmod -R 755 {}".format(metis_dashboard_path))

        # If all done exit the loop
        if all_tasks_complete:
            print ""
            print "Job={} finished".format(job_tag)
            print ""
            break

        # Neat trick to not exit the script for force updating
        print 'Press Ctrl-C to force update, otherwise will sleep for 300 seconds'
        try:
            for i in range(0, 300):
                sleep(1)  # could use a backward counter to be pretty :)
        except KeyboardInterrupt:
            raw_input("Press Enter to force update, or Ctrl-C to quit.")
            print "Force updating..."
Example #16
        tag="v0",
        output_name="output.txt",
        executable=exefile.get_name(),
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        no_load_from_backup=True,  # for the purpose of the example, don't use a backup
    )
    do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete: break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
        outsamp = DirectorySample(location=task.get_outputdir(),
                                  dataset="/Blah/blah/BLAH",
                                  globber="*.txt")
        tot = 0
        for f in outsamp.get_files():
            mf = MutableFile(f.get_name())
            tot += int(mf.cat())
        print("It looks like we found 3*2*10 = {0}".format(tot))
Example #17
def main():

    if data_year == "2016":
        samples_to_run = dataset.hww_samples_to_run_2016
        samples_short_name = dataset.samples_short_name_2016
        dslocs = dataset.dslocscms4_2016_allpf

    # file/dir paths
    main_dir = os.path.dirname(os.path.abspath(__file__))
    metis_path = os.path.dirname(os.path.dirname(metis.__file__))
    tar_path = os.path.join(metis_path, "package.tar")
    tar_gz_path = tar_path + ".gz"
    metis_dashboard_path = os.path.join(metis_path, "dashboard")
    exec_path = os.path.join(main_dir, "metis.sh")
    merge_exec_path = os.path.join(main_dir, "merge.sh")
    hadoop_path = "metis/baby/{}".format(
        job_tag
    )  # The output goes to /hadoop/cms/store/user/$USER/"hadoop_path"
    if job_tag.find("HWW") != -1:
        args = "0"  # HWWBaby

    # Create tarball
    os.chdir(main_dir)
    os.system(
        "tar -chzf {} localsetup.sh processBaby *.so *.pcm rooutil/lib*.so coreutil/data coreutil/lib*.so *.txt btagsf MVAinput jetCorrections leptonSFs puWeight2016.root pileup_jul21_nominalUpDown.root ../CORE/Tools/ mergeHadoopFiles.C rooutil/hadd.py fastjet/fastjet-install/lib"
        .format(tar_gz_path))

    # Change directory to metis
    os.chdir(metis_path)

    total_summary = {}

    # Loop over datasets to submit
    while True:

        all_tasks_complete = True

        for sample in samples_to_run:

            loc = dslocs[sample]

            # define the task
            maker_sample_name = "/MAKER_" + sample[1:]
            maker_task = CondorTask(
                sample=DirectorySample(dataset=maker_sample_name,
                                       location=loc),
                tag=job_tag,
                arguments=args,
                executable=exec_path,
                tarfile=tar_gz_path,
                special_dir=hadoop_path,
                output_name="output.root",
                files_per_output=1,
                condor_submit_params={"sites": "T2_US_UCSD"},
                open_dataset=False,
                flush=True,
                #min_completion_fraction = 0.5,
                #no_load_from_backup  = True,
            )

            # process the job (either submits, checks for resubmit, or finishes etc.)
            maker_task.process()

            # save some information for the dashboard
            total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()

            # define the task
            merge_sample_name = "/MERGE_" + sample[1:]
            merge_task = CondorTask(
                sample=DirectorySample(dataset=merge_sample_name,
                                       location=maker_task.get_outputdir()),
                tag=job_tag,
                executable=merge_exec_path,
                tarfile=tar_gz_path,
                files_per_output=100000,
                output_dir=maker_task.get_outputdir() + "/merged",
                output_name=samples_short_name[sample] + ".root",
                condor_submit_params={"sites": "T2_US_UCSD"},
                open_dataset=False,
                flush=True,
                output_is_tree=True,
                cmssw_version="CMSSW_9_2_0",
                scram_arch="slc6_amd64_gcc530",
                #no_load_from_backup  = True,
            )

            if maker_task.complete():

                # process the job (either submits, checks for resubmit, or finishes etc.)
                merge_task.process()

                # save some information for the dashboard
                total_summary[merge_task.get_sample().get_datasetname()] = merge_task.get_task_summary()

            # Aggregate whether all tasks are complete
            all_tasks_complete = all_tasks_complete and maker_task.complete()

        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary, webdir=metis_dashboard_path).do()

        # Print msummary table so I don't have to load up website
        os.system("msummary -r | tee summary.txt")
        os.system("chmod -R 755 {}".format(metis_dashboard_path))

        # If all done exit the loop
        if all_tasks_complete:
            print ""
            print "Job={} finished".format(job_tag)
            print ""
            break

        # Neat trick to not exit the script for force updating
        print 'Press Ctrl-C to force update, otherwise will sleep for 300 seconds'
        try:
            for i in range(0, 300):
                sleep(1)  # could use a backward counter to be pretty :)
        except KeyboardInterrupt:
            raw_input("Press Enter to force update, or Ctrl-C to quit.")
            print "Force updating..."
Example #18
            tag = tag,
            cmssw_version = "CMSSW_9_2_8",
            scram_arch = "slc6_amd64_gcc530",
            no_load_from_backup = True,
        )
        maker_tasks.append(maker_task)
        merge_tasks.append(merge_task)


    for i in range(100):
        total_summary = {}

        for maker_task, merge_task in zip(maker_tasks,merge_tasks):
            maker_task.process()

            if maker_task.complete():
                do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir()))
                do_cmd("mkdir -p {}/skimmed".format(maker_task.get_outputdir()))
                merge_task.reset_io_mapping()
                merge_task.update_mapping()
                merge_task.process()

            total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()
            total_summary[merge_task.get_sample().get_datasetname()] = merge_task.get_task_summary()

        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary, webdir="~/public_html/dump/metis_stopbaby/").do()

        # 15 min power nap
        time.sleep(15.*60)
Example #19
            files_per_output=100000,
            output_dir=maker_task.get_outputdir() + "/merged",
            output_name=samples[sample] + "_skim_1.root",
            condor_submit_params={"sites": "T2_US_UCSD"},
            output_is_tree=True,
            # check_expectedevents = True,
            tag=job_tag,
            cmssw_version="CMSSW_9_2_0",
            scram_arch="slc6_amd64_gcc530",
            #no_load_from_backup    = True,
        )

        # process the job (either submits, checks for resubmit, or finishes etc.)
        maker_task.process()

        if maker_task.complete():
            merge_task.reset_io_mapping()
            merge_task.update_mapping()
            merge_task.process()

        # save some information for the dashboard
        total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()
        total_summary[merge_task.get_sample().get_datasetname()] = merge_task.get_task_summary()

        # Aggregate whether all tasks are complete
        all_tasks_complete = all_tasks_complete and maker_task.complete() and merge_task.complete()

    # parse the total summary and write out the dashboard
Example #20
        files = [f.name for f in sample.get_files()]
        print(len(files))
        task = CondorTask(sample=sample,
                          open_dataset=False,
                          flush=True,
                          files_per_output=info["fpo"],
                          output_name="merged_ntuple.root",
                          tag=job_tag,
                          cmssw_version=cmssw_ver,
                          executable=exec_path,
                          tarfile=tar_path,
                          condor_submit_params={"sites": "T2_US_UCSD"},
                          special_dir=hadoop_path,
                          arguments=info["meta_conditions"])
        task.process()
        if not task.complete():
            allcomplete = False
        # save some information for the dashboard
        total_summary[dataset] = task.get_task_summary()
    # parse the total summary and write out the dashboard
    StatsParser(data=total_summary,
                webdir="~/public_html/dump/ttH_BabyMaker/").do()
    os.system("chmod -R 755 ~/public_html/dump/ttH_BabyMaker")
    if allcomplete:
        print("")
        print("Job={} finished".format(job_tag))
        print("")
        break
    print("Sleeping 300 seconds ...")
    time.sleep(300)
Example #21
    # into a file that ends up on nfs (specified by `merged_dir` above)
    for dsname,shortname in merge_map.items():
        merge_task = LocalMergeTask(
            input_filenames=task.get_outputs(),
            output_filename="{}/{}.root".format(merged_dir,shortname)
            )
        merge_tasks[dsname] = merge_task
    first_iteration = True
    done = False    
    while not done:
#        summary_info = {}
        done = True # Assume all tasks are complete until we see otherwise
        for dsname,shortname in sample_map.items():
            task = tasks[dsname]
            # Straightforward logic
            if not task.complete():
                task.process()
                done = False
        for dsname,shortname in sample_map_merge.items():
            task = tasks[dsname]
            merge_task = merge_tasks[dsname]            
            if task.complete() and not merge_task.complete():
                merge_task.process()
                done = False
#            summary_info[shortname+"_babymaking"] = task.get_task_summary()
#        StatsParser(data=summary_info, webdir="~/public_html/dump/metis/", make_plots=False).do()
        # sleep for 10 min unless it's the very first run of the loop. In that case sleep for 1 min, to permit a quick check that things are working
        if first_iteration:
            time.sleep(60)
            first_iteration = False
        else:
            time.sleep(600)