Code Example #1
    def test_workflow(self):

        import ROOT as r

        basepath = "/tmp/{}/metis/localmerge/".format(os.getenv("USER"))

        # Make the base directory
        MutableFile(basepath).touch()

        # Clean up before running
        do_cmd("rm {}/*.root".format(basepath))

        for i in range(0, 3):
            f = r.TFile("{}/in_{}.root".format(basepath, i), "RECREATE")
            h = r.TH1F()
            h.Write()
            f.Close()

        outname = "/home/users/namin/2017/test/ProjectMetis/testout/out.root"
        task = LocalMergeTask(
            # input_filenames=glob.glob("/hadoop/cms/store/user/namin/AutoTwopler_babies/FT_v1.06_v2/W4JetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8_RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1/output/output_4*.root"),
            input_filenames=glob.glob(basepath + "/in_*.root"),
            output_filename=basepath + "/out.root",
        )

        task.process()

        self.assertEqual(task.get_outputs()[0].exists(), True)
Code Example #2
    def test_workflow(self):

        basepath = "/tmp/{}/metis/".format(os.getenv("USER"))

        # Clean up before running
        do_cmd("rm {}/*.root".format(basepath))

        # Make the base directory
        MutableFile(basepath).touch()

        # Set up 4 layers of input->output files
        step0, step1, step2, step3 = [], [], [], []
        for i in range(3):
            step0.append(
                MutableFile(name="{}/step0_{}.root".format(basepath, i)))
            step1.append(
                MutableFile(name="{}/step1_{}.root".format(basepath, i)))
            step2.append(
                MutableFile(name="{}/step2_{}.root".format(basepath, i)))
            step3.append(
                MutableFile(name="{}/step3_{}.root".format(basepath, i)))

        # Touch the step0 files to ensure they "exist", but they're still empty
        list(map(lambda x: x.touch(), step0))

        # Make a DummyMoveTask with previous inputs, outputs
        # each input will be moved to the corresponding output file
        # by default, completion fraction must be 1.0, but can be specified
        t1 = DummyMoveTask(
            inputs=step0,
            outputs=step1,
            # min_completion_fraction = 0.6,
        )

        # Clone first task for subsequent steps
        t2 = t1.clone(inputs=step1, outputs=step2)
        t3 = t1.clone(inputs=step2, outputs=step3)

        # Make a path, which will run tasks in sequence provided previous tasks
        # finish. Default dependency graph ("scheduled mode") will make it so
        # that t2 depends on t1 and t3 depends on t2
        pa = Path([t1, t2])
        pb = Path([t3])

        # Yes, it was silly to make two paths, but that was done to showcase
        # the following concatenation ability (note that "addition" here is not
        # commutative)
        p1 = pa + pb

        while not p1.complete():
            p1.process()

            time.sleep(0.02)

        self.assertEqual(p1.complete(), True)
Code Example #3
File: DummyTask.py Project: usarica/ProjectMetis
    def process(self):
        """
        Moves (one-to-one) input files to output files
        """

        for inp, out in zip(self.get_inputs(), self.get_outputs()):

            if self.create_inputs and not inp.exists():
                self.logger.debug("Specified create_inputs=True, so creating input file {}".format(inp.get_name()))
                do_cmd("touch {}".format(inp.get_name()))
                inp.recheck()

            do_cmd("mv {} {}".format(inp.get_name(), out.get_name()))
            out.recheck()
            self.logger.debug("Running on {0} -> {1}".format(inp.get_name(), out.get_name()))
Code Example #4
File: Sample.py Project: usarica/ProjectMetis
    def get_globaltag(self):
        if self.info.get("gtag", None):
            return self.info["gtag"]
        if self.dasgoclient:
            cmd = "dasgoclient -query 'config dataset={} system=dbs3' -json".format(
                self.info["dataset"])
            js = json.loads(do_cmd(cmd))
            response = js[0]["config"][0]
        else:
            response = self.do_dis_query(self.info["dataset"], typ="config")
        self.info["gtag"] = str(response["global_tag"])
        self.info["native_cmssw"] = str(response["release_version"])
        return self.info["gtag"]
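A hedged usage sketch of the method above. The DBSSample class name, the import path, and the dataset string are illustrative assumptions, not taken from the snippet:

# Sketch only: assumes a Sample subclass (here called DBSSample) whose instances
# carry self.info["dataset"] and may enable self.dasgoclient.
from metis.Sample import DBSSample  # assumed import path

s = DBSSample(dataset="/SomePrimaryDataset/SomeCampaign-v1/MINIAODSIM")  # made-up dataset
print(s.get_globaltag())       # queries DAS once, then caches the tag in s.info["gtag"]
print(s.info["native_cmssw"])  # release version cached by the same call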
Code Example #5
File: CrabManager_t.py Project: usarica/ProjectMetis
    def test_config_parameters(self):

        return  # NOTE: this early return disables the rest of the test

        from metis.CrabManager import CrabManager

        basepath = "/tmp/{0}/metis/crab_test/".format(os.getenv("USER"))
        do_cmd("mkdir -p {0}".format(basepath))
        do_cmd("touch {0}/pset.py".format(basepath))
        dataset = "/TTZToLL_M-1to10_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISummer16MiniAODv2-80X_mcRun2_asymptotic_2016_TrancheIV_v6-v1/MINIAODSIM"
        dataset_user = "******"
        request_name = "test_metis_ttzlowmass"
        pset_location = "{0}/pset.py".format(basepath)

        cm1 = CrabManager(
            dataset=dataset,
            request_name=request_name,
            pset_location=pset_location,
        )
        cfg = cm1.get_crab_config()
        self.assertEqual(cfg.JobType.pluginName, "Analysis")
        self.assertEqual(cfg.JobType.psetName, pset_location)
        self.assertEqual(cfg.Data.inputDataset, dataset)
        self.assertEqual(cfg.Data.splitting, "FileBased")
        self.assertEqual(cfg.Data.inputDBS, "global")

        cm2 = CrabManager(
            dataset=dataset_user,
            request_name=request_name,
            pset_location=pset_location,
            plugin_name="MyPlugin",
        )
        cfg = cm2.get_crab_config()
        self.assertEqual(cfg.Data.inputDataset, dataset_user)
        self.assertEqual(cfg.JobType.pluginName, "MyPlugin")
        self.assertEqual(cfg.Data.inputDBS, "phys03")
Code Example #6
    def get_unique_request_name(self):

        # trivial check
        if self.unique_request_name:
            return self.unique_request_name

        # more robust check
        crablog = "{0}/crab.log".format(self.task_dir)
        if os.path.isfile(crablog):
            taskline = do_cmd("/bin/grep 'Success' -A 1 -m 1 {0} | /bin/grep 'Task name'".format(crablog))
            if "Task name:" in taskline:
                self.unique_request_name = taskline.split("Task name:")[1].strip()
            self.logger.debug("found crablog {0} and parsing to find unique_request_name: {1}".format(crablog, self.unique_request_name))
            return self.unique_request_name

        return None
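For reference, a minimal sketch of the string parsing the method above performs; the "Task name:" line below is fabricated for illustration, not a real crab.log excerpt:

# Illustrative only: exercise the split/strip used above on a made-up line
taskline = "Task name: 240101_120000:user_crab_test_metis_ttzlowmass"  # made-up
unique_request_name = taskline.split("Task name:")[1].strip()
print(unique_request_name)  # -> 240101_120000:user_crab_test_metis_ttzlowmass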
Code Example #7
File: Sample.py Project: usarica/ProjectMetis
    def load_from_dasgoclient(self):

        cmd = "dasgoclient -query 'file dataset={}' -json".format(
            self.info["dataset"])
        js = json.loads(do_cmd(cmd))
        fileobjs = []
        for j in js:
            f = j["file"][0]
            if (not hasattr(self, "selection")
                    or self.selection(f["name"])):
                fileobjs.append(
                    FileDBS(name=f["name"],
                            nevents=f["nevents"],
                            filesizeGB=round(f["size"] * 1e-9, 2)))
        fileobjs = sorted(fileobjs, key=lambda x: x.get_name())

        self.info["files"] = fileobjs
        self.info["nevts"] = sum(fo.get_nevents() for fo in fileobjs)
Code Example #8
File: privateSamples.py Project: cmstas/StopAnalysis
            cmssw_version = "CMSSW_9_2_8",
            scram_arch = "slc6_amd64_gcc530",
            no_load_from_backup = True,
        )
        maker_tasks.append(maker_task)
        merge_tasks.append(merge_task)


    for i in range(100):
        total_summary = {}

        for maker_task, merge_task in zip(maker_tasks,merge_tasks):
            maker_task.process()

            if maker_task.complete():
                do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir()))
                do_cmd("mkdir -p {}/skimmed".format(maker_task.get_outputdir()))
                merge_task.reset_io_mapping()
                merge_task.update_mapping()
                merge_task.process()

            total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()
            total_summary[merge_task.get_sample().get_datasetname()] = merge_task.get_task_summary()

        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary, webdir="~/public_html/dump/metis_stopbaby/").do()

        # 15 min power nap
        time.sleep(15.*60)

    # If it's complete, make a dummy sample out of the output directory
Code Example #9
            tag=tag,
            cmssw_version="CMSSW_9_2_8",
            scram_arch="slc6_amd64_gcc530",
            no_load_from_backup=True,
        )
        maker_tasks.append(maker_task)
        merge_tasks.append(merge_task)

    for i in range(100):
        total_summary = {}

        for maker_task, merge_task in zip(maker_tasks, merge_tasks):
            maker_task.process()

            if maker_task.complete():
                do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir()))
                do_cmd("mkdir -p {}/skimmed".format(
                    maker_task.get_outputdir()))
                merge_task.reset_io_mapping()
                merge_task.update_mapping()
                merge_task.process()

            total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()
            total_summary[merge_task.get_sample().get_datasetname()] = merge_task.get_task_summary()

        # parse the total summary and write out the dashboard
        StatsParser(data=total_summary,
                    webdir="~/public_html/dump/metis_stopbaby/").do()
Code Example #10
def get_global_tag(psetname):
    return do_cmd("""tac {} | grep process.GlobalTag.globaltag | head -1 | cut -d '"' -f2""".format(psetname))
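A minimal sketch of how the helper above behaves, assuming a hypothetical pset file that assigns the global tag inside double quotes (the filename and tag value are made up):

# Sketch only: write a one-line stand-in pset and scrape the tag back out
with open("/tmp/pset_sketch.py", "w") as fh:
    fh.write('process.GlobalTag.globaltag = "94X_mc2017_realistic_v14"\n')
print(get_global_tag("/tmp/pset_sketch.py"))  # -> 94X_mc2017_realistic_v14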
Code Example #11
        # print "cd .."
        # print

        print "mkdir -p {taskname}".format(taskname=taskname)
        print "(".format(taskname=taskname)
        print "cd {taskname} ; cp ../pset.py .".format(taskname=taskname)
        print "cmsRun -n 1 pset.py inputs={infiles} output={outname} globaltag={globaltag} {psetargs} && ".format(infiles=infiles,outname=outname,globaltag=globaltag,psetargs=psetargs)
        print "gfal-copy -p -f -t 4200 --verbose file://`pwd`/{outname} gsiftp://gftp.t2.ucsd.edu{outdir}{outname} --checksum ADLER32".format(outname=outname, outdir=outdir)
        print ") >& {taskname}/log_{index}.txt &".format(taskname=taskname,index=index)
        print """# condor_rm -const 'taskname=="{taskname}" && jobnum=="{jobnum}"' """.format(taskname=taskname,jobnum=jobnum)
        print

    if todownload:
        # transpose (so first element is the first file for each job, second is second, etc.; pad with None)
        todownload = map(list,izip_longest(*todownload))
        # get single list filtering out nontruthy stuff
        todownload = filter(None,sum(todownload,[]))
        print "\n".join(todownload)


if __name__ == "__main__":
    # print_commands("546478.0 546478.9")

    selstr = ""
    # selstr = "547165.230 547454.0 547453.0  547553.0 "
    # selstr = "549298.3"

    cids = " ".join(do_cmd("condor_q -w -nobatch %s | grep Run201 | awk '{print $1}'" % selstr).split())
    # print_commands(cids)
    print_commands(cids, localcache=True)
Code Example #12
        maker_tasks.append(child_tasks)
        merge_tasks.append(merge_task)
        

    for i in range(100):
        total_summary = {}
        
        for child_tasks, merge_task in zip(maker_tasks,merge_tasks):
            all_child_finishes = True
            for maker_task in child_tasks:
                maker_task.process()
                if not maker_task.complete(): 
                    all_child_finishes = False

            if all_child_finishes:
                do_cmd("mkdir -p {}/merged".format(maker_task.get_outputdir()))
                merge_task.reset_io_mapping()
                merge_task.update_mapping()
                merge_task.process()

            if merge_task.complete():
                outfile = '{}/{}_1.root'.format(merge_task.get_outputdir(), merge_task.output_name.split('.')[0])
                target = outfile.split('resub')[0]
                tmp = merge_task.get_sample().get_datasetname().split('_')
                target += 'stopBaby_{}/{}'.format('_'.join(tmp[1:-2]), merge_task.output_name)
                # print('cp {} {}'.format(outfile, target))
                print('cp {} {}'.format(outfile, target))
                do_cmd('cp {} {}'.format(outfile, target))
                print(target, "finished!!")

            total_summary[maker_task.get_sample().get_datasetname()] = maker_task.get_task_summary()
Code Example #13
    gfal-copy -p -f -t 4200 --verbose file://`pwd`/tmp.txt gsiftp://gftp.t2.ucsd.edu${OUTPUTDIR}/${OUTPUTNAME}_${IFILE}.txt --checksum ADLER32
    """)
    exefile.chmod("u+x")

    # Make a CondorTask (3 in total, one for each input)
    task = CondorTask(
        sample=ds,
        files_per_output=1,
        tag="v0",
        output_name="output.txt",
        executable=exefile.get_name(),
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        no_load_from_backup=True,  # for the purpose of the example, don't use a backup
    )
    do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete: break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
Code Example #14
File: Task.py Project: usarica/ProjectMetis
    def get_taskdir(self):
        task_dir = "{0}/tasks/{1}/".format(self.get_basedir(),
                                           self.unique_name)
        if not os.path.exists(task_dir):
            do_cmd("mkdir -p {0}/logs/std_logs/".format(task_dir))
        return os.path.normpath(task_dir)
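Illustrative layout produced by the method above (the directory names are placeholders inferred from the code):

# <basedir>/tasks/<unique_name>/               <- returned, normalized without the trailing slash
# <basedir>/tasks/<unique_name>/logs/std_logs/ <- created via mkdir -p on the first call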
Code Example #15
File: submit.py Project: usarica/FTInterpretations
def condor_submit(**kwargs): # pragma: no cover
    """
    Takes in various keyword arguments to submit a condor job.
    Returns (succeeded:bool, cluster_id:str)
    fake=True kwarg returns (True, -1)
    multiple=True will let `arguments` and `selection_pairs` be lists (of lists)
    and will queue up one job for each element
    """

    if kwargs.get("fake",False):
        return True, -1

    for needed in ["executable","arguments","inputfiles","logdir"]:
        if needed not in kwargs:
            raise RuntimeError("To submit a proper condor job, please specify: {0}".format(needed))

    params = {}

    queue_multiple = kwargs.get("multiple",False)

    params["universe"] = kwargs.get("universe", "Vanilla")
    params["executable"] = kwargs["executable"]
    # params["inputfiles"] = ",".join(kwargs["inputfiles"])
    params["logdir"] = kwargs["logdir"]
    params["proxy"] = get_proxy_file()
    params["timestamp"] = get_timestamp()


    exe_dir = params["executable"].rsplit("/",1)[0]
    if "/" not in os.path.normpath(params["executable"]):
        exe_dir = "."

    # http://uaf-10.t2.ucsd.edu/~namin/dump/badsites.html
    good_sites = [

                "T2_US_Caltech",
                "T2_US_UCSD",
                "T2_US_MIT",
                "T2_US_Nebraska",
                # "T2_US_Purdue", # Issues with fortran?? even though we're in singularity??
                "T2_US_Vanderbilt",

            ]

    params["sites"] = kwargs.get("sites",",".join(good_sites))

    if queue_multiple:
        if len(kwargs["arguments"]) and (type(kwargs["arguments"][0]) not in [tuple,list]):
            raise RuntimeError("If queueing multiple jobs in one cluster_id, arguments must be a list of lists")
        params["arguments"] = map(lambda x: " ".join(map(str,x)), kwargs["arguments"])
        params["inputfiles"] = map(lambda x: ",".join(map(str,x)), kwargs["inputfiles"])
        params["extra"] = []
        if "selection_pairs" in kwargs:
            sps = kwargs["selection_pairs"]
            if len(sps) != len(kwargs["arguments"]):
                raise RuntimeError("Selection pairs must match argument list in length")
            for sel_pairs in sps:
                extra = ""
                for sel_pair in sel_pairs:
                    if len(sel_pair) != 2:
                        raise RuntimeError("This selection pair is not a 2-tuple: {0}".format(str(sel_pair)))
                    extra += '+{0}="{1}"\n'.format(*sel_pair)
                params["extra"].append(extra)
    else:
        params["arguments"] = " ".join(map(str,kwargs["arguments"]))
        params["inputfiles"] = ",".join(map(str,kwargs["inputfiles"]))
        params["extra"] = ""
        if "selection_pairs" in kwargs:
            for sel_pair in kwargs["selection_pairs"]:
                if len(sel_pair) != 2:
                    raise RuntimeError("This selection pair is not a 2-tuple: {0}".format(str(sel_pair)))
                params["extra"] += '+{0}="{1}"\n'.format(*sel_pair)

    params["proxyline"] = "x509userproxy={proxy}".format(proxy=params["proxy"])

    # Require singularity+cvmfs unless machine is uaf-*. or uafino.
    # NOTE, double {{ and }} because this gets str.format'ted later on
    # Must have singularity & cvmfs, OR be a uaf/uafino machine (and, if a uaf machine,
    # not have too high a SlotID, so that we don't take all of the cores of a uaf)
    requirements_line = 'Requirements = ((HAS_SINGULARITY=?=True) && (HAS_CVMFS_cms_cern_ch =?= true)) || (regexp("(uaf-[0-9]{{1,2}}|uafino)\.", TARGET.Machine) && !(TARGET.SlotID>(TotalSlots<14 ? 3:7) && regexp("uaf-[0-9]", TARGET.machine)))'
    if kwargs.get("universe","").strip().lower() in ["local"]:
        kwargs["requirements_line"] = "Requirements = "
    if kwargs.get("requirements_line","").strip():
        requirements_line = kwargs["requirements_line"]

    template = """
universe={universe}
+DESIRED_Sites="{sites}"
executable={executable}
transfer_executable=True
transfer_output_files = ""
+Owner = undefined
+project_Name = \"cmssurfandturf\"
log={logdir}/{timestamp}.log
output={logdir}/std_logs/1e.$(Cluster).$(Process).out
error={logdir}/std_logs/1e.$(Cluster).$(Process).err
notification=Never
should_transfer_files = YES
when_to_transfer_output = ON_EXIT
"""
    template += "{0}\n".format(params["proxyline"])
    template += "{0}\n".format(requirements_line)
    if kwargs.get("stream_logs",False):
        template += "StreamOut=True\nstream_error=True\nTransferOut=True\nTransferErr=True\n"
    for ad in kwargs.get("classads",[]):
        if len(ad) != 2:
            raise RuntimeError("This classad pair is not a 2-tuple: {0}".format(str(ad)))
        template += '+{0}="{1}"\n'.format(*ad)
    do_extra = len(params["extra"]) == len(params["arguments"])
    if queue_multiple:
        template += "\n"
        for ijob,(args,inp) in enumerate(zip(params["arguments"],params["inputfiles"])):
            template += "arguments={0}\n".format(args)
            template += "transfer_input_files={0}\n".format(inp)
            if do_extra:
                template += "{0}\n".format(params["extra"][ijob])
            template += "queue\n"
            template += "\n"
    else:
        template += "arguments={0}\n".format(params["arguments"])
        template += "transfer_input_files={0}\n".format(params["inputfiles"])
        template += "{0}\n".format(params["extra"])
        template += "queue\n"

    if kwargs.get("return_template",False):
        return template.format(**params)

    buff = template.format(**params)

    with open("{0}/submit.cmd".format(exe_dir),"w") as fhout:
        fhout.write(buff)

    extra_cli = ""
    schedd = kwargs.get("schedd","") # see note in condor_q about `schedd`
    if schedd:
        extra_cli += " -name {} ".format(schedd)
    do_cmd("mkdir -p {}/std_logs/ ".format(params["logdir"]))

    # print buff
    # # FIXME: debugging block; printing and exiting here would prevent the actual submission below
    # print "Wrote the file, and now exiting"
    # sys.exit()

    out = do_cmd("condor_submit {}/submit.cmd {}".format(exe_dir,extra_cli))


    succeeded = False
    cluster_id = -1
    if "job(s) submitted to cluster" in out:
        succeeded = True
        cluster_id = out.split("submitted to cluster ")[-1].split(".",1)[0].strip()
    else:
        raise RuntimeError("Couldn't submit job to cluster because:\n----\n{0}\n----".format(out))

    return succeeded, cluster_id
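A hedged usage sketch of condor_submit; every path and argument below is a placeholder, and fake=True (per the docstring) short-circuits to (True, -1) so nothing is actually submitted:

# Sketch only: placeholder paths/arguments; fake=True avoids a real submission
succeeded, cluster_id = condor_submit(
    executable="/path/to/executable.sh",  # placeholder
    arguments=["arg1", 2, "arg3"],
    inputfiles=["/path/to/input1.root"],  # placeholder
    logdir="/path/to/logs",               # placeholder
    fake=True,
)
# -> (True, -1)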