Esempio n. 1
0
 def test_file(self):
     """Touch and remove a plain file, checking the filesystem at each step."""
     target = MutableFile("file_1.root")

     # Nothing should be on disk before the file is touched
     self.assertFalse(os.path.exists(target.get_name()))

     # After touch(), both the object and the filesystem must report it
     target.touch()
     self.assertEqual(target.exists(), True)
     self.assertTrue(os.path.exists(target.get_name()))

     # rm() must take it off disk again
     target.rm()
     self.assertFalse(os.path.exists(target.get_name()))
Esempio n. 2
0
 def test_directory(self):
     """Touch and remove a path ending in "/", checking the filesystem at each step."""
     folder = MutableFile("mf_test/")

     # The path must not exist before touch()
     self.assertFalse(os.path.exists(folder.get_name()))

     # touch() should create it, as seen by both the object and os.path
     folder.touch()
     self.assertEqual(folder.exists(), True)
     self.assertTrue(os.path.exists(folder.get_name()))

     # rm() should delete it again
     folder.rm()
     self.assertFalse(os.path.exists(folder.get_name()))
Esempio n. 3
0
def test2():
    """Run a CondorTask over the text files of a test directory and sum its outputs.

    A DirectorySample is built over a per-user base directory (globbing
    ``*.txt``), a CondorTask is created on it with ``files_per_output=1``,
    and the task is processed up to five times with a sleep after each
    attempt. If the task reports completion, the integer content of every
    ``*.txt`` file in the task's output directory is summed and printed.
    """
    # Make a base directory
    basedir = "/hadoop/cms/store/user/{0}/metis_test/example/".format(
        os.getenv("USER"))
    MutableFile(basedir).touch()

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
    ds = DirectorySample(location=basedir,
                         dataset="/TEST/Examplev1/TEST",
                         globber="*.txt")

    # Make a CondorTask (3 in total, one for each input)
    task = CondorTask(
        sample=ds,
        files_per_output=1,
        tag="v0",
        output_name="ttbar_powheg_pythia8_92X.root",
        executable="condor_executable.sh",
        cmssw_version="CMSSW_9_3_1",
        scram_arch="slc6_amd64_gcc700",
        arguments="testarg1",
        tarfile="input.tar.gz",
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        # for the purpose of the example, don't use a backup
        no_load_from_backup=True,
    )
    # do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete (gives up after the last wait)
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete:
            break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
        outsamp = DirectorySample(location=task.get_outputdir(),
                                  dataset="/Blah/blah/BLAH",
                                  globber="*.txt")
        tot = 0
        for f in outsamp.get_files():
            mf = MutableFile(f.get_name())
            tot += int(mf.cat())
        print("It looks like we found 3*2*10 = {0}".format(tot))
    def test_workflow(self):
        """Run three chained DummyMoveTasks through a concatenated Path until it completes."""
        workdir = "/tmp/{}/metis/".format(os.getenv("USER"))

        # Remove ROOT files left over from a previous run
        do_cmd("rm {}/*.root".format(workdir))

        # Ensure the working directory exists
        MutableFile(workdir).touch()

        # Four layers of files; each layer is the input of the next one
        step0, step1, step2, step3 = [], [], [], []
        for idx in range(3):
            step0.append(
                MutableFile(name="{}/step0_{}.root".format(workdir, idx)))
            step1.append(
                MutableFile(name="{}/step1_{}.root".format(workdir, idx)))
            step2.append(
                MutableFile(name="{}/step2_{}.root".format(workdir, idx)))
            step3.append(
                MutableFile(name="{}/step3_{}.root".format(workdir, idx)))

        # Only the first layer is created up front (as empty files)
        for seed in step0:
            seed.touch()

        # First task moves step0 -> step1; the original note says the
        # completion fraction defaults to 1.0 and can be overridden via
        # min_completion_fraction
        t1 = DummyMoveTask(inputs=step0, outputs=step1)

        # Later steps are clones of the first task with different files
        t2 = t1.clone(inputs=step1, outputs=step2)
        t3 = t1.clone(inputs=step2, outputs=step3)

        # Two paths are built separately only to showcase concatenation;
        # per the original note, Path "addition" is not commutative
        p1 = Path([t1, t2]) + Path([t3])

        # Poll until every task in the combined path reports completion
        while True:
            if p1.complete():
                break
            p1.process()
            time.sleep(0.02)

        self.assertEqual(p1.complete(), True)
Esempio n. 5
0
 def test_cat(self):
     """cat() must return everything appended to the file, in order."""
     scratch = MutableFile("file_2.txt")
     scratch.touch()

     # Write the same line twice
     for _ in range(2):
         scratch.append("123\n")

     contents = scratch.cat()
     self.assertEqual(contents, "123\n123\n")

     # Remove the scratch file
     scratch.rm()
Esempio n. 6
0
 def test_append(self):
     """append() must write the given text into the file on disk."""
     scratch = MutableFile("file_2.txt")
     scratch.touch()
     scratch.append("123\n")

     # Read back with plain open() so the check does not rely on MutableFile
     with open(scratch.get_name(), "r") as handle:
         on_disk = handle.read()
     self.assertEqual(on_disk, "123\n")

     # Remove the scratch file
     scratch.rm()
Esempio n. 7
0
def test2():
    """Run a CondorTask over the text files of a test directory and sum its outputs.

    A DirectorySample is built over a per-user base directory (globbing
    ``*.txt``), a CondorTask is created on it with ``files_per_output=1``,
    and the task is processed up to five times with a sleep after each
    attempt. If the task reports completion, the integer content of every
    ``*.txt`` file in the task's output directory is summed and printed.
    """
    # Make a base directory
    basedir = "/hadoop/cms/store/user/{0}/metis_test/example/".format(os.getenv("USER"))
    MutableFile(basedir).touch()

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
    ds = DirectorySample(location=basedir, dataset="/TEST/Examplev1/TEST", globber="*.txt")

    # Make a CondorTask (3 in total, one for each input)
    task = CondorTask(
        sample=ds,
        files_per_output=1,
        tag="v0",
        output_name="ttbar_powheg_pythia8_92X.root",
        executable="condor_executable.sh",
        cmssw_version="CMSSW_9_3_1",
        scram_arch="slc6_amd64_gcc700",
        arguments="testarg1",
        tarfile="input.tar.gz",
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        no_load_from_backup=True,  # for the purpose of the example, don't use a backup
    )
    # do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete (gives up after the last wait)
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete:
            break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
        outsamp = DirectorySample(location=task.get_outputdir(), dataset="/Blah/blah/BLAH", globber="*.txt")
        tot = 0
        for f in outsamp.get_files():
            mf = MutableFile(f.get_name())
            tot += int(mf.cat())
        print("It looks like we found 3*2*10 = {0}".format(tot))
    def test_workflow(self):
        """Merge three freshly written ROOT files with LocalMergeTask and check the output exists."""
        # Imported locally so that ROOT is only needed when this test runs
        import ROOT as r

        basepath = "/tmp/{}/metis/localmerge/".format(os.getenv("USER"))

        # Make the base directory
        MutableFile(basepath).touch()

        # Clean up before running
        do_cmd("rm {}/*.root".format(basepath))

        # Write three input files, each holding a single default-constructed TH1F
        for i in range(3):
            f = r.TFile("{}/in_{}.root".format(basepath, i), "RECREATE")
            h = r.TH1F()
            h.Write()
            f.Close()

        task = LocalMergeTask(
            input_filenames=glob.glob(basepath + "/in_*.root"),
            output_filename=basepath + "/out.root",
        )

        task.process()

        self.assertEqual(task.get_outputs()[0].exists(), True)
Esempio n. 9
0
    def merge_function(self, inputs, output):
        """Merge the files in `inputs` into `output` using ROOT's TFileMerger.

        Args:
            inputs: sized sequence of file objects providing ``get_name()``
                and ``exists()``.
            output: file object providing ``get_name()``, ``get_basepath()``
                and ``get_filesizeMB()``.

        Raises:
            RuntimeError: if ``self.ignore_bad`` is false and the number of
                files accepted by ``AddFile`` differs from ``len(inputs)``.

        Note:
            ``self.show_progress`` is set to False when fewer than 5 inputs
            are given.
        """
        # make the directory hosting the output if it doesn't exist
        fdir = output.get_basepath()
        if not os.path.exists(fdir):
            Utils.do_cmd("mkdir -p {0}".format(fdir))

        # Capture the count before `inputs` is possibly wrapped by tqdm below
        ntotal = len(inputs)

        # when merging 1 file, TFileMerger defaults to a special case
        # of just copying the file. this screws up because of an issue
        # in TUrl and leaves potentially big files in /tmp/ without cleaning
        # them up later, so do it nonlocally, sigh :(
        local = ntotal != 1
        if ntotal < 5:
            self.show_progress = False

        fm = r.TFileMerger(local)
        fm.OutputFile(output.get_name())
        fm.SetFastMethod(True)
        fm.SetMaxOpenedFiles(400)
        fm.SetPrintLevel(0)
        ngood = 0
        self.logger.info("Adding {0} files to be merged".format(ntotal))

        if self.show_progress:
            # The progress bar is optional: fall back silently if tqdm is
            # not installed, but do not swallow unrelated errors
            try:
                from tqdm import tqdm
            except ImportError:
                pass
            else:
                inputs = tqdm(inputs)

        t0 = time.time()

        for inp in inputs:
            if self.ignore_bad and not inp.exists():
                continue
            ngood += fm.AddFile(inp.get_name(), False)
            if self.show_progress:
                # Merge incrementally so the progress bar tracks real work
                fm.PartialMerge(r.TFileMerger.kIncremental
                                | r.TFileMerger.kAll)

        if not self.ignore_bad and (ngood != ntotal):
            # NOTE(review): `output` is already a file object here; confirm
            # MutableFile accepts it rather than a file name
            MutableFile(output).rm()
            raise RuntimeError(
                "Tried to merge {0} files into {1}, but only {2} of them got included properly"
                .format(ntotal, output.get_name(), ngood))

        # With a progress bar the merge already happened incrementally above
        if not self.show_progress:
            fm.Merge()

        t1 = time.time()
        sizemb = output.get_filesizeMB()

        # Guard against a zero elapsed time on coarse clocks
        elapsed = t1 - t0
        rate = sizemb / elapsed if elapsed > 0 else 0.0

        self.logger.info(
            "Done merging files into {} ({:.1f}MB). Took {:.2f} secs @ {:.1f}MB/s"
            .format(output.get_name(), sizemb, elapsed, rate))
Esempio n. 10
0
    def test_textfile(self):
        """FilelistSample must report one file per line of the list file."""
        dataset = "/blah/blah/BLAH/"
        list_path = "tfsampletest.tmp"
        expected_count = 3

        # Write a temporary list file holding dummy ntuple names, one per
        # line, for FilelistSample to pick up
        listing = MutableFile(list_path)
        listing.touch()
        for number in range(1, expected_count + 1):
            listing.append("ntuple{}.root\n".format(number))

        sample = FilelistSample(dataset=dataset, filelist=list_path)
        found = sample.get_files()
        self.assertEqual(len(found), expected_count)

        # Remove the temporary list file
        listing.rm()
Esempio n. 11
0
from __future__ import print_function

from metis.File import MutableFile
"""
Showcases some file operations normally done using the os
module, but nicely wrapped in the MutableFile object
"""
if __name__ == "__main__":

    # Make a mutable file object
    fo = MutableFile("mutablefile_test.txt")

    # Touch the file to guarantee its existence
    fo.touch()

    # Does it exist? Hint: yes
    print("File exists?", fo.exists())

    # What are the current permissions? (chmod() without arguments reads them)
    print("Permissions =", fo.chmod())

    # Add some text to it
    fo.append("test text\n")
    fo.append("more text")

    # And cat it out
    print("---- Begin contents --->")
    print(fo.cat())
    print("<--- End contents ----")

    # Clean up by removing the file
    fo.rm()
Esempio n. 12
0
 def test_chmod(self):
     """chmod() must apply numeric and symbolic modes and report the current mode."""
     target = MutableFile("chmodtest.txt")
     target.touch()

     # Set a numeric mode, then read it back with an argument-less call
     target.chmod(644)
     self.assertEqual(target.chmod(), 644)

     # A symbolic change adds the execute bit for the owner
     target.chmod("u+x")
     self.assertEqual(target.chmod(), 744)

     # Remove the scratch file
     target.rm()
Esempio n. 13
0
from metis.CondorTask import CondorTask
from metis.Utils import do_cmd
"""
Let's say we want to compute 3*2*10. I know the answer is 60, only because I
have calculated it using this example. Since this is an involved calculation,
we will make 3 text files containing the number 10. We will submit condor jobs,
one per text file, which will take the file, multiply the content by two, and
copy back an output file. When the jobs are complete, we will sum up the outputs.
"""

if __name__ == "__main__":

    # Make a base directory
    basedir = "/hadoop/cms/store/user/{0}/metis_test/example/".format(
        os.getenv("USER"))
    MutableFile(basedir).touch()

    # Make 3 text files (file_<i>.txt) in the base directory and fill them with text "10"
    mfs = []
    for i in range(3):
        mf = MutableFile("{0}/file_{1}.txt".format(basedir, i))
        mf.rm()
        mf.append("10\n")
        mfs.append(mf)

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
    ds = DirectorySample(location=basedir,
                         dataset="/TEST/Examplev1/TEST",
                         globber="*.txt")