def test_file(self):
    """Check the touch()/rm() life cycle of a MutableFile naming a plain file.

    The file must be absent before touch(), present afterwards (according to
    both MutableFile.exists() and os.path.exists), and absent again after rm().
    """
    target = MutableFile("file_1.root")

    def on_disk():
        # Ask the filesystem directly, independently of MutableFile.exists()
        return os.path.exists(target.get_name())

    # Nothing may be there before the first touch
    self.assertEqual(on_disk(), False)

    # Creation: the wrapper and the filesystem have to agree
    target.touch()
    self.assertEqual(target.exists(), True)
    self.assertEqual(on_disk(), True)

    # Removal: the path must be gone again
    target.rm()
    self.assertEqual(on_disk(), False)
def test_directory(self):
    """Check the touch()/rm() life cycle of a MutableFile named with a trailing slash.

    The name "mf_test/" is presumably treated as a directory by MutableFile
    (the test name suggests so); this test only checks existence on disk.
    """
    target = MutableFile("mf_test/")

    def on_disk():
        # Ask the filesystem directly, independently of MutableFile.exists()
        return os.path.exists(target.get_name())

    # Nothing may be there before the first touch
    self.assertEqual(on_disk(), False)

    # Creation: the wrapper and the filesystem have to agree
    target.touch()
    self.assertEqual(target.exists(), True)
    self.assertEqual(on_disk(), True)

    # Removal: the path must be gone again
    target.rm()
    self.assertEqual(on_disk(), False)
def test2():
    """Run a CondorTask over the text files of the example directory and sum its outputs.

    The task is processed and polled on a fixed sleep schedule. If it reports
    completion within that schedule, every "*.txt" file in the task's output
    directory is read, converted to an integer and added up, and the total is
    printed. Nothing in this function creates the input text files; they are
    expected to be present under the base directory already.
    """
    # Make a base directory
    basedir = "/hadoop/cms/store/user/{0}/metis_test/example/".format(
        os.getenv("USER"))
    MutableFile(basedir).touch()

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
    ds = DirectorySample(location=basedir,
                         dataset="/TEST/Examplev1/TEST",
                         globber="*.txt")

    # Make a CondorTask (3 in total, one for each input)
    task = CondorTask(
        sample=ds,
        files_per_output=1,
        tag="v0",
        output_name="ttbar_powheg_pythia8_92X.root",
        executable="condor_executable.sh",
        cmssw_version="CMSSW_9_3_1",
        scram_arch="slc6_amd64_gcc700",
        arguments="testarg1",
        tarfile="input.tar.gz",
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        no_load_from_backup=True,  # for the purpose of the example, don't use a backup
    )
    # To start from a clean slate, the output directory can be wiped first:
    # do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete (gives up after the last interval)
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete:
            break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
        outsamp = DirectorySample(location=task.get_outputdir(),
                                  dataset="/Blah/blah/BLAH",
                                  globber="*.txt")
        tot = 0
        for f in outsamp.get_files():
            mf = MutableFile(f.get_name())
            tot += int(mf.cat())
        print("It looks like we found 3*2*10 = {0}".format(tot))
def test_workflow(self):
    """Run three chained DummyMoveTasks through a concatenated Path until complete.

    Four layers of three files each are declared (step0..step3); only the
    step0 files are touched up front. One task per layer transition is
    created, the tasks are combined into a single Path, and the Path is
    processed repeatedly until it reports completion.
    """
    basepath = "/tmp/{}/metis/".format(os.getenv("USER"))

    # Clean up before running
    do_cmd("rm {}/*.root".format(basepath))

    # Make the base directory
    MutableFile(basepath).touch()

    # Set up 4 layers of input->output files, three files per layer
    templates = (
        "{}/step0_{}.root",
        "{}/step1_{}.root",
        "{}/step2_{}.root",
        "{}/step3_{}.root",
    )
    layers = [[] for _ in templates]
    for idx in range(3):
        for layer, template in zip(layers, templates):
            layer.append(MutableFile(name=template.format(basepath, idx)))
    step0, step1, step2, step3 = layers

    # Touch the step0 files to ensure they "exist", but they're still empty
    for seed_file in step0:
        seed_file.touch()

    # Make a DummyMoveTask with previous inputs, outputs:
    # each input will be moved to the corresponding output file.
    # By default, completion fraction must be 1.0, but it can be specified
    # (e.g. min_completion_fraction = 0.6)
    first = DummyMoveTask(inputs=step0, outputs=step1)

    # Clone first task for subsequent steps
    second = first.clone(inputs=step1, outputs=step2)
    third = first.clone(inputs=step2, outputs=step3)

    # Make paths, which run tasks in sequence provided previous tasks finish.
    # Two paths are built only to showcase concatenation (note that
    # "addition" here is not commutative)
    head = Path([first, second])
    tail = Path([third])
    whole = head + tail

    # Keep processing until every task in the combined path is done
    while not whole.complete():
        whole.process()
        time.sleep(0.02)

    self.assertEqual(whole.complete(), True)
def test_cat(self):
    """Check that cat() returns the text of two successive append() calls, in order."""
    scratch = MutableFile("file_2.txt")
    scratch.touch()

    # Write the same line twice
    for _ in range(2):
        scratch.append("123\n")

    contents = scratch.cat()
    self.assertEqual(contents, "123\n123\n")

    # Clean up
    scratch.rm()
def test_append(self):
    """Check that append() writes its text to the file, verified with a plain open()."""
    scratch = MutableFile("file_2.txt")
    scratch.touch()
    scratch.append("123\n")

    # Read the file back without going through MutableFile
    with open(scratch.get_name(), "r") as handle:
        written = handle.read()
        self.assertEqual(written, "123\n")

    # Clean up
    scratch.rm()
def test2():
    """Run a CondorTask over the text files of the example directory and sum its outputs.

    The task is processed and polled on a fixed sleep schedule. If it reports
    completion within that schedule, every "*.txt" file in the task's output
    directory is read, converted to an integer and added up, and the total is
    printed. Nothing in this function creates the input text files; they are
    expected to be present under the base directory already.
    """
    # Make a base directory
    basedir = "/hadoop/cms/store/user/{0}/metis_test/example/".format(os.getenv("USER"))
    MutableFile(basedir).touch()

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
    ds = DirectorySample(location=basedir, dataset="/TEST/Examplev1/TEST", globber="*.txt")

    # Make a CondorTask (3 in total, one for each input)
    task = CondorTask(
        sample=ds,
        files_per_output=1,
        tag="v0",
        output_name="ttbar_powheg_pythia8_92X.root",
        executable="condor_executable.sh",
        cmssw_version="CMSSW_9_3_1",
        scram_arch="slc6_amd64_gcc700",
        arguments="testarg1",
        tarfile="input.tar.gz",
        condor_submit_params={"sites": "UAF,T2_US_UCSD,UCSB"},
        no_load_from_backup=True,  # for the purpose of the example, don't use a backup
    )
    # To start from a clean slate, the output directory can be wiped first:
    # do_cmd("rm -rf {0}".format(task.get_outputdir()))

    # Process and sleep until complete (gives up after the last interval)
    is_complete = False
    for t in [5.0, 5.0, 10.0, 15.0, 20.0]:
        task.process()
        print("Sleeping for {0} seconds".format(int(t)))
        time.sleep(t)
        is_complete = task.complete()
        if is_complete:
            break

    # If it's complete, make a dummy sample out of the output directory
    # in order to pick up the files. Then cat out the contents and sum
    # them up. This should be 3*2*10 = 60
    if is_complete:
        print("Job completed! Checking outputs...")
        outsamp = DirectorySample(location=task.get_outputdir(), dataset="/Blah/blah/BLAH", globber="*.txt")
        tot = 0
        for f in outsamp.get_files():
            mf = MutableFile(f.get_name())
            tot += int(mf.cat())
        print("It looks like we found 3*2*10 = {0}".format(tot))
def test_workflow(self):
    """Merge three small ROOT files with LocalMergeTask and check the output exists.

    Three input files, each holding one default-constructed TH1F, are written
    under a per-user directory in /tmp. A LocalMergeTask is then asked to
    merge them, and the test passes if the task's first output exists
    afterwards.
    """
    import ROOT as r

    basepath = "/tmp/{}/metis/localmerge/".format(os.getenv("USER"))

    # Make the base directory
    MutableFile(basepath).touch()

    # Clean up before running
    do_cmd("rm {}/*.root".format(basepath))

    # Write the input files to be merged
    for i in range(0, 3):
        f = r.TFile("{}/in_{}.root".format(basepath, i), "RECREATE")
        h = r.TH1F()
        h.Write()
        f.Close()

    # Inputs are picked up by globbing the files written above
    task = LocalMergeTask(
        input_filenames=glob.glob(basepath + "/in_*.root"),
        output_filename=basepath + "/out.root",
    )

    task.process()

    self.assertEqual(task.get_outputs()[0].exists(), True)
def merge_function(self, inputs, output):
    """Merge the files in ``inputs`` into ``output`` with ROOT's TFileMerger.

    Args:
        inputs: sized collection of file objects; each one is asked for
            ``exists()`` and ``get_name()``.
        output: file object for the merged result; it is asked for
            ``get_basepath()``, ``get_name()`` and ``get_filesizeMB()``.

    Raises:
        RuntimeError: if ``self.ignore_bad`` is false and the number of
            inputs accepted by ``AddFile`` differs from the number given.
            The output file is removed before raising.
    """
    # make the directory hosting the output if it doesn't exist
    fdir = output.get_basepath()
    if not os.path.exists(fdir):
        Utils.do_cmd("mkdir -p {0}".format(fdir))

    ntotal = len(inputs)

    # when merging 1 file, TFileMerger defaults to a special case
    # of just copying the file. this screws up because of an issue
    # in TUrl and leaves potentially big files in /tmp/ without cleaning
    # them up later, so do it nonlocally, sigh :(
    local = ntotal != 1

    if ntotal < 5:
        # NOTE(review): this overwrites the instance attribute, so progress
        # stays disabled for later calls on the same object as well --
        # confirm that this is intended.
        self.show_progress = False

    fm = r.TFileMerger(local)
    fm.OutputFile(output.get_name())
    fm.SetFastMethod(True)
    fm.SetMaxOpenedFiles(400)
    fm.SetPrintLevel(0)

    ngood = 0
    self.logger.info("Adding {0} files to be merged".format(ntotal))

    if self.show_progress:
        # tqdm is optional: without it the merge simply runs with no bar.
        # Only a failed import is tolerated; anything else should surface.
        try:
            from tqdm import tqdm
        except ImportError:
            pass
        else:
            inputs = tqdm(inputs)

    t0 = time.time()
    for inp in inputs:
        # with ignore_bad set, inputs that do not exist are skipped silently
        if self.ignore_bad and not inp.exists():
            continue
        ngood += fm.AddFile(inp.get_name(), False)
        if self.show_progress:
            # merge incrementally so that the progress bar reflects real work
            fm.PartialMerge(r.TFileMerger.kIncremental | r.TFileMerger.kAll)

    if not self.ignore_bad and (ngood != ntotal):
        # Remove the partial output. The name string is passed, which is how
        # MutableFile is constructed at the other call sites in this code.
        MutableFile(output.get_name()).rm()
        raise RuntimeError(
            "Tried to merge {0} files into {1}, but only {2} of them got included properly"
            .format(ntotal, output.get_name(), ngood))

    if not self.show_progress:
        # nothing was merged incrementally above, so do it in one go
        fm.Merge()

    t1 = time.time()
    sizemb = output.get_filesizeMB()
    elapsed = t1 - t0
    # guard against a zero interval on coarse clocks; a successful merge
    # should not fail while computing a number for the log line
    rate = sizemb / elapsed if elapsed > 0 else 0.0
    self.logger.info(
        "Done merging files into {} ({:.1f}MB). Took {:.2f} secs @ {:.1f}MB/s"
        .format(output.get_name(), sizemb, elapsed, rate))
def test_textfile(self):
    """Check that FilelistSample reports one file per line of its file list."""
    dsname = "/blah/blah/BLAH/"
    fname = "tfsampletest.tmp"
    nfiles = 3

    # make a temporary file putting in some dummy filenames
    # to be picked up by FilelistSample
    listing = MutableFile(fname)
    listing.touch()
    for offset in range(nfiles):
        listing.append("ntuple{}.root\n".format(offset + 1))

    sample = FilelistSample(dataset=dsname, filelist=fname)
    found = sample.get_files()
    self.assertEqual(len(found), nfiles)

    # clean up
    listing.rm()
from __future__ import print_function

from metis.File import MutableFile

"""
Showcases some file operations normally done using the os module,
but nicely wrapped in the MutableFile object
"""

if __name__ == "__main__":

    # Make a mutable file object
    fo = MutableFile("mutablefile_test.txt")

    # Touch the file to guarantee its existence
    fo.touch()

    # Does it exist? Hint: yes
    print("File exists?", fo.exists())

    # What are the current permissions?
    # (chmod() without an argument is used here as a getter)
    print("Permissions =", fo.chmod())

    # Add some text to it
    fo.append("test text\n")
    fo.append("more text")

    # And cat it out
    print("---- Begin contents --->")
    print(fo.cat())
    print("<--- End contents ----")

    # Clean up by removing the file, so repeated runs start from scratch
    fo.rm()
def test_chmod(self):
    """Check chmod() as setter and getter, with a numeric and a symbolic mode.

    Each requested mode is applied with chmod(mode); chmod() without an
    argument is then expected to report the resulting permissions.
    """
    probe = MutableFile("chmodtest.txt")
    probe.touch()

    # (mode to request, permissions expected to be reported afterwards)
    cases = (
        (644, 644),
        ("u+x", 744),
    )
    for requested, expected in cases:
        probe.chmod(requested)
        self.assertEqual(probe.chmod(), expected)

    # Clean up
    probe.rm()
# NOTE(review): `os`, `MutableFile` and `DirectorySample` are used below but
# their imports are not visible in this part of the file -- confirm that they
# are imported above.
from metis.CondorTask import CondorTask
from metis.Utils import do_cmd

"""
Let's say we want to compute 3*2*10. I know the answer is 60, only because
I have calculated it using this example. Since this is an involved
calculation, we will make 3 text files containing the number 10. We will
submit condor jobs, one per text file, which will take the file, multiply
the content by two, and copy back an output file. When the jobs are
complete, we will sum up the outputs.
"""

if __name__ == "__main__":

    # Make a base directory (per user, taken from the USER environment variable)
    basedir = "/hadoop/cms/store/user/{0}/metis_test/example/".format(
        os.getenv("USER"))
    MutableFile(basedir).touch()

    # Make 3 text files (file_<i>.txt) in the base directory and fill them with text "10"
    mfs = []
    for i in range(3):
        mf = MutableFile("{0}/file_{1}.txt".format(basedir, i))
        # Remove any previous copy first, so that the append below leaves
        # exactly one line in the file
        mf.rm()
        mf.append("10\n")
        mfs.append(mf)

    # Make a directory sample, giving it the location and a dataset name for bookkeeping purposes
    # The globber must be customized (by default, it is *.root) in order to pick up the text files
    ds = DirectorySample(location=basedir,
                         dataset="/TEST/Examplev1/TEST",
                         globber="*.txt")