def submit_job(brunel_app, reco_type, input_files=None, local=RUN_LOCAL):
    """Build and queue a Brunel reconstruction job.

    When ``local`` is true the job runs on the Local backend over a single
    file of ``dataset``; otherwise it runs on Dirac over the full dataset.
    Always returns True after queueing the submission.
    """
    # EvtMax: a quick 1-event run for local tests, all events (-1) otherwise.
    brunel_app.extraOpts += 'from Configurables import Brunel\n'
    brunel_app.extraOpts += 'Brunel().EvtMax = {}'.format(1 if local else -1)

    suffix = '(local)' if local else ''
    job = Job(name='VP hybrid distortions',
              comment='{reco_type} reconstruction {suffix}'.format(
                  reco_type=reco_type, suffix=suffix),
              application=brunel_app,
              splitter=SplitByFiles(filesPerJob=1, ignoremissing=True),
              parallel_submit=True)

    if local:
        job.backend = Local()
        job.outputfiles = [LocalFile('*.xdst'), LocalFile('*.root')]
        job.inputdata = dataset[:1]
    else:
        job.backend = Dirac()
        job.outputfiles = [DiracFile('*.xdst'), DiracFile('*.root')]
        job.inputdata = dataset

    job.inputfiles = input_files or []

    # Submission is queued rather than executed inline.
    queues.add(job.submit)
    return True
def test_g_Splitters(self):
    """Documentation snippets: GenericSplitter and GangaDatasetSplitter usage."""
    from Ganga.GPI import Job, GenericSplitter, GangaDataset, GangaDatasetSplitter, LocalFile

    # -- SPLITTERS BASICUSE START
    j = Job()
    j.splitter = GenericSplitter()
    j.splitter.attribute = 'application.args'
    j.splitter.values = [['hello', 1], ['world', 2], ['again', 3]]
    j.submit()
    # -- SPLITTERS BASICUSE STOP

    # -- SPLITTERS SUBJOBCHECK START
    j.subjobs
    j.subjobs(0).peek("stdout")
    # -- SPLITTERS SUBJOBCHECK STOP

    # -- SPLITTERS MULTIATTRS START
    j = Job()
    j.splitter = GenericSplitter()
    j.splitter.multi_attrs = {'application.args': ['hello1', 'hello2'],
                              'application.env': [{'MYENV': 'test1'},
                                                  {'MYENV': 'test2'}]}
    j.submit()
    # -- SPLITTERS MULTIATTRS STOP

    # -- SPLITTERS DATASETSPLITTER START
    j = Job()
    j.application.exe = 'more'
    j.application.args = ['__GangaInputData.txt__']
    j.inputdata = GangaDataset(files=[LocalFile('*.txt')])
    j.splitter = GangaDatasetSplitter()
    j.splitter.files_per_subjob = 2
    j.submit()
def testDatasets(self):
    """Exercise LHCbDataset construction, file-type coercion and job assignment."""
    from Ganga.GPI import DiracFile, PhysicalFile, LHCbDataset, Job, LocalFile

    # Constructor coerces 'lfn:'/'pfn:' prefixed strings to file objects.
    ds = LHCbDataset(['lfn:a', 'pfn:b'])
    assert len(ds) == 2
    print(ds[0])
    assert isinstance(ds[0], DiracFile)
    assert isinstance(ds[1], PhysicalFile)

    # Same coercion when assigning/appending to .files.
    ds = LHCbDataset()
    ds.files = ['lfn:a', 'pfn:b']
    assert isinstance(ds[0], DiracFile)
    assert isinstance(ds[1], PhysicalFile)
    ds.files.append('lfn:c')
    assert isinstance(ds[-1], DiracFile)

    # NOTE(review): OutputData is not in the import list above — presumably
    # exported globally by the GPI; confirm.
    d = OutputData(['a', 'b'])
    assert isinstance(d.files[0], str)
    assert isinstance(d.files[1], str)

    # Assigning plain lists to a Job wraps them in the proper types.
    j = Job()
    j.inputdata = ['lfn:a', 'pfn:b']
    assert isinstance(j.inputdata, LHCbDataset)
    j.outputfiles = ['a', DiracFile('b')]
    assert isinstance(j.outputfiles[0], LocalFile)
    print(type(j.outputfiles[1]))
    assert isinstance(j.outputfiles[1], DiracFile)

    # Catalogue/replica lookups on real LFNs.
    LFN_DATA = ['LFN:/lhcb/LHCb/Collision11/DIMUON.DST/00016768/0000/00016768_00000005_1.dimuon.dst',
                'LFN:/lhcb/LHCb/Collision11/DIMUON.DST/00016768/0000/00016768_00000006_1.dimuon.dst']
    ds = LHCbDataset(LFN_DATA)
    assert len(ds.getReplicas().keys()) == 2
    assert ds.getCatalog()
def testJobCopy(self):
    """Test that a job copy copies everything properly"""
    from Ganga.GPI import Job, ARC, GenericSplitter, GangaDataset, LocalFile, FileChecker
    from Ganga.GPIDev.Base.Proxy import isType

    # Populate every copyable field of a job.
    j = Job()
    j.application.exe = "sleep"
    j.application.args = ['myarg']
    j.backend = ARC()
    j.backend.CE = "my.ce"
    j.inputdata = GangaDataset()
    j.inputdata.files = [LocalFile("*.txt")]
    j.inputfiles = [LocalFile("*.txt")]
    j.name = "testname"
    j.outputfiles = [LocalFile("*.txt")]
    j.postprocessors = FileChecker(files=['stdout'], searchStrings=['my search'])
    j.splitter = GenericSplitter()
    j.splitter.attribute = "application.args"
    j.splitter.values = ['arg 1', 'arg 2', 'arg 3']

    j2 = j.copy()

    # Verify each field survived the copy.
    self.assertTrue(isType(j2, Job))
    self.assertEqual(j2.application.exe, "sleep")
    self.assertEqual(j2.application.args, ["myarg"])
    self.assertTrue(isType(j2.backend, ARC))
    self.assertEqual(j2.backend.CE, "my.ce")
    self.assertTrue(isType(j2.inputdata, GangaDataset))
    self.assertEqual(len(j2.inputdata.files), 1)
    self.assertTrue(isType(j2.inputdata.files[0], LocalFile))
    self.assertEqual(j2.inputdata.files[0].namePattern, "*.txt")
    self.assertEqual(len(j2.inputfiles), 1)
    self.assertTrue(isType(j2.inputfiles[0], LocalFile))
    self.assertEqual(j2.inputfiles[0].namePattern, "*.txt")
    self.assertEqual(j2.name, "testname")
    self.assertEqual(len(j2.outputfiles), 1)
    self.assertTrue(isType(j2.outputfiles[0], LocalFile))
    self.assertEqual(j2.outputfiles[0].namePattern, "*.txt")
    self.assertEqual(len(j2.postprocessors), 1)
    self.assertTrue(isType(j2.postprocessors[0], FileChecker))
    self.assertEqual(j2.postprocessors[0].files, ["stdout"])
    self.assertEqual(j2.postprocessors[0].searchStrings, ["my search"])
    self.assertTrue(isType(j2.splitter, GenericSplitter))
    self.assertEqual(j2.splitter.attribute, "application.args")
    self.assertEqual(j2.splitter.values, ['arg 1', 'arg 2', 'arg 3'])
def testInterfaceLookFeel(self):
    """Check that an Im3Shape job can be constructed through the GPI."""
    from Ganga.GPI import Job, Im3ShapeApp, Im3ShapeSplitter, DiracFile, LocalFile, GangaDataset, Dirac

    j = Job()
    app = Im3ShapeApp(
        im3_location=DiracFile(lfn='/lsst/y1a1-v2-z/software/2016-02-24/im3shape-grid.tar.gz'),
        ini_location=LocalFile('/afs/cern.ch/user/r/rcurrie/cmtuser/GANGA/GANGA_LSST/install/ganga/python/params_disc.ini'),
        blacklist=LocalFile('/afs/cern.ch/user/r/rcurrie/cmtuser/GANGA/GANGA_LSST/install/ganga/python/blacklist-y1.txt'))
    j.application = app
    j.backend = Dirac()

    mydata = GangaDataset()
    mydata.append(DiracFile(lfn='/lsst/DES0005+0043-z-meds-y1a1-gamma.fits.fz'))
    j.inputdata = mydata

    j.splitter = Im3ShapeSplitter(size=20)
    j.outputfiles = [DiracFile('*.main.txt'), DiracFile('*.epoch.txt')]
def testDatasetsFunctions(self):
    """Check LHCbDataset coercion, job assignment and its set-like helpers."""
    from Ganga.GPI import DiracFile, PhysicalFile, LHCbDataset, Job, LocalFile

    # Constructor turns 'lfn:'/'pfn:' strings into DiracFile/LocalFile.
    ds = LHCbDataset(['lfn:a', 'pfn:b'])
    assert len(ds) == 2
    print(ds[0])
    assert isinstance(ds[0], DiracFile)
    assert isinstance(ds[1], LocalFile)

    ds = LHCbDataset()
    ds.files = ['lfn:a', 'pfn:b']
    assert isinstance(ds[0], DiracFile)
    assert isinstance(ds[1], LocalFile)
    assert ds.getFullFileNames() == ['LFN:a', 'PFN:b']
    ds.files.append('lfn:c')
    assert isinstance(ds[-1], DiracFile)

    # Assigning plain lists to a Job wraps them in the proper types.
    j = Job()
    j.inputdata = ['lfn:a', 'pfn:b']
    assert isinstance(j.inputdata, LHCbDataset)
    j.outputfiles = ['a', DiracFile('b')]
    assert isinstance(j.outputfiles[0], LocalFile)
    print(type(j.outputfiles[1]))
    assert isinstance(j.outputfiles[1], DiracFile)

    # Accessors for the LFN/PFN subsets.
    assert ds.getLFNs() == ['a', 'c']
    assert ds.getPFNs() == ['b']

    ds2 = LHCbDataset(['lfn:a', 'lfn:d'])
    ds.extend(ds2, True)
    assert len(ds) == 4

    # Set-like operations between datasets.
    assert sorted(ds.difference(ds2).getFileNames()) == ['b', 'c']
    assert sorted(ds.symmetricDifference(ds2).getFileNames()) == ['b', 'c']
    assert sorted(ds.intersection(ds2).getFileNames()) == ['a', 'd']
    assert sorted(ds.union(ds2).getFileNames()) == ['a', 'b', 'c', 'd']
def test_g_Splitters(self):
    """Documentation snippets showing the job splitters in action."""
    from Ganga.GPI import Job, GenericSplitter, GangaDataset, GangaDatasetSplitter, LocalFile

    # -- SPLITTERS BASICUSE START
    j = Job()
    j.splitter = GenericSplitter()
    j.splitter.attribute = 'application.args'
    j.splitter.values = [['hello', 1], ['world', 2], ['again', 3]]
    j.submit()
    # -- SPLITTERS BASICUSE STOP

    # -- SPLITTERS SUBJOBCHECK START
    j.subjobs
    j.subjobs(0).peek("stdout")
    # -- SPLITTERS SUBJOBCHECK STOP

    # -- SPLITTERS MULTIATTRS START
    j = Job()
    j.splitter = GenericSplitter()
    j.splitter.multi_attrs = {
        'application.args': ['hello1', 'hello2'],
        'application.env': [{'MYENV': 'test1'}, {'MYENV': 'test2'}],
    }
    j.submit()
    # -- SPLITTERS MULTIATTRS STOP

    # -- SPLITTERS DATASETSPLITTER START
    j = Job()
    j.application.exe = 'more'
    j.application.args = ['__GangaInputData.txt__']
    j.inputdata = GangaDataset(files=[LocalFile('*.txt')])
    j.splitter = GangaDatasetSplitter()
    j.splitter.files_per_subjob = 2
    j.submit()
def testDatasets(self):
    """LHCbDataset constructors, setters and job-assignment behaviour."""
    from Ganga.GPI import DiracFile, PhysicalFile, LHCbDataset, Job, LocalFile

    # 'lfn:'/'pfn:' strings are coerced to DiracFile/PhysicalFile.
    ds = LHCbDataset(['lfn:a', 'pfn:b'])
    assert len(ds) == 2
    print(ds[0])
    assert isinstance(ds[0], DiracFile)
    assert isinstance(ds[1], PhysicalFile)

    ds = LHCbDataset()
    ds.files = ['lfn:a', 'pfn:b']
    assert isinstance(ds[0], DiracFile)
    assert isinstance(ds[1], PhysicalFile)
    ds.files.append('lfn:c')
    assert isinstance(ds[-1], DiracFile)

    # NOTE(review): OutputData is not in the import list above — presumably
    # exported globally by the GPI; confirm.
    d = OutputData(['a', 'b'])
    assert isinstance(d.files[0], str)
    assert isinstance(d.files[1], str)

    # Plain lists assigned to a job become the proper dataset/file objects.
    j = Job()
    j.inputdata = ['lfn:a', 'pfn:b']
    assert isinstance(j.inputdata, LHCbDataset)
    j.outputfiles = ['a', DiracFile('b')]
    assert isinstance(j.outputfiles[0], LocalFile)
    print(type(j.outputfiles[1]))
    assert isinstance(j.outputfiles[1], DiracFile)

    LFN_DATA = [
        'LFN:/lhcb/LHCb/Collision11/DIMUON.DST/00016768/0000/00016768_00000005_1.dimuon.dst',
        'LFN:/lhcb/LHCb/Collision11/DIMUON.DST/00016768/0000/00016768_00000006_1.dimuon.dst',
    ]
    ds = LHCbDataset(LFN_DATA)
    assert len(ds.getReplicas().keys()) == 2
    assert ds.getCatalog()
def makeIPResolutionsJob(jobName, dataFile, brunelVersion="", dataType='2012',
                         extraFiles=None, ddDBTag=None, condDBTag=None):
    """Call this method to make a job that will run Brunel with the IP resolutions
    ntupling algorithm, using the given config file (data type, etc) and data file.
    Add this method to your ~/.ganga.py file to have it automatically loaded whenever
    you run ganga.

    Returns the configured Job, or None if the data file cannot be found or
    no DB tags / extra options files are available.
    """
    # Avoid the mutable-default-argument pitfall: [] as a default is shared
    # across calls.
    if extraFiles is None:
        extraFiles = []

    print("Creating an IP resolutions monitoring job named '%s'" % jobName)

    dataFile = os.path.expandvars(dataFile)
    if not os.path.exists(dataFile):
        print('Could not find the data file "%s"!' % dataFile)
        return None
    dataFile = os.path.abspath(dataFile)
    print("Using data file '%s'" % dataFile)

    print("Parsing data file for options.")
    stepInfos = parseDataFileForSteps(dataFile)
    productionOptsFiles = None
    if len(stepInfos) > 0:
        for step in stepInfos:
            # Get any additional options files used for the Brunel step.
            if step['ApplicationName'] == "Brunel":
                productionOptsFiles = []
                for optsFile in step['OptionFiles'].split(";"):
                    productionOptsFiles.append(optsFile.replace(" \n", ""))
            # Get the DB tags used for the Gauss step. This should be the
            # same as for the Brunel step but it seems there's a bug in bkk.
            if step['ApplicationName'] in ("Gauss", 'DaVinci', 'Brunel'):
                if ddDBTag is None:
                    ddDBTag = step['DDDB']
                if condDBTag is None:
                    condDBTag = step['CONDDB']

    if ddDBTag is None and len(extraFiles) == 0:
        print("The DB tags could not be retrieved from the data file and no extra "
              "options files have been specified! The job cannot be made.")
        return None

    j = Job(name=jobName)
    if brunelVersion == "":
        j.application = Brunel()
    else:
        j.application = Brunel(version=brunelVersion)

    j.application.extraopts = (
        'Brunel().OutputLevel = 5\nBrunel().PrintFreq = 10000\n'
        'Brunel().DataType = "{0}"\n'.format(dataType))

    if ddDBTag is not None:
        print('Using DDDBTag "%s" and CondDBTag "%s"' % (ddDBTag, condDBTag))
        j.application.extraopts += (
            '\nBrunel().DDDBtag = "%s"\nBrunel().CondDBtag = "%s"\n'
            % (ddDBTag, condDBTag))
        # Simulated-condition tags need the Simulation flag switched on.
        if 'sim' in ddDBTag.lower():
            j.application.extraopts += '\nBrunel().Simulation = True\n'
            # j.application.extraopts += '\nBrunel().WithMC = True\n'
    else:
        print("The DB tags could not be retrieved from the data file.")
        print("If they are not defined in one of the extra options files "
              "default values will be used.")

    j.application.optsfile = [mainIPConfigFile] + extraFiles
    print("Using options files:")
    for optsFile in j.application.optsfile:
        print(optsFile.name)
    if productionOptsFiles is not None and len(productionOptsFiles) > 0:
        for optsFile in productionOptsFiles:
            print(optsFile)
            j.application.extraopts += '\nimportOptions("%s")\n' % optsFile

    print("Reading in data ...")
    j.inputdata = j.application.readInputData(dataFile)
    print("Data read. %s files found." % len(j.inputdata.files))
    # Derive the Brunel input type from the first file's extension.
    if len(j.inputdata.files) > 0:
        j.application.extraopts += '\nBrunel().InputType = "{0}"\n'.format(
            j.inputdata.files[0].name.split('.')[-1].upper())

    j.splitter = SplitByFiles(filesPerJob=10)
    j.backend = Dirac()
    j.outputfiles = [DiracFile('*.root')]

    return j
def test_f_InputAndOutputData(self):
    """Documentation snippets: input/output files, wildcards and input data."""
    from Ganga.GPI import Job, File, LocalFile, GangaDataset, Local, plugins
    import os

    # -- INPUTANDOUTPUTDATA BASIC START
    # create a script to send
    open('my_script2.sh', 'w').write("""#!/bin/bash
ls -ltr
more "my_input.txt"
echo "TESTING" > my_output.txt
""")
    os.system('chmod +x my_script2.sh')

    # create the input file the script reads
    open('my_input.txt', 'w').write('Input Testing works!')

    j = Job()
    j.application.exe = File('my_script2.sh')
    j.inputfiles = [LocalFile('my_input.txt')]
    j.outputfiles = [LocalFile('my_output.txt')]
    j.submit()
    # -- INPUTANDOUTPUTDATA BASIC STOP

    # -- INPUTANDOUTPUTDATA PEEKOUTPUT START
    j.peek()  # list output dir contents
    j.peek('my_output.txt')
    # -- INPUTANDOUTPUTDATA PEEKOUTPUT STOP

    # -- INPUTANDOUTPUTDATA FAILJOB START
    # This job will fail
    j = Job()
    j.application.exe = File('my_script2.sh')
    j.inputfiles = [LocalFile('my_input.txt')]
    j.outputfiles = [LocalFile('my_output_FAIL.txt')]
    j.submit()
    # -- INPUTANDOUTPUTDATA FAILJOB STOP

    # -- INPUTANDOUTPUTDATA WILDCARD START
    # This job will pick up both 'my_input.txt' and 'my_output.txt'
    j = Job()
    j.application.exe = File('my_script2.sh')
    j.inputfiles = [LocalFile('my_input.txt')]
    j.outputfiles = [LocalFile('*.txt')]
    j.submit()
    # -- INPUTANDOUTPUTDATA WILDCARD STOP

    # -- INPUTANDOUTPUTDATA OUTPUTFILES START
    j.outputfiles
    # -- INPUTANDOUTPUTDATA OUTPUTFILES STOP

    # -- INPUTANDOUTPUTDATA INPUTDATA START
    # Create a test script
    open('my_script3.sh', 'w').write("""#!/bin/bash
echo $PATH
ls -ltr
more __GangaInputData.txt__
echo "MY TEST FILE" > output_file.txt
""")
    os.system('chmod +x my_script3.sh')

    # Submit a job
    j = Job()
    j.application.exe = File('my_script3.sh')
    j.inputdata = GangaDataset(files=[LocalFile('*.sh')])
    j.backend = Local()
    j.submit()
    # -- INPUTANDOUTPUTDATA INPUTDATA STOP

    # -- INPUTANDOUTPUTDATA GANGAFILES START
    plugins('gangafiles')
def test_f_InputAndOutputData(self):
    """Documentation snippets: input/output files, wildcards and input data."""
    from Ganga.GPI import Job, File, LocalFile, GangaDataset, Local, plugins
    import os

    # -- INPUTANDOUTPUTDATA BASIC START
    # create a script to send; use a context manager so the file is flushed
    # and closed before the job tries to ship it
    with open('my_script2.sh', 'w') as f:
        f.write("""#!/bin/bash
ls -ltr
more "my_input.txt"
echo "TESTING" > my_output.txt
""")
    os.system('chmod +x my_script2.sh')

    # create the input file the script reads
    with open('my_input.txt', 'w') as f:
        f.write('Input Testing works!')

    j = Job()
    j.application.exe = File('my_script2.sh')
    j.inputfiles = [LocalFile('my_input.txt')]
    j.outputfiles = [LocalFile('my_output.txt')]
    j.submit()
    # -- INPUTANDOUTPUTDATA BASIC STOP

    # -- INPUTANDOUTPUTDATA PEEKOUTPUT START
    j.peek()  # list output dir contents
    j.peek('my_output.txt')
    # -- INPUTANDOUTPUTDATA PEEKOUTPUT STOP

    # -- INPUTANDOUTPUTDATA FAILJOB START
    # This job will fail
    j = Job()
    j.application.exe = File('my_script2.sh')
    j.inputfiles = [LocalFile('my_input.txt')]
    j.outputfiles = [LocalFile('my_output_FAIL.txt')]
    j.submit()
    # -- INPUTANDOUTPUTDATA FAILJOB STOP

    # -- INPUTANDOUTPUTDATA WILDCARD START
    # This job will pick up both 'my_input.txt' and 'my_output.txt'
    j = Job()
    j.application.exe = File('my_script2.sh')
    j.inputfiles = [LocalFile('my_input.txt')]
    j.outputfiles = [LocalFile('*.txt')]
    j.submit()
    # -- INPUTANDOUTPUTDATA WILDCARD STOP

    # -- INPUTANDOUTPUTDATA OUTPUTFILES START
    j.outputfiles
    # -- INPUTANDOUTPUTDATA OUTPUTFILES STOP

    # -- INPUTANDOUTPUTDATA INPUTDATA START
    # Create a test script
    with open('my_script3.sh', 'w') as f:
        f.write("""#!/bin/bash
echo $PATH
ls -ltr
more __GangaInputData.txt__
echo "MY TEST FILE" > output_file.txt
""")
    os.system('chmod +x my_script3.sh')

    # Submit a job
    j = Job()
    j.application.exe = File('my_script3.sh')
    j.inputdata = GangaDataset(files=[LocalFile('*.sh')])
    j.backend = Local()
    j.submit()
    # -- INPUTANDOUTPUTDATA INPUTDATA STOP

    # -- INPUTANDOUTPUTDATA GANGAFILES START
    plugins('gangafiles')
# Configure the DaVinci ntupling job: a quick local run when --test is given,
# otherwise a full grid submission over the whole dataset.
opts = [s.format(path=base, year=year) for s in OPTIONS]
print('Options files:' + ' '.join(opts))  # NOQA

j = Job(name=JNAME.format(polarity, year, mode))
j.comment = ('{1} {2} MC {0} ntuple creation for k3pi mixing measurement.'
             .format(event_type, year, polarity))
j.application = DaVinci(version='v41r3')
j.application.optsfile = opts

if args.test:
    # If testing, run over a couple of files locally,
    # saving the results to the sandbox
    j.inputdata = dataset[0:1]
    j.backend = Local()
    # Prepend test string to job name
    j.name = 'TEST_{0}'.format(j.name)
    j.outputfiles = [LocalFile(tfn)]
else:
    # If not testing, run over everything on the grid, splitting jobs
    # into groups of 5 files, notifying me on job completion/subjob failure,
    # and save the results on the grid storage
    j.inputdata = dataset
    j.backend = Dirac()
    j.backend.settings['CPUTime'] = 60 * 60 * 24 * 7  # one week
    j.do_auto_resubmit = True
    j.splitter = SplitByFiles(filesPerJob=5, ignoremissing=True)
    j.postprocessors = [Notifier(address=email)]
    j.outputfiles = [DiracFile(tfn)]