def test_getOutputSandbox(db, mocker):
    """getOutputSandbox builds the right DIRAC call for default and explicit
    destination directories, and reports failure as False on GangaDiracError."""
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j  # wire backend to job without going through the registry
    db.id = 1234
    temp_dir = j.getOutputWorkspace().getPath()
    # Default destination: the job's output workspace directory.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=True) as execute:
        assert db.getOutputSandbox(), 'didn\'t run'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" % temp_dir, cred_req=mocker.ANY)
    test_dir = 'test_dir'
    # An explicit destination directory is passed straight through.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=True) as execute:
        assert db.getOutputSandbox(test_dir), 'didn\'t run with modified dir'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" % test_dir, cred_req=mocker.ANY)
    # A GangaDiracError from the DIRAC layer must be swallowed and reported as False.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', side_effect=GangaDiracError('test Exception')) as execute:
        assert not db.getOutputSandbox(test_dir), 'didn\'t fail gracefully'
        execute.assert_called_once()
def test_getOutputDataLFNs(db):
    """getOutputDataLFNs collects the non-empty LFNs, for the master job and
    then once per subjob when subjobs exist."""
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    #######################
    class TestFile(object):
        # Minimal stand-in for DiracFile: only .lfn is read by the backend.
        def __init__(self, lfn):
            self.lfn = lfn
    #######################

    def fake_outputfiles_iterator(job, file_type):
        # Validate the arguments the backend passes to outputfiles_iterator.
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
        assert file_type == DiracFile
        # Files with empty LFNs must be filtered out by the caller.
        return [TestFile('a'), TestFile(''), TestFile('b'), TestFile(''), TestFile('c'), TestFile('')]

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):
        subjob = False
        assert db.getOutputDataLFNs() == ['a', 'b', 'c']
        j.subjobs = [Job(), Job(), Job()]
        for sj in j.subjobs:
            sj._setParent(j)
        subjob = True
        assert db.getOutputDataLFNs() == ['a', 'b', 'c'] * 3
def test__common_submit(tmpdir, db, mocker):
    """_common_submit: error path clears backend state, a single id is stored,
    and a list of ids triggers bulk subjob setup."""
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')
    from Ganga.Core import BackendError
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))
    # Failure: GangaDiracError must surface as BackendError and reset id/actualCE/status.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', side_effect=GangaDiracError('test Exception')):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(name)
        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'
    # Success with a single DIRAC job id.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=12345) as execute:
        assert db._common_submit(name)
        execute.assert_called_once_with("execfile('%s')" % name, cred_req=mocker.ANY)
        assert db.id == 12345, 'id not set'
    # A list of ids means a parametric (bulk) submission.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=[123, 456]):
        with patch.object(db, '_setup_bulk_subjobs') as _setup_bulk_subjobs:
            db._common_submit(name)
            _setup_bulk_subjobs.assert_called_once_with([123, 456], name)
def test_reset(db):
    """reset(): disallowed states stay untouched; 'completing' goes back to
    'submitted'; doSubjobs=True also resets resettable subjobs but never
    completed ones."""
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.getJobObject().subjobs = [Job(), Job()]
    for subjob in db.getJobObject().subjobs:
        subjob.backend = db
    for j in db.getJobObject().subjobs:
        j.status = 'completing'

    # States from which reset must refuse to act.
    disallowed_status = ['submitting', 'killed']
    for status in disallowed_status:
        db.getJobObject().status = status
        db.reset()
        assert db.getJobObject().status == status, 'status shouldn\'t have changed'

    db.getJobObject().status = 'completing'
    db.reset()
    # Fixed assertion message: was 'didn\t reset job' (a literal TAB character),
    # now a properly escaped apostrophe.
    assert db.getJobObject().status == 'submitted', 'didn\'t reset job'
    assert [j.status for j in db.getJobObject().subjobs] != ['submitted', 'submitted'], 'subjobs not reset properly'

    db.reset(doSubjobs=True)
    assert [j.status for j in db.getJobObject().subjobs] == ['submitted', 'submitted'], 'subjobs not reset properly'

    # Completed subjobs must not be reset.
    for j in db.getJobObject().subjobs:
        j.status = 'completed'
    db.reset(doSubjobs=True)
    assert [j.status for j in db.getJobObject().subjobs] != ['submitted', 'submitted'], 'subjobs not supposed to reset'
def test__common_submit(tmpdir, db):
    """_common_submit with dict-style DIRAC results: empty dict fails and clears
    state, 'Value' int is the job id, 'Value' list triggers bulk setup."""
    from Ganga.Core import BackendError
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))
    # An empty result dict is a failure: BackendError, and the state is cleared.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={}):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(name)
        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'
    # Success with a single DIRAC job id.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={'OK': True, 'Value': 12345}) as execute:
        assert db._common_submit(name)
        execute.assert_called_once_with("execfile('%s')" % name)
        assert db.id == 12345, 'id not set'
    # A list of ids triggers the bulk-subjob setup path.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={'OK': True, 'Value': [123, 456]}):
        with patch.object(db, '_setup_bulk_subjobs') as _setup_bulk_subjobs:
            db._common_submit(name)
            _setup_bulk_subjobs.assert_called_once_with([123, 456], name)
    def __make_subjob__(self, mj, guids, names, sjob_evnts=-1, sites=None):
        """Private method to create a subjob of master job *mj* over the given
        input *guids*/*names*.

        sjob_evnts: total number of events for the subjob (-1 means unchanged).
        sites: candidate sites, applied only for LCG + AtlasLCGRequirements.
        """
        logger.debug('generating subjob to run %d events in-total on files: %s' % (sjob_evnts, repr(guids)))
        j = Job()
        j.name = mj.name
        j.inputdata = mj.inputdata
        # Only plain/DQ2 datasets carry explicit guid/name lists.
        if j.inputdata.type in ['','DQ2']:
            j.inputdata.guids = guids
            j.inputdata.names = names
        j.outputdata = mj.outputdata
        j.application = mj.application
        if sjob_evnts != -1:
            j.application.max_events = sjob_evnts
        j.backend = mj.backend
        # Site preference only makes sense for LCG with ATLAS-specific requirements.
        if j.backend._name in ['LCG'] and j.backend.requirements._name == 'AtlasLCGRequirements':
            if sites:
                j.backend.requirements.sites = sites
        j.inputsandbox = mj.inputsandbox
        j.outputsandbox = mj.outputsandbox
        return j
def _create_subjob(self, job, dataset): logger.debug("_create_subjob") datatmp = [] logger.debug("dataset size: %s" % str(len(dataset))) #logger.debug( "dataset: %s" % str(dataset) ) from GangaLHCb.Lib.LHCbDataset.LHCbDataset import LHCbDataset if isinstance(dataset, LHCbDataset): for i in dataset: if isType(i, DiracFile): datatmp.append(i) else: logger.error("Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))) from Ganga.Core.exceptions import GangaException raise GangaException("Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))) elif type(dataset) == type([]) or isType(dataset, GangaList()): for this_file in dataset: if type(this_file) is str: datatmp.append(allComponentFilters['gangafiles'](this_file, None)) elif isType(this_file, IGangaFile): datatmp.append(this_file) else: logger.error("Unexpected type: %s" % str(type(this_file))) logger.error("Wanted object to inherit from type: %s: %s" % (str(type(IGangaFile())))) from Ganga.Core.exceptions import GangaException x = GangaException("Unknown(unexpected) file object: %s" % this_file) raise x elif type(dataset) is str: datatmp.append(DiracFile(lfn=dataset)) else: logger.error("Unkown dataset type, cannot perform split here") from Ganga.Core.exceptions import GangaException logger.error("Dataset found: " + str(dataset)) raise GangaException("Unkown dataset type, cannot perform split here") logger.debug("Creating new Job in Splitter") j = Job() logger.debug("Copying From Job") j.copyFrom(stripProxy(job), ['splitter', 'subjobs', 'inputdata', 'inputsandbox', 'inputfiles']) logger.debug("Unsetting Splitter") j.splitter = None #logger.debug("Unsetting Merger") #j.merger = None #j.inputsandbox = [] ## master added automatically #j.inputfiles = [] logger.debug("Setting InputData") j.inputdata = LHCbDataset(files=datatmp[:], persistency=self.persistency, depth=self.depth) #j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice logger.debug("Returning new subjob") return j
    def createSubjob(self, job):
        """Create a new subjob by copying the master job and setting all fields
        correctly: the subjob carries no splitter and an empty input sandbox."""
        from Ganga.GPIDev.Lib.Job import Job
        j = Job()
        j.copyFrom(job)
        j.splitter = None
        # FIXME: the copied master input sandbox is cleared here
        j.inputsandbox = []
        return j
def split(self,job): from Ganga.GPIDev.Lib.Job import Job subjobs = [] if self.fail == 'exception': x = 'triggered failure during splitting' raise Exception(x) for b in self.backs: j = Job() j.copyFrom(job) j.backend = b subjobs.append(j) return subjobs
def split(self, job): from Ganga.GPIDev.Lib.Job import Job subjobs = [] if self.fail == 'exception': x = 'triggered failure during splitting' raise Exception(x) for b in self.backs: j = Job() j.copyFrom(job) j.backend = b subjobs.append(j) return subjobs
    def _create_subjob(self, job, inputdata):
        """Return a copy of *job* stripped down to run as a subjob over *inputdata*."""
        j = Job()
        j.copyFrom(job)
        j.splitter = None
        j.merger = None
        j.inputsandbox = []  # master added automatically
        j.inputfiles = []
        j.inputdata = inputdata
        return j
    def split(self, job):
        """Split a task analysis job into one subjob per configured partition."""
        from Ganga.GPIDev.Lib.Job import Job
        logger.debug("AnaTaskSplitterJob split called")
        sjl = []
        transform = stripProxy(job.application.getTransform())
        transform.setAppStatus(job.application, "removed")
        # Do the splitting
        for sj in self.subjobs:
            j = Job()
            # partition numbers are 1-based, the lists are 0-based
            j.inputdata = transform.partitions_data[sj - 1]
            j.outputdata = job.outputdata
            j.application = job.application
            j.application.atlas_environment.append("OUTPUT_FILE_NUMBER=%i" % sj)
            j.backend = job.backend
            if transform.partitions_sites:
                # requirements-style backends take a site list, others a single site
                if hasattr(j.backend.requirements, 'sites'):
                    j.backend.requirements.sites = transform.partitions_sites[
                        sj - 1]
                else:
                    j.backend.site = transform.partitions_sites[sj - 1]
            j.inputsandbox = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            sjl.append(j)
            # Task handling
            j.application.tasks_id = job.application.tasks_id
            j.application.id = transform.getNewAppID(sj)
            #transform.setAppStatus(j.application, "submitting")
        # NOTE(review): the "00:" prefixing below appears to run once after the
        # loop; confirm against the task bookkeeping conventions.
        if not job.application.tasks_id.startswith("00"):
            job.application.tasks_id = "00:%s" % job.application.tasks_id
        return sjl
def test_reset(db):
    """reset(): disallowed states stay untouched; 'completing' goes back to
    'submitted'; doSubjobs=True also resets resettable subjobs but never
    completed ones."""
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.getJobObject().subjobs = [Job(), Job()]
    for subjob in db.getJobObject().subjobs:
        subjob.backend = db
    for j in db.getJobObject().subjobs:
        j.status = 'completing'

    # States from which reset must refuse to act.
    disallowed_status = ['submitting', 'killed']
    for status in disallowed_status:
        db.getJobObject().status = status
        db.reset()
        assert db.getJobObject().status == status, 'status shouldn\'t have changed'

    db.getJobObject().status = 'completing'
    db.reset()
    # Fixed assertion message: was 'didn\t reset job' (a literal TAB character),
    # now a properly escaped apostrophe.
    assert db.getJobObject().status == 'submitted', 'didn\'t reset job'
    assert [j.status for j in db.getJobObject().subjobs] != ['submitted', 'submitted'], 'subjobs not reset properly'

    db.reset(doSubjobs=True)
    assert [j.status for j in db.getJobObject().subjobs] == ['submitted', 'submitted'], 'subjobs not reset properly'

    # Completed subjobs must not be reset.
    for j in db.getJobObject().subjobs:
        j.status = 'completed'
    db.reset(doSubjobs=True)
    assert [j.status for j in db.getJobObject().subjobs] != ['submitted', 'submitted'], 'subjobs not supposed to reset'
def test_getOutputDataLFNs(db):
    """getOutputDataLFNs collects the non-empty LFNs, for the master job and
    then once per subjob when subjobs exist."""
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    #######################
    class TestFile(object):
        # Minimal stand-in for DiracFile: only .lfn is read by the backend.
        def __init__(self, lfn):
            self.lfn = lfn
    #######################

    def fake_outputfiles_iterator(job, file_type):
        # Validate the arguments the backend passes to outputfiles_iterator.
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
        assert file_type == DiracFile
        # Files with empty LFNs must be filtered out by the caller.
        return [TestFile('a'), TestFile(''), TestFile('b'), TestFile(''), TestFile('c'), TestFile('')]

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):
        subjob = False
        assert db.getOutputDataLFNs() == ['a', 'b', 'c']
        j.subjobs = [Job(), Job(), Job()]
        for sj in j.subjobs:
            sj._setParent(j)
        subjob = True
        assert db.getOutputDataLFNs() == ['a', 'b', 'c'] * 3
    def split(self,job):
        """Split a task analysis job into one subjob per configured partition."""
        from Ganga.GPIDev.Lib.Job import Job
        logger.debug("AnaTaskSplitterJob split called")
        sjl = []
        transform = stripProxy(job.application.getTransform())
        transform.setAppStatus(job.application, "removed")
        # Do the splitting
        for sj in self.subjobs:
            j = Job()
            # partition numbers are 1-based, the lists are 0-based
            j.inputdata = transform.partitions_data[sj-1]
            j.outputdata = job.outputdata
            j.application = job.application
            j.application.atlas_environment.append("OUTPUT_FILE_NUMBER=%i" % sj)
            j.backend = job.backend
            if transform.partitions_sites:
                # requirements-style backends take a site list, others a single site
                if hasattr(j.backend.requirements, 'sites'):
                    j.backend.requirements.sites = transform.partitions_sites[sj-1]
                else:
                    j.backend.site = transform.partitions_sites[sj-1]
            j.inputsandbox = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            sjl.append(j)
            # Task handling
            j.application.tasks_id = job.application.tasks_id
            j.application.id = transform.getNewAppID(sj)
            #transform.setAppStatus(j.application, "submitting")
        # NOTE(review): the "00:" prefixing below appears to run once after the
        # loop; confirm against the task bookkeeping conventions.
        if not job.application.tasks_id.startswith("00"):
            job.application.tasks_id = "00:%s" % job.application.tasks_id
        return sjl
    def _create_subjob(self, job, dataset):
        """Return a copy of *job* stripped down to a subjob whose input data is
        a GaudiDataset built from *dataset*."""
        logger.debug("_create_subjob")
        j = Job()
        j.copyFrom(job)
        j.splitter = None
        j.merger = None
        j.inputsandbox = []  # master added automatically
        # Always build a fresh GaudiDataset for the subjob.
        j.inputdata = GaudiDataset(files=dataset)
##        if not j.inputdata: j.inputdata = GaudiDataset(files=dataset)
#        else:               j.inputdata.files = dataset
        return j
def test__common_submit(tmpdir, db):
    """_common_submit with dict-style DIRAC results: empty dict fails and clears
    state, 'Value' int is the job id, 'Value' list triggers bulk setup."""
    from Ganga.Core import BackendError
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(
            script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))
    # An empty result dict is a failure: BackendError, and the state is cleared.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={}):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(name)
        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'
    # Success with a single DIRAC job id.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={
            'OK': True,
            'Value': 12345
    }) as execute:
        assert db._common_submit(name)
        execute.assert_called_once_with("execfile('%s')" % name)
        assert db.id == 12345, 'id not set'
    # A list of ids triggers the bulk-subjob setup path.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={
            'OK': True,
            'Value': [123, 456]
    }):
        with patch.object(db, '_setup_bulk_subjobs') as _setup_bulk_subjobs:
            db._common_submit(name)
            _setup_bulk_subjobs.assert_called_once_with([123, 456], name)
def create_gaudi_subjob(job, inputdata):
    """Build a Gaudi subjob of *job*; *inputdata* (possibly None) becomes the
    subjob's dataset and, when the application has an 'extra' section, is
    mirrored into it."""
    j = Job()
    j.name = job.name
    j.application = copy_app(job.application)
    j.backend = job.backend  # no need to deepcopy
    if inputdata:
        j.inputdata = inputdata
        if hasattr(j.application, 'extra'):
            j.application.extra.inputdata = j.inputdata
    else:
        j.inputdata = None
        if hasattr(j.application, 'extra'):
            # no data: give the extra section an empty dataset placeholder
            j.application.extra.inputdata = BesDataset()
    j.outputsandbox = job.outputsandbox[:]
    j.outputdata = job.outputdata
    return j
def test__common_submit(tmpdir, db, mocker):
    """_common_submit: error path clears backend state, a single id is stored,
    and a list of ids triggers bulk subjob setup."""
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')
    from Ganga.Core.exceptions import BackendError
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(
            script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))
    # Failure: GangaDiracError must surface as BackendError and reset id/actualCE/status.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', side_effect=GangaDiracError('test Exception')):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(name)
        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'
    # Success with a single DIRAC job id.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=12345) as execute:
        assert db._common_submit(name)
        execute.assert_called_once_with("execfile('%s')" % name, cred_req=mocker.ANY)
        assert db.id == 12345, 'id not set'
    # A list of ids means a parametric (bulk) submission.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=[123, 456]):
        with patch.object(db, '_setup_bulk_subjobs') as _setup_bulk_subjobs:
            db._common_submit(name)
            _setup_bulk_subjobs.assert_called_once_with([123, 456], name)
def test_getOutputSandbox(db):
    """getOutputSandbox (dict-result API): default dir, explicit dir, and the
    failure path when DIRAC returns no usable result."""
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.id = 1234
    temp_dir = j.getOutputWorkspace().getPath()
    # Default destination: the job's output workspace directory.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={'OK': True}) as execute:
        assert db.getOutputSandbox(), 'didn\'t run'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" % temp_dir)
    test_dir = 'test_dir'
    # An explicit destination directory is passed straight through.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={'OK': True}) as execute:
        assert db.getOutputSandbox(test_dir), 'didn\'t run with modified dir'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" % test_dir)
    # No return_value configured: the Mock's default result is presumably
    # treated as a failed DIRAC call — confirm against getOutputSandbox's
    # result handling.
    with patch('GangaDirac.Lib.Backends.DiracBase.execute') as execute:
        assert not db.getOutputSandbox(test_dir), 'didn\'t fail gracefully'
        execute.assert_called_once()
def _create_subjob(self, job, dataset): logger.debug("_create_subjob") datatmp = [] if isinstance(dataset, LHCbDataset): for i in dataset: if isinstance(i, DiracFile): datatmp.extend(i) else: logger.error( "Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))) from Ganga.Core.exceptions import GangaException raise GangaException( "Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))) elif isinstance(dataset, list): from Ganga.GPIDev.Base.Proxy import isType for i in dataset: if type(i) is str: datatmp.append(DiracFile(lfn=i)) elif isType(i, DiracFile()): datatmp.extend(i) else: x = GangaException("Unknown(unexpected) file object: %s" % i) raise x else: logger.error("Unkown dataset type, cannot perform split here") from Ganga.Core.exceptions import GangaException raise GangaException( "Unkown dataset type, cannot perform split here") logger.debug("Creating new Job in Splitter") j = Job() j.copyFrom(stripProxy(job)) j.splitter = None j.merger = None j.inputsandbox = [] # master added automatically j.inputfiles = [] j.inputdata = LHCbDataset(files=datatmp[:], persistency=self.persistency, depth=self.depth) j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice return j
def test_submit(db):
    """submit(): the input sandbox is compressed into a tarball and spliced,
    together with master and additional sandbox entries, into the DIRAC script."""
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    file1 = tempfile.NamedTemporaryFile('w')
    file2 = tempfile.NamedTemporaryFile('w')
    file3 = tempfile.NamedTemporaryFile('w')
    sjc = StandardJobConfig(
        exe=script_template,
        inputbox=[File(file1.name), File(file2.name), File(file3.name)],
        outputbox=['d', 'e', 'f'])

    def fake_common_submit(dirac_script):
        # The script handed to _common_submit must have ##INPUT_SANDBOX##
        # replaced by: master sandbox list + compressed sandbox tarball + extras.
        with open(dirac_script, 'r') as f:
            script = f.read()
            assert script != script_template, 'script not changed'
            assert script_template.replace(
                '##INPUT_SANDBOX##',
                str(['a', 'b', 'c'] + [
                    os.path.join(j.getInputWorkspace().getPath(),
                                 '_input_sandbox_0.tgz')
                ] + ['g'])) == script, 'script not what it should be'
        return True

    with patch.object(db, '_addition_sandbox_content', return_value=['g']) as _addition_sandbox_content:
        with patch.object(db, '_common_submit', Mock(fake_common_submit)) as _common_submit:
            assert db.submit(sjc, ['a', 'b', 'c'])
            _addition_sandbox_content.assert_called_once_with(sjc)
            _common_submit.assert_called_once()
    file1.close()
    file2.close()
    file3.close()
def test_removeOutputData(db):
    """removeOutputData should iterate output DiracFiles with a function that
    calls remove(), for the master job and then per subjob."""
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    #######################
    class TestFile(object):
        # Stand-in for DiracFile: only remove() is called.
        def __init__(self):
            pass

        def remove(self):
            return 27
    #######################

    def fake_outputfiles_foreach(job, file_type, func):
        import types
        # Validate the arguments the backend passes to outputfiles_foreach.
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
        assert file_type == DiracFile
        assert isinstance(func, types.FunctionType)
        # The supplied callback must delegate to the file's remove().
        assert func(TestFile()) == 27, 'Didn\'t call remove function'

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_foreach', fake_outputfiles_foreach):
        subjob = False
        assert db.removeOutputData() is None
        j.subjobs = [Job(), Job(), Job()]
        for sj in j.subjobs:
            sj._setParent(j)
        subjob = True
        assert db.removeOutputData() is None
def split(self, job): subjobs = [] for i in range(self.numJobs): j = Job() j.copyFrom(job) j.splitter = None j.merger = None j.inputsandbox = [] # master added automatically subjobs.append(j) return subjobs
def test_submit(db):
    """submit(): the input sandbox is compressed into a tarball and spliced,
    together with master and additional sandbox entries, into the DIRAC script."""
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    file1 = tempfile.NamedTemporaryFile('w')
    file2 = tempfile.NamedTemporaryFile('w')
    file3 = tempfile.NamedTemporaryFile('w')
    sjc = StandardJobConfig(exe=script_template,
                            inputbox=[File(file1.name), File(file2.name), File(file3.name)],
                            outputbox=['d', 'e', 'f'])

    def fake_common_submit(dirac_script):
        # The script handed to _common_submit must have ##INPUT_SANDBOX##
        # replaced by: master sandbox list + compressed sandbox tarball + extras.
        with open(dirac_script, 'r') as f:
            script = f.read()
            assert script != script_template, 'script not changed'
            assert script_template.replace('##INPUT_SANDBOX##',
                                           str(['a', 'b', 'c'] +
                                               [os.path.join(j.getInputWorkspace().getPath(),
                                                             '_input_sandbox_0.tgz')] +
                                               ['g'])) == script, 'script not what it should be'
        return True

    with patch.object(db, '_addition_sandbox_content', return_value=['g']) as _addition_sandbox_content:
        with patch.object(db, '_common_submit', Mock(fake_common_submit)) as _common_submit:
            assert db.submit(sjc, ['a', 'b', 'c'])
            _addition_sandbox_content.assert_called_once_with(sjc)
            _common_submit.assert_called_once()
    file1.close()
    file2.close()
    file3.close()
def test__resubmit(db):
    """Resubmitting via a backend that is the parent of subjobs must raise
    BackendError even when _common_submit itself would succeed."""
    from Ganga.Core.exceptions import BackendError

    # Removed a dead nested _common_submit function: it was never called
    # because patch.object replaces the method on db directly.
    masterj = Job()
    masterj.id = 0
    masterj.backend = db
    j = Job()
    j.id = 1
    j.backend = db
    db._setParent(masterj)
    with patch.object(db, '_common_submit', return_value='_common_submit run ok'):
        with pytest.raises(BackendError):
            db._resubmit()
def _create_subjob(self, job, dataset): logger.debug("_create_subjob") datatmp = [] if isinstance(dataset, LHCbDataset): for i in dataset: if isinstance(i, DiracFile): datatmp.extend(i) else: logger.error( "Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))) from Ganga.Core.exceptions import GangaException raise GangaException( "Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))) elif isinstance(dataset, list): from Ganga.GPIDev.Base.Proxy import isType for i in dataset: if type(i) is str: datatmp.append(DiracFile(lfn=i)) elif isType(i, DiracFile()): datatmp.extend(i) else: x = GangaException( "Unknown(unexpected) file object: %s" % i) raise x else: logger.error("Unkown dataset type, cannot perform split here") from Ganga.Core.exceptions import GangaException raise GangaException( "Unkown dataset type, cannot perform split here") logger.debug("Creating new Job in Splitter") j = Job() j.copyFrom(stripProxy(job)) j.splitter = None j.merger = None j.inputsandbox = [] # master added automatically j.inputfiles = [] j.inputdata = LHCbDataset(files=datatmp[:], persistency=self.persistency, depth=self.depth) j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice return j
def _create_subjob(self, job, dataset): logger.debug("_create_subjob") datatmp = [] logger.debug("dataset size: %s" % str(len(dataset))) #logger.debug( "dataset: %s" % str(dataset) ) from GangaLHCb.Lib.LHCbDataset.LHCbDataset import LHCbDataset if isinstance(dataset, LHCbDataset): for i in dataset: if isType(i, DiracFile): datatmp.append(i) else: logger.error("Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))) from Ganga.Core.exceptions import GangaException raise GangaException("Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))) elif isinstance(dataset, (list, GangaList)): for this_file in dataset: if type(this_file) is str: datatmp.append(allComponentFilters['gangafiles'](this_file, None)) elif isType(this_file, IGangaFile): datatmp.append(this_file) else: logger.error("Unexpected type: %s" % str(type(this_file))) logger.error("Wanted object to inherit from type: %s: %s" % (str(type(IGangaFile())))) from Ganga.Core.exceptions import GangaException x = GangaException("Unknown(unexpected) file object: %s" % this_file) raise x elif type(dataset) is str: datatmp.append(DiracFile(lfn=dataset)) else: logger.error("Unkown dataset type, cannot perform split here") from Ganga.Core.exceptions import GangaException logger.error("Dataset found: " + str(dataset)) raise GangaException("Unkown dataset type, cannot perform split here") logger.debug("Creating new Job in Splitter") j = Job() logger.debug("Copying From Job") j.copyFrom(stripProxy(job), ['splitter', 'subjobs', 'inputdata', 'inputsandbox', 'inputfiles']) logger.debug("Unsetting Splitter") j.splitter = None #logger.debug("Unsetting Merger") #j.merger = None #j.inputsandbox = [] ## master added automatically #j.inputfiles = [] logger.debug("Setting InputData") j.inputdata = LHCbDataset(files=datatmp[:], persistency=self.persistency, depth=self.depth) #j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice logger.debug("Returning new subjob") return j
def createSubjob(self, job, additional_skip_args=None): """ Create a new subjob by copying the master job and setting all fields correctly. """ from Ganga.GPIDev.Lib.Job import Job if additional_skip_args is None: additional_skip_args = [] j = Job() skipping_args = [ 'splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs' ] for arg in additional_skip_args: skipping_args.append(arg) j.copyFrom(job, skipping_args) j.splitter = None j.inputsandbox = [] j.inputfiles = [] j.inputdata = [] return j
def createSubjob(self, job, additional_skip_args=None): """ Create a new subjob by copying the master job and setting all fields correctly. """ from Ganga.GPIDev.Lib.Job import Job if additional_skip_args is None: additional_skip_args = [] j = Job() skipping_args = ['splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs'] for arg in additional_skip_args: skipping_args.append(arg) j.copyFrom(job, skipping_args) j.splitter = None j.inputsandbox = [] j.inputfiles = [] j.inputdata = [] return j
def split(self, job): from Ganga.GPIDev.Lib.Job import Job subjobs = [] primeTables = job.inputdata.get_dataset() ## avoid creating jobs with nothing to do if self.numsubjobs > len(primeTables): self.numsubjobs = len(primeTables) ## create subjobs for i in range(self.numsubjobs): j = Job() j.application = job.application j.inputdata = job.inputdata j.inputdata.table_id_lower = 1 j.inputdata.table_id_upper = 1 j.outputdata = job.outputdata j.inputsandbox = job.inputsandbox j.outputsandbox = job.outputsandbox j.backend = job.backend subjobs.append(j) ## chunksize of each subjob chunksize = len(primeTables) / self.numsubjobs offset = 0 for i in range(len(subjobs)): my_chunksize = chunksize if len(primeTables) % self.numsubjobs >= i + 1: my_chunksize += 1 ## set lower bound id (inclusive) subjobs[i].inputdata.table_id_lower = offset + 1 ## fill subjob with prime tables #for j in range(my_chunksize): # subjobs[i].application.addPrimeTable(primeTables[offset+j]) offset += my_chunksize ## set upper bound id (inclusive) subjobs[i].inputdata.table_id_upper = offset return subjobs
def test__setup_bulk_subjobs(tmpdir, db):
    """_setup_bulk_subjobs turns a list of DIRAC ids into Ganga subjobs whose
    ids, backends and applications mirror the master job."""
    from Ganga.Core.exceptions import BackendError
    from Ganga.GPIDev.Lib.Dataset.Dataset import Dataset
    from GangaDirac.Lib.Backends import Dirac

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(
            script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))

    # An empty DIRAC id list is an error.
    with pytest.raises(BackendError):
        db._setup_bulk_subjobs([], name)

    d = Dirac()
    j = Job()
    j.id = 0  # This would normally be set by the registry if this was a proxy job
    j.application = Executable()
    j.splitter = ArgSplitter()
    j.splitter.args = [['a'], ['b'], ['c'], ['d'], ['e']]
    j.inputdata = Dataset()
    j.backend = d
    d._parent = j
    dirac_ids = [123, 456]

    def fake_setup_subjob_dataset(dataset):
        # datasets come from the parametric data embedded in the script
        assert dataset in [['a'], ['b']], 'dataset not passed properly'

    with patch.object(d, '_setup_subjob_dataset', fake_setup_subjob_dataset):
        assert d._setup_bulk_subjobs(dirac_ids, name), 'didnt run'

    assert len(j.subjobs) == len(dirac_ids), 'didnt work'
    for id_, backend_id, subjob in zip(range(len(dirac_ids)), dirac_ids, j.subjobs):
        assert id_ == subjob.id, 'ids dont match'
        assert backend_id == subjob.backend.id, 'backend.ids dont match'
        assert isinstance(subjob.application, j.application.__class__), 'apps dont match'
        assert subjob.splitter is None, 'splitter not done'
        assert isinstance(subjob.backend, j.backend.__class__), 'backend dont match'
def test__resubmit(db):
    """Resubmitting via a backend that is the parent of subjobs must raise
    BackendError even when _common_submit itself would succeed."""
    from Ganga.Core import BackendError

    # Removed a dead nested _common_submit function: it was never called
    # because patch.object replaces the method on db directly.
    masterj = Job()
    masterj.id = 0
    masterj.backend = db
    j = Job()
    j.id = 1
    j.backend = db
    db._setParent(masterj)
    with patch.object(db, '_common_submit', return_value='_common_submit run ok'):
        with pytest.raises(BackendError):
            db._resubmit()
def split(self,job): from Ganga.GPIDev.Lib.Job import Job subjobs = [] primeTables = job.inputdata.get_dataset() ## avoid creating jobs with nothing to do if self.numsubjobs > len(primeTables): self.numsubjobs = len(primeTables) ## create subjobs for i in range(self.numsubjobs): j = Job() j.application = job.application j.inputdata = job.inputdata j.inputdata.table_id_lower = 1 j.inputdata.table_id_upper = 1 j.outputdata = job.outputdata j.inputsandbox = job.inputsandbox j.outputsandbox = job.outputsandbox j.backend = job.backend subjobs.append(j) ## chunksize of each subjob chunksize = len(primeTables) / self.numsubjobs offset = 0 for i in range(len(subjobs)): my_chunksize = chunksize if len(primeTables) % self.numsubjobs >= i+1: my_chunksize+=1 ## set lower bound id (inclusive) subjobs[i].inputdata.table_id_lower = offset+1 ## fill subjob with prime tables #for j in range(my_chunksize): # subjobs[i].application.addPrimeTable(primeTables[offset+j]) offset += my_chunksize ## set upper bound id (inclusive) subjobs[i].inputdata.table_id_upper = offset return subjobs
def test__setup_bulk_subjobs(tmpdir, db):
    """_setup_bulk_subjobs turns a list of DIRAC ids into Ganga subjobs whose
    ids, backends and applications mirror the master job."""
    from Ganga.Core import BackendError
    from Ganga.GPIDev.Lib.Dataset.Dataset import Dataset
    from GangaDirac.Lib.Backends import Dirac

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))

    # An empty DIRAC id list is an error.
    with pytest.raises(BackendError):
        db._setup_bulk_subjobs([], name)

    d = Dirac()
    j = Job()
    j.id = 0  # This would normally be set by the registry if this was a proxy job
    j.application = Executable()
    j.splitter = ArgSplitter()
    j.splitter.args = [['a'], ['b'], ['c'], ['d'], ['e']]
    j.inputdata = Dataset()
    j.backend = d
    d._parent = j
    dirac_ids = [123, 456]

    def fake_setup_subjob_dataset(dataset):
        # datasets come from the parametric data embedded in the script
        assert dataset in [['a'], ['b']], 'dataset not passed properly'

    with patch.object(d, '_setup_subjob_dataset', fake_setup_subjob_dataset):
        assert d._setup_bulk_subjobs(dirac_ids, name), 'didnt run'

    assert len(j.subjobs) == len(dirac_ids), 'didnt work'
    for id_, backend_id, subjob in zip(range(len(dirac_ids)), dirac_ids, j.subjobs):
        assert id_ == subjob.id, 'ids dont match'
        assert backend_id == subjob.backend.id, 'backend.ids dont match'
        assert isinstance(subjob.application, j.application.__class__), 'apps dont match'
        assert subjob.splitter is None, 'splitter not done'
        assert isinstance(subjob.backend, j.backend.__class__), 'backend dont match'
def test_getOutputData(db, tmpdir):
    """getOutputData: bad dir rejected; LFN'd files downloaded (optionally
    filtered by name pattern) into the workspace or an explicit dir; subjobs
    get per-subjob '<master>.<sj>' directories."""
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    # A non-existent output directory must be rejected.
    with pytest.raises(GangaException):
        db.getOutputData('/false/dir')

    #######################
    class TestFile(object):
        # Stand-in for DiracFile: lfn/namePattern are read, get() marks itself.
        def __init__(self, lfn, namePattern):
            self.lfn = lfn
            self.namePattern = namePattern

        def get(self):
            self.check = 42

    test_files = [TestFile('a', 'alpha'), TestFile('', 'delta'),
                  TestFile('b', 'beta'), TestFile('', 'bravo'),
                  TestFile('c', 'charlie'), TestFile('', 'foxtrot')]
    #######################

    def fake_outputfiles_iterator(job, file_type):
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
        assert file_type == DiracFile
        return test_files

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):
        # master jobs #######################
        subjob = False
        assert db.getOutputData() == ['a', 'b', 'c']
        for f in test_files:
            if f.lfn in ['a', 'b', 'c']:
                # files with an LFN are downloaded into the output workspace
                assert f.localDir == j.getOutputWorkspace().getPath()
                # fixed: was 'assert f.check, 42 == "..."' which only tested
                # truthiness of f.check with a bogus message expression
                assert f.check == 42, "didn't call get"
            else:
                assert not hasattr(f, 'localDir')
                assert not hasattr(f, 'check')
        assert db.getOutputData(None, ['alpha', 'charlie']) == ['a', 'c']
        assert db.getOutputData(tmpdir.dirname, ['alpha', 'charlie']) == ['a', 'c']
        # subjobs ########################
        j.subjobs = [Job(), Job(), Job()]
        i = 0
        for sj in j.subjobs:
            sj._setParent(j)
            sj.id = i
            i += 1
        subjob = True
        assert db.getOutputData() == ['a', 'b', 'c'] * 3
        assert db.getOutputData(None, ['beta']) == ['b'] * 3
        assert db.getOutputData(tmpdir.dirname, ['alpha', 'charlie']) == ['a', 'c'] * 3
        # each subjob gets its own <masterid>.<sjid> output directory
        for i in range(3):
            assert os.path.isdir(os.path.join(tmpdir.dirname, '0.%d' % i))
            os.rmdir(os.path.join(tmpdir.dirname, '0.%d' % i))
def test_getOutputData(db, tmpdir):
    """getOutputData: bad dir rejected; LFN'd files downloaded (optionally
    filtered by name pattern) into the workspace or an explicit dir; subjobs
    get per-subjob '<master>.<sj>' directories."""
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    # A non-existent output directory must be rejected.
    with pytest.raises(GangaException):
        db.getOutputData('/false/dir')

    #######################
    class TestFile(object):
        # Stand-in for DiracFile: lfn/namePattern are read, get() marks itself.
        def __init__(self, lfn, namePattern):
            self.lfn = lfn
            self.namePattern = namePattern

        def get(self):
            self.check = 42

    test_files = [
        TestFile('a', 'alpha'),
        TestFile('', 'delta'),
        TestFile('b', 'beta'),
        TestFile('', 'bravo'),
        TestFile('c', 'charlie'),
        TestFile('', 'foxtrot')
    ]
    #######################

    def fake_outputfiles_iterator(job, file_type):
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
        assert file_type == DiracFile
        return test_files

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):
        # master jobs #######################
        subjob = False
        assert db.getOutputData() == ['a', 'b', 'c']
        for f in test_files:
            if f.lfn in ['a', 'b', 'c']:
                # files with an LFN are downloaded into the output workspace
                assert f.localDir == j.getOutputWorkspace().getPath()
                # fixed: was 'assert f.check, 42 == "..."' which only tested
                # truthiness of f.check with a bogus message expression
                assert f.check == 42, "didn't call get"
            else:
                assert not hasattr(f, 'localDir')
                assert not hasattr(f, 'check')
        assert db.getOutputData(None, ['alpha', 'charlie']) == ['a', 'c']
        assert db.getOutputData(tmpdir.dirname, ['alpha', 'charlie']) == ['a', 'c']
        # subjobs ########################
        j.subjobs = [Job(), Job(), Job()]
        i = 0
        for sj in j.subjobs:
            sj._setParent(j)
            sj.id = i
            i += 1
        subjob = True
        assert db.getOutputData() == ['a', 'b', 'c'] * 3
        assert db.getOutputData(None, ['beta']) == ['b'] * 3
        assert db.getOutputData(tmpdir.dirname, ['alpha', 'charlie']) == ['a', 'c'] * 3
        # each subjob gets its own <masterid>.<sjid> output directory
        for i in range(3):
            assert os.path.isdir(os.path.join(tmpdir.dirname, '0.%d' % i))
            os.rmdir(os.path.join(tmpdir.dirname, '0.%d' % i))