Example #1
0
def test_getOutputSandbox(db, mocker):
    """Exercise ``db.getOutputSandbox`` with the default directory, with an
    explicit directory, and with an ``execute`` that raises."""
    execute_path = 'GangaDirac.Lib.Backends.DiracBase.execute'
    command_template = "getOutputSandbox(1234,'%s')"

    mocker.patch('Ganga.GPIDev.Credentials.credential_store')

    job = Job()
    job.id = 0
    job.backend = db
    db._parent = job
    db.id = 1234

    # No argument: the job's output workspace path is expected in the command.
    workspace_dir = job.getOutputWorkspace().getPath()
    with patch(execute_path, return_value=True) as mocked_execute:
        assert db.getOutputSandbox(), "didn't run"
        mocked_execute.assert_called_once_with(command_template % workspace_dir,
                                               cred_req=mocker.ANY)

    # Explicit directory argument.
    explicit_dir = 'test_dir'
    with patch(execute_path, return_value=True) as mocked_execute:
        assert db.getOutputSandbox(explicit_dir), "didn't run with modified dir"
        mocked_execute.assert_called_once_with(command_template % explicit_dir,
                                               cred_req=mocker.ANY)

    # A GangaDiracError raised by execute must give a falsy result.
    failure = GangaDiracError('test Exception')
    with patch(execute_path, side_effect=failure) as mocked_execute:
        assert not db.getOutputSandbox(explicit_dir), "didn't fail gracefully"
        mocked_execute.assert_called_once()
Example #2
0
def test_getOutputDataLFNs(db):
    """Check that ``db.getOutputDataLFNs`` returns only the non-empty LFNs,
    first for a job without subjobs and then for a job with three subjobs."""
    master = Job()
    master.id = 0
    master.backend = db
    db._parent = master

    class TestFile(object):
        """Stand-in file object carrying nothing but an ``lfn`` attribute."""

        def __init__(self, lfn):
            self.lfn = lfn

    def fake_outputfiles_iterator(job, file_type):
        # ``expect_subjob`` is read from the enclosing scope at call time;
        # it is assigned inside the ``with`` block below.
        assert isinstance(job, Job)
        if not expect_subjob:
            assert job.master is None
            assert file_type == DiracFile
        else:
            assert job.master is not None
        return [TestFile(lfn) for lfn in ('a', '', 'b', '', 'c', '')]

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):
        expect_subjob = False
        assert db.getOutputDataLFNs() == ['a', 'b', 'c']

        master.subjobs = [Job(), Job(), Job()]
        for child in master.subjobs:
            child._setParent(master)

        expect_subjob = True
        assert db.getOutputDataLFNs() == ['a', 'b', 'c'] * 3
Example #3
0
def test__common_submit(tmpdir, db, mocker):
    """Exercise ``db._common_submit`` for a failing ``execute``, a single
    returned id, and a list of returned ids."""
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')

    from Ganga.Core import BackendError

    execute_path = 'GangaDirac.Lib.Backends.DiracBase.execute'

    job = Job()
    job.id = 0
    job.backend = db
    db._parent = job

    script_path = str(tmpdir.join('submit_script'))
    script_text = script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']]))
    with open(script_path, 'w') as script_file:
        script_file.write(script_text)

    # execute raises: BackendError is expected and the backend fields are cleared.
    with patch(execute_path, side_effect=GangaDiracError('test Exception')):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(script_path)

        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'

    # execute returns one id: it is stored on the backend.
    with patch(execute_path, return_value=12345) as mocked_execute:
        assert db._common_submit(script_path)

        mocked_execute.assert_called_once_with("execfile('%s')" % script_path, cred_req=mocker.ANY)

        assert db.id == 12345, 'id not set'

    # execute returns a list: the bulk-subjob setup receives it with the script.
    with patch(execute_path, return_value=[123, 456]):
        with patch.object(db, '_setup_bulk_subjobs') as mocked_bulk_setup:
            db._common_submit(script_path)
            mocked_bulk_setup.assert_called_once_with([123, 456], script_path)
Example #4
0
def test_reset(db):
    """Check ``db.reset`` on the master job and, with ``doSubjobs=True``,
    on its subjobs.

    Covered cases: statuses 'submitting' and 'killed' are left untouched,
    a 'completing' job becomes 'submitted', subjobs are only reset when
    ``doSubjobs=True`` is passed, and 'completed' subjobs are not reset.
    """
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.getJobObject().subjobs = [Job(), Job()]
    for subjob in db.getJobObject().subjobs:
        subjob.backend = db
    # A separate loop variable is used so the master job ``j`` is not shadowed.
    for subjob in db.getJobObject().subjobs:
        subjob.status = 'completing'

    disallowed_status = ['submitting', 'killed']
    for status in disallowed_status:
        db.getJobObject().status = status
        db.reset()
        assert db.getJobObject().status == status, 'status shouldn\'t have changed'

    db.getJobObject().status = 'completing'
    db.reset()
    # The message previously read 'didn\t reset job', embedding a tab character.
    assert db.getJobObject().status == 'submitted', 'didn\'t reset job'
    assert [sj.status for sj in db.getJobObject().subjobs] != ['submitted', 'submitted'], 'subjobs not reset properly'

    db.reset(doSubjobs=True)
    assert [sj.status for sj in db.getJobObject().subjobs] == ['submitted', 'submitted'], 'subjobs not reset properly'

    for subjob in db.getJobObject().subjobs:
        subjob.status = 'completed'
    db.reset(doSubjobs=True)
    assert [sj.status for sj in db.getJobObject().subjobs] != ['submitted', 'submitted'], 'subjobs not supposed to reset'
Example #5
0
def test__common_submit(tmpdir, db):
    """Exercise ``db._common_submit`` for an empty result dict, a single
    returned id, and a list of returned ids."""
    from Ganga.Core import BackendError

    execute_path = 'GangaDirac.Lib.Backends.DiracBase.execute'

    job = Job()
    job.id = 0
    job.backend = db
    db._parent = job

    script_path = str(tmpdir.join('submit_script'))
    script_text = script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']]))
    with open(script_path, 'w') as script_file:
        script_file.write(script_text)

    # Empty result dict: BackendError is expected and the backend fields are cleared.
    with patch(execute_path, return_value={}):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(script_path)

        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'

    # Single id in 'Value': it is stored on the backend.
    single_result = {'OK': True, 'Value': 12345}
    with patch(execute_path, return_value=single_result) as mocked_execute:
        assert db._common_submit(script_path)

        mocked_execute.assert_called_once_with("execfile('%s')" % script_path)

        assert db.id == 12345, 'id not set'

    # List in 'Value': the bulk-subjob setup receives it with the script.
    bulk_result = {'OK': True, 'Value': [123, 456]}
    with patch(execute_path, return_value=bulk_result):
        with patch.object(db, '_setup_bulk_subjobs') as mocked_bulk_setup:
            db._common_submit(script_path)
            mocked_bulk_setup.assert_called_once_with([123, 456], script_path)
Example #6
0
    def __make_subjob__(self, mj, guids, names, sjob_evnts=-1, sites=None):
        """
        Build a subjob of the master job ``mj``.

        Name, inputdata, outputdata, application, backend and both sandboxes
        are taken from ``mj``. ``names`` is always written to the subjob's
        inputdata; ``guids`` only when the inputdata type is '' or 'DQ2'.
        ``sjob_evnts`` overrides the application's ``max_events`` unless it
        is -1. ``sites`` is applied only for an 'LCG' backend whose
        requirements are 'AtlasLCGRequirements'.
        """
        logger.debug('generating subjob to run %d events in-total on files: %r' % (sjob_evnts, guids))

        subjob = Job()
        subjob.name = mj.name

        subjob.inputdata = mj.inputdata
        if subjob.inputdata.type in ('', 'DQ2'):
            subjob.inputdata.guids = guids
        subjob.inputdata.names = names

        subjob.outputdata = mj.outputdata

        subjob.application = mj.application
        if sjob_evnts != -1:
            subjob.application.max_events = sjob_evnts

        subjob.backend = mj.backend
        on_atlas_lcg = (subjob.backend._name == 'LCG'
                        and subjob.backend.requirements._name == 'AtlasLCGRequirements')
        if on_atlas_lcg and sites:
            subjob.backend.requirements.sites = sites

        subjob.inputsandbox = mj.inputsandbox
        subjob.outputsandbox = mj.outputsandbox

        return subjob
Example #7
0
    def _create_subjob(self, job, dataset):
        """Create a subjob of ``job`` whose input data is built from ``dataset``.

        ``dataset`` may be:
          * an ``LHCbDataset`` - every entry must be a ``DiracFile``;
          * a plain list or ``GangaList`` - entries are either strings
            (converted through ``allComponentFilters['gangafiles']``) or
            ``IGangaFile`` objects;
          * a single string - used as the LFN of one ``DiracFile``.

        Returns a new ``Job`` copied from ``job`` with the splitter unset and
        ``inputdata`` set to an ``LHCbDataset`` of the collected files, using
        ``self.persistency`` and ``self.depth``.

        Raises ``GangaException`` for an unsupported dataset type or entry.
        """
        logger.debug("_create_subjob")
        datatmp = []

        logger.debug("dataset size: %s" % str(len(dataset)))

        from GangaLHCb.Lib.LHCbDataset.LHCbDataset import LHCbDataset
        # Imported once here instead of separately inside each error branch.
        from Ganga.Core.exceptions import GangaException

        if isinstance(dataset, LHCbDataset):
            for i in dataset:
                if isType(i, DiracFile):
                    datatmp.append(i)
                else:
                    msg = "Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))
                    logger.error(msg)
                    raise GangaException(msg)
        elif type(dataset) == type([]) or isType(dataset, GangaList()):
            for this_file in dataset:
                if type(this_file) is str:
                    datatmp.append(allComponentFilters['gangafiles'](this_file, None))
                elif isType(this_file, IGangaFile):
                    datatmp.append(this_file)
                else:
                    logger.error("Unexpected type: %s" % str(type(this_file)))
                    # The format string used to carry two placeholders for a
                    # single argument, which raised TypeError here and masked
                    # the GangaException raised below.
                    logger.error("Wanted object to inherit from type: %s" % str(type(IGangaFile())))
                    raise GangaException("Unknown(unexpected) file object: %s" % this_file)
        elif type(dataset) is str:
            datatmp.append(DiracFile(lfn=dataset))
        else:
            logger.error("Unkown dataset type, cannot perform split here")
            logger.error("Dataset found: " + str(dataset))
            raise GangaException("Unkown dataset type, cannot perform split here")

        logger.debug("Creating new Job in Splitter")
        j = Job()
        logger.debug("Copying From Job")
        # NOTE(review): the list passed to copyFrom presumably names the
        # fields to leave out of the copy - confirm against Job.copyFrom.
        j.copyFrom(stripProxy(job), ['splitter', 'subjobs', 'inputdata', 'inputsandbox', 'inputfiles'])
        logger.debug("Unsetting Splitter")
        j.splitter = None
        logger.debug("Setting InputData")
        j.inputdata = LHCbDataset(files=datatmp[:],
                                  persistency=self.persistency,
                                  depth=self.depth)
        logger.debug("Returning new subjob")
        return j
Example #8
0
 def createSubjob(self, job):
     """Return a copy of ``job`` prepared for use as a subjob.

     The copy has its splitter cleared and its input sandbox emptied.
     """
     from Ganga.GPIDev.Lib.Job import Job

     subjob = Job()
     subjob.copyFrom(job)
     subjob.splitter = None
     # FIXME (carried over; the reason is not recorded): sandbox reset to empty.
     subjob.inputsandbox = list()
     return subjob
Example #9
0
 def split(self, job):
     """Return one copy of ``job`` per backend in ``self.backs``.

     Raises a plain ``Exception`` when ``self.fail`` equals 'exception'.
     """
     from Ganga.GPIDev.Lib.Job import Job

     if self.fail == 'exception':
         raise Exception('triggered failure during splitting')

     def clone_for(backend):
         # Copy the master job, then swap in the given backend.
         clone = Job()
         clone.copyFrom(job)
         clone.backend = backend
         return clone

     return [clone_for(backend) for backend in self.backs]
Example #10
0
 def split(self, job):
     """Return one copy of ``job`` per backend in ``self.backs``.

     Raises a plain ``Exception`` when ``self.fail`` equals 'exception'.
     """
     from Ganga.GPIDev.Lib.Job import Job

     if self.fail == 'exception':
         raise Exception('triggered failure during splitting')

     def clone_for(backend):
         # Copy the master job, then swap in the given backend.
         clone = Job()
         clone.copyFrom(job)
         clone.backend = backend
         return clone

     return [clone_for(backend) for backend in self.backs]
Example #11
0
    def _create_subjob(self, job, inputdata):
        """Return a copy of ``job`` with splitter and merger cleared, empty
        input sandbox and input files, and ``inputdata`` as its input data."""
        subjob = Job()
        subjob.copyFrom(job)

        subjob.splitter = None
        subjob.merger = None
        # The master's sandbox is added automatically, so start empty here.
        subjob.inputsandbox = list()
        subjob.inputfiles = list()
        subjob.inputdata = inputdata
        return subjob
Example #12
0
    def split(self, job):
        """Create one subjob per entry of ``self.subjobs``.

        Each subjob takes its input data (and, when available, its site)
        from the transform's partition tables at index ``entry - 1``, and
        its output data, application, backend and sandboxes from ``job``.
        Returns the list of created subjobs.
        """
        from Ganga.GPIDev.Lib.Job import Job
        logger.debug("AnaTaskSplitterJob split called")

        transform = stripProxy(job.application.getTransform())
        transform.setAppStatus(job.application, "removed")

        created = []
        for partition in self.subjobs:
            subjob = Job()
            index = partition - 1
            subjob.inputdata = transform.partitions_data[index]
            subjob.outputdata = job.outputdata
            subjob.application = job.application
            subjob.application.atlas_environment.append("OUTPUT_FILE_NUMBER=%i" % partition)
            subjob.backend = job.backend

            if transform.partitions_sites:
                # Backends without a ``requirements.sites`` field take a
                # single ``site`` attribute instead.
                if hasattr(subjob.backend.requirements, 'sites'):
                    subjob.backend.requirements.sites = transform.partitions_sites[index]
                else:
                    subjob.backend.site = transform.partitions_sites[index]

            subjob.inputsandbox = job.inputsandbox
            subjob.outputsandbox = job.outputsandbox
            created.append(subjob)

            # Task bookkeeping on the subjob's application.
            subjob.application.tasks_id = job.application.tasks_id
            subjob.application.id = transform.getNewAppID(partition)

        if not job.application.tasks_id.startswith("00"):
            job.application.tasks_id = "00:%s" % job.application.tasks_id
        return created
Example #13
0
def test_reset(db):
    """Check ``db.reset`` on the master job and, with ``doSubjobs=True``,
    on its subjobs.

    Covered cases: statuses 'submitting' and 'killed' are left untouched,
    a 'completing' job becomes 'submitted', subjobs are only reset when
    ``doSubjobs=True`` is passed, and 'completed' subjobs are not reset.
    """
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.getJobObject().subjobs = [Job(), Job()]
    for subjob in db.getJobObject().subjobs:
        subjob.backend = db
    # A separate loop variable is used so the master job ``j`` is not shadowed.
    for subjob in db.getJobObject().subjobs:
        subjob.status = 'completing'

    disallowed_status = ['submitting', 'killed']
    for status in disallowed_status:
        db.getJobObject().status = status
        db.reset()
        assert db.getJobObject(
        ).status == status, 'status shouldn\'t have changed'

    db.getJobObject().status = 'completing'
    db.reset()
    # The message previously read 'didn\t reset job', embedding a tab character.
    assert db.getJobObject().status == 'submitted', 'didn\'t reset job'
    assert [sj.status for sj in db.getJobObject().subjobs
            ] != ['submitted', 'submitted'], 'subjobs not reset properly'

    db.reset(doSubjobs=True)
    assert [sj.status for sj in db.getJobObject().subjobs
            ] == ['submitted', 'submitted'], 'subjobs not reset properly'

    for subjob in db.getJobObject().subjobs:
        subjob.status = 'completed'
    db.reset(doSubjobs=True)
    assert [sj.status for sj in db.getJobObject().subjobs
            ] != ['submitted', 'submitted'], 'subjobs not supposed to reset'
Example #14
0
def test_getOutputDataLFNs(db):
    """Check that ``db.getOutputDataLFNs`` returns only the non-empty LFNs,
    first for a job without subjobs and then for a job with three subjobs."""
    from GangaDirac.Lib.Files.DiracFile import DiracFile

    master = Job()
    master.id = 0
    master.backend = db
    db._parent = master

    class TestFile(object):
        """Stand-in file object carrying nothing but an ``lfn`` attribute."""

        def __init__(self, lfn):
            self.lfn = lfn

    def fake_outputfiles_iterator(job, file_type):
        # ``expect_subjob`` is read from the enclosing scope at call time;
        # it is assigned inside the ``with`` block below.
        assert isinstance(job, Job)
        if not expect_subjob:
            assert job.master is None
            assert file_type == DiracFile
        else:
            assert job.master is not None
        return [TestFile(lfn) for lfn in ('a', '', 'b', '', 'c', '')]

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):
        expect_subjob = False
        assert db.getOutputDataLFNs() == ['a', 'b', 'c']

        master.subjobs = [Job(), Job(), Job()]
        for child in master.subjobs:
            child._setParent(master)

        expect_subjob = True
        assert db.getOutputDataLFNs() == ['a', 'b', 'c'] * 3
Example #15
0
    def _create_subjob(self, job, inputdata):
        """Return a copy of ``job`` with splitter and merger cleared, empty
        input sandbox and input files, and ``inputdata`` as its input data."""
        subjob = Job()
        subjob.copyFrom(job)

        subjob.splitter = None
        subjob.merger = None
        # The master's sandbox is added automatically, so start empty here.
        subjob.inputsandbox = list()
        subjob.inputfiles = list()
        subjob.inputdata = inputdata
        return subjob
    def split(self, job):
        """Create one subjob per entry of ``self.subjobs``.

        Each subjob takes its input data (and, when available, its site)
        from the transform's partition tables at index ``entry - 1``, and
        its output data, application, backend and sandboxes from ``job``.
        Returns the list of created subjobs.
        """
        from Ganga.GPIDev.Lib.Job import Job
        logger.debug("AnaTaskSplitterJob split called")

        transform = stripProxy(job.application.getTransform())
        transform.setAppStatus(job.application, "removed")

        created = []
        for partition in self.subjobs:
            subjob = Job()
            index = partition - 1
            subjob.inputdata = transform.partitions_data[index]
            subjob.outputdata = job.outputdata
            subjob.application = job.application
            subjob.application.atlas_environment.append("OUTPUT_FILE_NUMBER=%i" % partition)
            subjob.backend = job.backend

            if transform.partitions_sites:
                # Backends without a ``requirements.sites`` field take a
                # single ``site`` attribute instead.
                if hasattr(subjob.backend.requirements, 'sites'):
                    subjob.backend.requirements.sites = transform.partitions_sites[index]
                else:
                    subjob.backend.site = transform.partitions_sites[index]

            subjob.inputsandbox = job.inputsandbox
            subjob.outputsandbox = job.outputsandbox
            created.append(subjob)

            # Task bookkeeping on the subjob's application.
            subjob.application.tasks_id = job.application.tasks_id
            subjob.application.id = transform.getNewAppID(partition)

        if not job.application.tasks_id.startswith("00"):
            job.application.tasks_id = "00:%s" % job.application.tasks_id
        return created
Example #17
0
 def _create_subjob(self, job, dataset):
     """Return a copy of ``job`` with splitter and merger cleared, an empty
     input sandbox, and a ``GaudiDataset`` built from ``dataset`` as input."""
     logger.debug("_create_subjob")

     subjob = Job()
     subjob.copyFrom(job)
     subjob.splitter = None
     subjob.merger = None
     # The master's sandbox is added automatically, so start empty here.
     subjob.inputsandbox = list()
     # A new dataset is always created; any input data copied from the
     # master job is replaced rather than updated in place.
     subjob.inputdata = GaudiDataset(files=dataset)
     return subjob
Example #18
0
def test__common_submit(tmpdir, db):
    """Exercise ``db._common_submit`` for an empty result dict, a single
    returned id, and a list of returned ids."""
    from Ganga.Core import BackendError

    execute_path = 'GangaDirac.Lib.Backends.DiracBase.execute'

    job = Job()
    job.id = 0
    job.backend = db
    db._parent = job

    script_path = str(tmpdir.join('submit_script'))
    script_text = script_template.replace('###PARAMETRIC_INPUTDATA###',
                                          str([['a'], ['b']]))
    with open(script_path, 'w') as script_file:
        script_file.write(script_text)

    # Empty result dict: BackendError is expected and the backend fields are cleared.
    with patch(execute_path, return_value={}):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(script_path)

        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'

    # Single id in 'Value': it is stored on the backend.
    single_result = {'OK': True, 'Value': 12345}
    with patch(execute_path, return_value=single_result) as mocked_execute:
        assert db._common_submit(script_path)

        mocked_execute.assert_called_once_with("execfile('%s')" % script_path)

        assert db.id == 12345, 'id not set'

    # List in 'Value': the bulk-subjob setup receives it with the script.
    bulk_result = {'OK': True, 'Value': [123, 456]}
    with patch(execute_path, return_value=bulk_result):
        with patch.object(db, '_setup_bulk_subjobs') as mocked_bulk_setup:
            db._common_submit(script_path)
            mocked_bulk_setup.assert_called_once_with([123, 456], script_path)
Example #19
0
def create_gaudi_subjob(job, inputdata):
    """Build a subjob from ``job`` carrying ``inputdata``.

    The application is duplicated with ``copy_app``; the backend is assigned
    directly. A falsy ``inputdata`` leaves the subjob's input data as None
    and, when the application has an ``extra`` attribute, gives it an empty
    ``BesDataset`` instead.
    """
    subjob = Job()
    subjob.name = job.name
    subjob.application = copy_app(job.application)
    subjob.backend = job.backend  # assigned as-is; no deep copy needed

    has_data = bool(inputdata)
    subjob.inputdata = inputdata if has_data else None
    if hasattr(subjob.application, 'extra'):
        if has_data:
            subjob.application.extra.inputdata = subjob.inputdata
        else:
            subjob.application.extra.inputdata = BesDataset()

    # Slice-copy the sandbox list from the master job.
    subjob.outputsandbox = job.outputsandbox[:]
    subjob.outputdata = job.outputdata
    return subjob
Example #20
0
def test__common_submit(tmpdir, db, mocker):
    """Exercise ``db._common_submit`` for a failing ``execute``, a single
    returned id, and a list of returned ids."""
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')

    from Ganga.Core.exceptions import BackendError

    execute_path = 'GangaDirac.Lib.Backends.DiracBase.execute'

    job = Job()
    job.id = 0
    job.backend = db
    db._parent = job

    script_path = str(tmpdir.join('submit_script'))
    script_text = script_template.replace('###PARAMETRIC_INPUTDATA###',
                                          str([['a'], ['b']]))
    with open(script_path, 'w') as script_file:
        script_file.write(script_text)

    # execute raises: BackendError is expected and the backend fields are cleared.
    with patch(execute_path, side_effect=GangaDiracError('test Exception')):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(script_path)

        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'

    # execute returns one id: it is stored on the backend.
    with patch(execute_path, return_value=12345) as mocked_execute:
        assert db._common_submit(script_path)

        mocked_execute.assert_called_once_with("execfile('%s')" % script_path,
                                               cred_req=mocker.ANY)

        assert db.id == 12345, 'id not set'

    # execute returns a list: the bulk-subjob setup receives it with the script.
    with patch(execute_path, return_value=[123, 456]):
        with patch.object(db, '_setup_bulk_subjobs') as mocked_bulk_setup:
            db._common_submit(script_path)
            mocked_bulk_setup.assert_called_once_with([123, 456], script_path)
Example #21
0
def test_getOutputSandbox(db):
    """Exercise ``db.getOutputSandbox`` with the default directory, with an
    explicit directory, and with an ``execute`` left as a bare mock."""
    execute_path = 'GangaDirac.Lib.Backends.DiracBase.execute'
    command_template = "getOutputSandbox(1234,'%s')"
    ok_result = {'OK': True}

    job = Job()
    job.id = 0
    job.backend = db
    db._parent = job
    db.id = 1234

    # No argument: the job's output workspace path is expected in the command.
    workspace_dir = job.getOutputWorkspace().getPath()
    with patch(execute_path, return_value=ok_result) as mocked_execute:
        assert db.getOutputSandbox(), "didn't run"
        mocked_execute.assert_called_once_with(command_template % workspace_dir)

    # Explicit directory argument.
    explicit_dir = 'test_dir'
    with patch(execute_path, return_value=ok_result) as mocked_execute:
        assert db.getOutputSandbox(explicit_dir), "didn't run with modified dir"
        mocked_execute.assert_called_once_with(command_template % explicit_dir)

    # No return value configured on the mock: a falsy result is expected.
    with patch(execute_path) as mocked_execute:
        assert not db.getOutputSandbox(explicit_dir), "didn't fail gracefully"
        mocked_execute.assert_called_once()
Example #22
0
    def _create_subjob(self, job, dataset):
        """Create a subjob of ``job`` whose input data is built from ``dataset``.

        ``dataset`` may be an ``LHCbDataset`` (every entry must be a
        ``DiracFile``) or a plain list whose entries are LFN strings or
        ``DiracFile`` objects.

        Returns a new ``Job`` copied from ``job`` with splitter and merger
        cleared, empty input sandbox and input files, and ``inputdata`` set
        to an ``LHCbDataset`` of the collected files.

        Raises ``GangaException`` for an unsupported dataset type or entry.
        """
        # Imported once up-front. The previous per-branch imports made the
        # name function-local, so the list branch, which had no import of
        # its own, failed with UnboundLocalError instead of raising it.
        from Ganga.Core.exceptions import GangaException

        logger.debug("_create_subjob")

        datatmp = []
        if isinstance(dataset, LHCbDataset):
            for i in dataset:
                if isinstance(i, DiracFile):
                    # append, not extend: ``i`` is one file, not a sequence.
                    datatmp.append(i)
                else:
                    msg = ("Unkown file-type %s, cannot perform split with file %s"
                           % (type(i), str(i)))
                    logger.error(msg)
                    raise GangaException(msg)
        elif isinstance(dataset, list):
            from Ganga.GPIDev.Base.Proxy import isType
            for i in dataset:
                if type(i) is str:
                    datatmp.append(DiracFile(lfn=i))
                elif isType(i, DiracFile()):
                    datatmp.append(i)
                else:
                    raise GangaException("Unknown(unexpected) file object: %s" %
                                         i)
        else:
            logger.error("Unkown dataset type, cannot perform split here")
            raise GangaException(
                "Unkown dataset type, cannot perform split here")

        logger.debug("Creating new Job in Splitter")
        j = Job()
        j.copyFrom(stripProxy(job))
        j.splitter = None
        j.merger = None
        j.inputsandbox = []  # master added automatically
        j.inputfiles = []
        j.inputdata = LHCbDataset(files=datatmp[:],
                                  persistency=self.persistency,
                                  depth=self.depth)
        j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice

        return j
Example #23
0
def test_submit(db):
    """Check that ``db.submit`` returns a truthy value and calls
    ``_addition_sandbox_content`` (with the job config) and
    ``_common_submit`` exactly once each.

    The three temporary input files are held in a ``with`` statement so
    they are closed even when an assertion fails part-way through.
    """
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    def fake_common_submit(dirac_script):
        with open(dirac_script, 'r') as f:
            script = f.read()
            assert script != script_template, 'script not changed'
            assert script_template.replace(
                '##INPUT_SANDBOX##',
                str(['a', 'b', 'c'] + [
                    os.path.join(j.getInputWorkspace().getPath(),
                                 '_input_sandbox_0.tgz')
                ] + ['g'])) == script, 'script not what it should be'

        return True

    with tempfile.NamedTemporaryFile('w') as file1, \
            tempfile.NamedTemporaryFile('w') as file2, \
            tempfile.NamedTemporaryFile('w') as file3:
        sjc = StandardJobConfig(
            exe=script_template,
            inputbox=[File(file1.name),
                      File(file2.name),
                      File(file3.name)],
            outputbox=['d', 'e', 'f'])

        # NOTE(review): Mock's first positional argument is ``spec``, not
        # ``side_effect``, so fake_common_submit looks like it is never
        # executed by this test - confirm whether side_effect was intended.
        with patch.object(db, '_addition_sandbox_content',
                          return_value=['g']) as _addition_sandbox_content:
            with patch.object(db, '_common_submit',
                              Mock(fake_common_submit)) as _common_submit:
                assert db.submit(sjc, ['a', 'b', 'c'])

                _addition_sandbox_content.assert_called_once_with(sjc)
                _common_submit.assert_called_once()
Example #24
0
def test_removeOutputData(db):
    """Check that ``db.removeOutputData`` returns None, for a job without
    subjobs and then for a job with three subjobs, while the patched
    ``outputfiles_foreach`` verifies the arguments it receives."""
    from GangaDirac.Lib.Files.DiracFile import DiracFile

    master = Job()
    master.id = 0
    master.backend = db
    db._parent = master

    class TestFile(object):
        """Stand-in file whose ``remove`` returns a recognisable value."""

        def __init__(self):
            pass

        def remove(self):
            return 27

    def fake_outputfiles_foreach(job, file_type, func):
        import types
        # ``expect_subjob`` is read from the enclosing scope at call time;
        # it is assigned inside the ``with`` block below.
        assert isinstance(job, Job)
        if not expect_subjob:
            assert job.master is None
            assert file_type == DiracFile
            assert isinstance(func, types.FunctionType)
            assert func(TestFile()) == 27, "Didn't call remove function"
        else:
            assert job.master is not None

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_foreach', fake_outputfiles_foreach):
        expect_subjob = False
        assert db.removeOutputData() is None

        master.subjobs = [Job(), Job(), Job()]
        for child in master.subjobs:
            child._setParent(master)

        expect_subjob = True
        assert db.removeOutputData() is None
Example #25
0
    def _create_subjob(self, job, dataset):
        """Return a copy of ``job`` with splitter and merger cleared, an empty
        input sandbox, and a ``GaudiDataset`` built from ``dataset`` as input."""
        logger.debug("_create_subjob")

        subjob = Job()
        subjob.copyFrom(job)
        subjob.splitter = None
        subjob.merger = None
        # The master's sandbox is added automatically, so start empty here.
        subjob.inputsandbox = list()
        # A new dataset is always created; any input data copied from the
        # master job is replaced rather than updated in place.
        subjob.inputdata = GaudiDataset(files=dataset)
        return subjob
Example #26
0
def test_getOutputSandbox(db, mocker):
    """Exercise ``db.getOutputSandbox`` with the default directory, with an
    explicit directory, and with an ``execute`` that raises."""
    execute_path = 'GangaDirac.Lib.Backends.DiracBase.execute'
    command_template = "getOutputSandbox(1234,'%s')"

    mocker.patch('Ganga.GPIDev.Credentials.credential_store')

    job = Job()
    job.id = 0
    job.backend = db
    db._parent = job
    db.id = 1234

    # No argument: the job's output workspace path is expected in the command.
    workspace_dir = job.getOutputWorkspace().getPath()
    with patch(execute_path, return_value=True) as mocked_execute:
        assert db.getOutputSandbox(), "didn't run"
        mocked_execute.assert_called_once_with(command_template % workspace_dir, cred_req=mocker.ANY)

    # Explicit directory argument.
    explicit_dir = 'test_dir'
    with patch(execute_path, return_value=True) as mocked_execute:
        assert db.getOutputSandbox(explicit_dir), "didn't run with modified dir"
        mocked_execute.assert_called_once_with(command_template % explicit_dir, cred_req=mocker.ANY)

    # A GangaDiracError raised by execute must give a falsy result.
    failure = GangaDiracError('test Exception')
    with patch(execute_path, side_effect=failure) as mocked_execute:
        assert not db.getOutputSandbox(explicit_dir), "didn't fail gracefully"
        mocked_execute.assert_called_once()
Example #27
0
    def split(self, job):
        """Return ``self.numJobs`` copies of ``job``, each with splitter and
        merger cleared and an empty input sandbox."""

        def make_copy():
            clone = Job()
            clone.copyFrom(job)
            clone.splitter = None
            clone.merger = None
            # The master's sandbox is added automatically, so start empty here.
            clone.inputsandbox = []
            return clone

        return [make_copy() for _ in range(self.numJobs)]
Example #28
0
def test_submit(db):
    """Check that ``db.submit`` returns a truthy value and calls
    ``_addition_sandbox_content`` (with the job config) and
    ``_common_submit`` exactly once each.

    The three temporary input files are held in a ``with`` statement so
    they are closed even when an assertion fails part-way through.
    """
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    def fake_common_submit(dirac_script):
        with open(dirac_script, 'r') as f:
            script = f.read()
            assert script != script_template, 'script not changed'
            assert script_template.replace('##INPUT_SANDBOX##',
                                           str(['a', 'b', 'c'] +
                                               [os.path.join(j.getInputWorkspace().getPath(),
                                                             '_input_sandbox_0.tgz')] +
                                               ['g'])) == script, 'script not what it should be'

        return True

    with tempfile.NamedTemporaryFile('w') as file1, \
            tempfile.NamedTemporaryFile('w') as file2, \
            tempfile.NamedTemporaryFile('w') as file3:
        sjc = StandardJobConfig(exe=script_template,
                                inputbox=[File(file1.name),
                                          File(file2.name),
                                          File(file3.name)],
                                outputbox=['d', 'e', 'f'])

        # NOTE(review): Mock's first positional argument is ``spec``, not
        # ``side_effect``, so fake_common_submit looks like it is never
        # executed by this test - confirm whether side_effect was intended.
        with patch.object(db, '_addition_sandbox_content', return_value=['g']) as _addition_sandbox_content:
            with patch.object(db, '_common_submit', Mock(fake_common_submit)) as _common_submit:
                assert db.submit(sjc, ['a', 'b', 'c'])

                _addition_sandbox_content.assert_called_once_with(sjc)
                _common_submit.assert_called_once()
Example #29
0
def test_getOutputSandbox(db):
    """Exercise ``db.getOutputSandbox`` with the default directory, with an
    explicit directory, and with an ``execute`` left as a bare mock."""
    execute_path = 'GangaDirac.Lib.Backends.DiracBase.execute'
    command_template = "getOutputSandbox(1234,'%s')"
    ok_result = {'OK': True}

    job = Job()
    job.id = 0
    job.backend = db
    db._parent = job
    db.id = 1234

    # No argument: the job's output workspace path is expected in the command.
    workspace_dir = job.getOutputWorkspace().getPath()
    with patch(execute_path, return_value=ok_result) as mocked_execute:
        assert db.getOutputSandbox(), "didn't run"
        mocked_execute.assert_called_once_with(command_template % workspace_dir)

    # Explicit directory argument.
    explicit_dir = 'test_dir'
    with patch(execute_path, return_value=ok_result) as mocked_execute:
        assert db.getOutputSandbox(explicit_dir), "didn't run with modified dir"
        mocked_execute.assert_called_once_with(command_template % explicit_dir)

    # No return value configured on the mock: a falsy result is expected.
    with patch(execute_path) as mocked_execute:
        assert not db.getOutputSandbox(explicit_dir), "didn't fail gracefully"
        mocked_execute.assert_called_once()
Example #30
0
def test__resubmit(db):
    """``db._resubmit`` must raise ``BackendError`` for this job layout.

    Two jobs are given ``db`` as their backend and ``db`` is then parented to
    the first one (``masterj``). ``_common_submit`` is replaced by a mock so
    that no real submission can take place while ``_resubmit`` runs.
    """
    from Ganga.Core.exceptions import BackendError

    masterj = Job()
    masterj.id = 0
    masterj.backend = db
    # NOTE(review): ``j`` is not referenced again; the assignment of ``db`` as
    # its backend is kept in case it influences ``db``'s state -- confirm.
    j = Job()
    j.id = 1
    j.backend = db
    db._setParent(masterj)

    # The mock's return_value stands in for a successful submission; the
    # previously defined (and never used) local helper was dead code.
    with patch.object(db, '_common_submit', return_value='_common_submit run ok'):
        with pytest.raises(BackendError):
            db._resubmit()
Example #31
0
    def _create_subjob(self, job, dataset):
        """Create a subjob of ``job`` that processes only ``dataset``.

        Args:
            job: master job to copy; it is passed through ``stripProxy``
                before being copied.
            dataset: either an ``LHCbDataset`` containing ``DiracFile``
                objects, or a plain ``list`` whose items are LFN strings
                and/or ``DiracFile`` objects.

        Returns:
            A new ``Job`` copied from ``job`` with splitter and merger
            cleared, empty ``inputsandbox``/``inputfiles``, and an
            ``LHCbDataset`` built from ``dataset`` as ``inputdata``.

        Raises:
            GangaException: if ``dataset`` or one of its items has an
                unsupported type.
        """
        # Imported once up front: importing inside only some branches made
        # ``GangaException`` an unbound local in the branch that lacked it.
        from Ganga.Core.exceptions import GangaException
        from Ganga.GPIDev.Base.Proxy import isType

        logger.debug("_create_subjob")

        datatmp = []
        if isinstance(dataset, LHCbDataset):
            for i in dataset:
                if not isinstance(i, DiracFile):
                    message = "Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))
                    logger.error(message)
                    raise GangaException(message)
                # A single file is appended; ``extend`` would try to iterate it.
                datatmp.append(i)
        elif isinstance(dataset, list):
            for i in dataset:
                if isinstance(i, str):
                    # Bare strings are treated as LFNs.
                    datatmp.append(DiracFile(lfn=i))
                elif isType(i, DiracFile):
                    datatmp.append(i)
                else:
                    raise GangaException(
                        "Unknown(unexpected) file object: %s" % i)
        else:
            logger.error("Unkown dataset type, cannot perform split here")
            raise GangaException(
                "Unkown dataset type, cannot perform split here")

        logger.debug("Creating new Job in Splitter")
        j = Job()
        j.copyFrom(stripProxy(job))
        j.splitter = None
        j.merger = None
        j.inputsandbox = []  # master added automatically
        j.inputfiles = []
        j.inputdata = LHCbDataset(files=datatmp[:],
                                  persistency=self.persistency,
                                  depth=self.depth)
        j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice

        return j
Example #32
0
    def _create_subjob(self, job, dataset):
        """Create a subjob of ``job`` whose input data is ``dataset``.

        Args:
            job: master job to copy; it is passed through ``stripProxy``
                before being copied.
            dataset: one of
                * an ``LHCbDataset`` containing ``DiracFile`` objects,
                * a ``list``/``GangaList`` of strings and/or ``IGangaFile``
                  objects (strings are converted through
                  ``allComponentFilters['gangafiles']``),
                * a single string, used as the LFN of a ``DiracFile``.

        Returns:
            A new ``Job`` copied from ``job`` (skipping splitter, subjobs,
            inputdata, inputsandbox and inputfiles) with its splitter
            cleared and an ``LHCbDataset`` built from ``dataset`` as
            ``inputdata``.

        Raises:
            GangaException: if ``dataset`` or one of its items has an
                unsupported type.
        """
        from Ganga.Core.exceptions import GangaException
        from GangaLHCb.Lib.LHCbDataset.LHCbDataset import LHCbDataset

        logger.debug("_create_subjob")
        datatmp = []

        logger.debug("dataset size: %s" % str(len(dataset)))
        #logger.debug( "dataset: %s" % str(dataset) )

        if isinstance(dataset, LHCbDataset):
            for i in dataset:
                if not isType(i, DiracFile):
                    message = "Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i))
                    logger.error(message)
                    raise GangaException(message)
                datatmp.append(i)
        elif isinstance(dataset, (list, GangaList)):
            for this_file in dataset:
                if isinstance(this_file, str):
                    datatmp.append(allComponentFilters['gangafiles'](this_file, None))
                elif isType(this_file, IGangaFile):
                    datatmp.append(this_file)
                else:
                    logger.error("Unexpected type: %s" % str(type(this_file)))
                    # One placeholder for one argument: the previous two-placeholder
                    # format raised TypeError before the GangaException below.
                    logger.error("Wanted object to inherit from type: %s" % str(type(IGangaFile())))
                    raise GangaException("Unknown(unexpected) file object: %s" % this_file)
        elif isinstance(dataset, str):
            datatmp.append(DiracFile(lfn=dataset))
        else:
            logger.error("Unkown dataset type, cannot perform split here")
            logger.error("Dataset found: " + str(dataset))
            raise GangaException("Unkown dataset type, cannot perform split here")

        logger.debug("Creating new Job in Splitter")
        j = Job()
        logger.debug("Copying From Job")
        j.copyFrom(stripProxy(job), ['splitter', 'subjobs', 'inputdata', 'inputsandbox', 'inputfiles'])
        logger.debug("Unsetting Splitter")
        j.splitter = None
        #logger.debug("Unsetting Merger")
        #j.merger = None
        #j.inputsandbox = [] ## master added automatically
        #j.inputfiles = []
        logger.debug("Setting InputData")
        j.inputdata = LHCbDataset(files=datatmp[:],
                                  persistency=self.persistency,
                                  depth=self.depth)
        #j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice
        logger.debug("Returning new subjob")
        return j
Example #33
0
    def createSubjob(self, job, additional_skip_args=None):
        """Return a fresh ``Job`` copied from ``job`` with its inputs reset.

        The copy skips ``splitter``, ``inputsandbox``, ``inputfiles``,
        ``inputdata`` and ``subjobs`` plus any names given in
        ``additional_skip_args``. Afterwards the splitter is set to ``None``
        and the sandbox, input files and input data are set to empty lists.
        """
        from Ganga.GPIDev.Lib.Job import Job

        extra_skips = [] if additional_skip_args is None else additional_skip_args

        subjob = Job()

        # Attributes that must not be carried over from the master job.
        skipping_args = ['splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs']
        skipping_args.extend(extra_skips)
        subjob.copyFrom(job, skipping_args)

        subjob.splitter = None
        subjob.inputsandbox = []
        subjob.inputfiles = []
        subjob.inputdata = []
        return subjob
Example #34
0
def test_removeOutputData(db):
    """Check ``db.removeOutputData`` for a master job and for its subjobs.

    ``outputfiles_foreach`` in ``GangaDirac.Lib.Backends.DiracBase`` is
    replaced by a fake that validates its arguments. ``removeOutputData`` is
    expected to return ``None`` in both scenarios.
    """
    import types

    from GangaDirac.Lib.Files.DiracFile import DiracFile

    master = Job()
    master.id = 0
    master.backend = db
    db._parent = master

    class RemovableFile(object):
        """Stand-in file whose ``remove`` returns a recognisable value."""

        def remove(self):
            return 27

    def fake_outputfiles_foreach(job, file_type, func):
        # ``expecting_subjobs`` is read from the enclosing scope at call time.
        assert isinstance(job, Job)
        if expecting_subjobs:
            assert job.master is not None
            return
        # The remaining checks apply only to the master-job scenario.
        assert job.master is None
        assert file_type == DiracFile
        assert isinstance(func, types.FunctionType)
        assert func(RemovableFile()) == 27, 'Didn\'t call remove function'

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_foreach',
               fake_outputfiles_foreach):
        # Scenario 1: master job without subjobs.
        expecting_subjobs = False
        assert db.removeOutputData() is None

        # Scenario 2: the same job after three subjobs are attached.
        master.subjobs = [Job(), Job(), Job()]
        for child in master.subjobs:
            child._setParent(master)

        expecting_subjobs = True
        assert db.removeOutputData() is None
Example #35
0
    def createSubjob(self, job, additional_skip_args=None):
        """Build a new subjob as a copy of ``job`` with cleared inputs.

        ``splitter``, ``inputsandbox``, ``inputfiles``, ``inputdata`` and
        ``subjobs`` are never copied; ``additional_skip_args`` may name
        further attributes to skip. The returned job has no splitter and
        empty lists for its sandbox, input files and input data.
        """
        from Ganga.GPIDev.Lib.Job import Job

        if additional_skip_args is None:
            additional_skip_args = []

        new_job = Job()

        # Start from the fixed skip list, then add the caller's extras.
        skipping_args = ['splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs']
        skipping_args += additional_skip_args
        new_job.copyFrom(job, skipping_args)

        new_job.splitter = None
        new_job.inputsandbox = []
        new_job.inputfiles = []
        new_job.inputdata = []
        return new_job
Example #36
0
    def split(self, job):
        """Split ``job`` into subjobs covering contiguous table-id ranges.

        The dataset returned by ``job.inputdata.get_dataset()`` is divided
        over at most ``self.numsubjobs`` subjobs. Each subjob receives
        inclusive, 1-based ``table_id_lower``/``table_id_upper`` bounds on
        its ``inputdata``; when the tables do not divide evenly, the first
        subjobs each take one extra table.

        Side effect: ``self.numsubjobs`` is lowered to the number of tables
        when it exceeds it.

        Returns:
            list of the created ``Job`` objects; empty when there is nothing
            to split.
        """
        from Ganga.GPIDev.Lib.Job import Job
        subjobs = []
        primeTables = job.inputdata.get_dataset()
        num_tables = len(primeTables)

        ## avoid creating jobs with nothing to do
        if self.numsubjobs > num_tables:
            self.numsubjobs = num_tables

        ## no subjobs requested or no tables: nothing to split, and the
        ## chunk size computation below would divide by zero
        if self.numsubjobs <= 0:
            return subjobs

        ## create subjobs
        for _ in range(self.numsubjobs):
            j = Job()
            j.application = job.application
            # NOTE(review): whether this assignment copies ``inputdata`` or
            # shares one object between subjobs depends on Job's attribute
            # handling -- confirm the bounds set below are per-subjob.
            j.inputdata = job.inputdata
            j.inputdata.table_id_lower = 1
            j.inputdata.table_id_upper = 1
            j.outputdata = job.outputdata
            j.inputsandbox = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            j.backend = job.backend
            subjobs.append(j)

        ## chunksize of each subjob (floor division keeps the ids integral)
        chunksize = num_tables // self.numsubjobs
        remainder = num_tables % self.numsubjobs

        offset = 0
        for i, subjob in enumerate(subjobs):
            my_chunksize = chunksize
            ## the first ``remainder`` subjobs take one extra table
            if remainder >= i + 1:
                my_chunksize += 1

            ## set lower bound id (inclusive)
            subjob.inputdata.table_id_lower = offset + 1
            offset += my_chunksize
            ## set upper bound id (inclusive)
            subjob.inputdata.table_id_upper = offset

        return subjobs
Example #37
0
def test__setup_bulk_subjobs(tmpdir, db):
    """Check ``_setup_bulk_subjobs`` on a Dirac backend.

    A submit script is written with two parametric input datasets. An empty
    list of Dirac ids must raise ``BackendError``; with two ids the master
    job must end up with two subjobs whose ids, backend ids, application
    type, splitter and backend type are verified.
    """
    from Ganga.Core.exceptions import BackendError
    from Ganga.GPIDev.Lib.Dataset.Dataset import Dataset
    from GangaDirac.Lib.Backends import Dirac

    parametric_data = [['a'], ['b']]

    script_path = str(tmpdir.join('submit_script'))
    with open(script_path, 'w') as script_file:
        script_file.write(
            script_template.replace('###PARAMETRIC_INPUTDATA###',
                                    str(parametric_data)))

    # No Dirac ids at all is an error.
    with pytest.raises(BackendError):
        db._setup_bulk_subjobs([], script_path)

    backend = Dirac()
    master = Job()
    master.id = 0  # This would normally be set by the registry if this was a proxy job
    master.application = Executable()
    master.splitter = ArgSplitter()
    master.splitter.args = [['a'], ['b'], ['c'], ['d'], ['e']]
    master.inputdata = Dataset()
    master.backend = backend
    backend._parent = master

    dirac_ids = [123, 456]

    def fake_setup_subjob_dataset(dataset):
        assert dataset in parametric_data, 'dataset not passed properly'

    with patch.object(backend, '_setup_subjob_dataset',
                      fake_setup_subjob_dataset):
        assert backend._setup_bulk_subjobs(dirac_ids, script_path), 'didnt run'

    assert len(master.subjobs) == len(dirac_ids), 'didnt work'
    for expected_id, (backend_id, subjob) in enumerate(
            zip(dirac_ids, master.subjobs)):
        assert expected_id == subjob.id, 'ids dont match'
        assert backend_id == subjob.backend.id, 'backend.ids dont match'
        assert isinstance(subjob.application,
                          master.application.__class__), 'apps dont match'
        assert subjob.splitter is None, 'splitter not done'
        assert isinstance(subjob.backend,
                          master.backend.__class__), 'backend dont match'
Example #38
0
def test__resubmit(db):
    """``db._resubmit`` must raise ``BackendError`` for this job layout.

    Two jobs are given ``db`` as their backend and ``db`` is then parented to
    the first one (``masterj``). ``_common_submit`` is replaced by a mock so
    that no real submission can take place while ``_resubmit`` runs.
    """
    from Ganga.Core import BackendError

    masterj = Job()
    masterj.id = 0
    masterj.backend = db
    # NOTE(review): ``j`` is not referenced again; the assignment of ``db`` as
    # its backend is kept in case it influences ``db``'s state -- confirm.
    j = Job()
    j.id = 1
    j.backend = db
    db._setParent(masterj)

    # The mock's return_value stands in for a successful submission; the
    # previously defined (and never used) local helper was dead code.
    with patch.object(db, '_common_submit', return_value='_common_submit run ok'):
        with pytest.raises(BackendError):
            db._resubmit()
Example #39
0
    def split(self,job):
        """Split ``job`` into subjobs covering contiguous table-id ranges.

        The dataset returned by ``job.inputdata.get_dataset()`` is divided
        over at most ``self.numsubjobs`` subjobs. Each subjob receives
        inclusive, 1-based ``table_id_lower``/``table_id_upper`` bounds on
        its ``inputdata``; when the tables do not divide evenly, the first
        subjobs each take one extra table.

        Side effect: ``self.numsubjobs`` is lowered to the number of tables
        when it exceeds it.

        Returns:
            list of the created ``Job`` objects; empty when there is nothing
            to split.
        """
        from Ganga.GPIDev.Lib.Job import Job
        subjobs = []
        primeTables = job.inputdata.get_dataset()
        table_count = len(primeTables)

        ## avoid creating jobs with nothing to do
        if self.numsubjobs > table_count:
            self.numsubjobs = table_count

        ## no subjobs requested or no tables: nothing to split, and the
        ## chunk size computation below would divide by zero
        if self.numsubjobs <= 0:
            return subjobs

        ## create subjobs
        for _ in range(self.numsubjobs):
            j = Job()
            j.application   = job.application
            # NOTE(review): whether this assignment copies ``inputdata`` or
            # shares one object between subjobs depends on Job's attribute
            # handling -- confirm the bounds set below are per-subjob.
            j.inputdata     = job.inputdata
            j.inputdata.table_id_lower = 1
            j.inputdata.table_id_upper = 1
            j.outputdata    = job.outputdata
            j.inputsandbox  = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            j.backend       = job.backend
            subjobs.append(j)

        ## chunksize of each subjob (floor division keeps the ids integral)
        chunksize = table_count // self.numsubjobs
        leftover = table_count % self.numsubjobs

        offset = 0
        for i, subjob in enumerate(subjobs):
            my_chunksize = chunksize
            ## the first ``leftover`` subjobs take one extra table
            if leftover >= i+1:
                my_chunksize += 1

            ## set lower bound id (inclusive)
            subjob.inputdata.table_id_lower = offset+1
            offset += my_chunksize
            ## set upper bound id (inclusive)
            subjob.inputdata.table_id_upper = offset

        return subjobs
Example #40
0
def test__setup_bulk_subjobs(tmpdir, db):
    """Verify that ``_setup_bulk_subjobs`` creates one subjob per Dirac id.

    The submit script carries two parametric datasets. Passing no ids must
    raise ``BackendError``; passing two ids must yield two subjobs on the
    master job with matching ids, backend ids and component types.
    """
    from Ganga.Core import BackendError
    from Ganga.GPIDev.Lib.Dataset.Dataset import Dataset
    from GangaDirac.Lib.Backends import Dirac

    expected_datasets = [['a'], ['b']]
    script_body = script_template.replace('###PARAMETRIC_INPUTDATA###', str(expected_datasets))

    script_name = str(tmpdir.join('submit_script'))
    with open(script_name, 'w') as handle:
        handle.write(script_body)

    # An empty id list cannot be mapped onto subjobs.
    with pytest.raises(BackendError):
        db._setup_bulk_subjobs([], script_name)

    dirac_backend = Dirac()
    master_job = Job()
    master_job.id = 0  # This would normally be set by the registry if this was a proxy job
    master_job.application = Executable()
    master_job.splitter = ArgSplitter()
    master_job.splitter.args = [['a'], ['b'], ['c'], ['d'], ['e']]
    master_job.inputdata = Dataset()
    master_job.backend = dirac_backend
    dirac_backend._parent = master_job

    dirac_ids = [123, 456]

    def fake_setup_subjob_dataset(dataset):
        assert dataset in expected_datasets, 'dataset not passed properly'

    with patch.object(dirac_backend, '_setup_subjob_dataset', fake_setup_subjob_dataset):
        assert dirac_backend._setup_bulk_subjobs(dirac_ids, script_name), 'didnt run'

    created = master_job.subjobs
    assert len(created) == len(dirac_ids), 'didnt work'

    application_type = master_job.application.__class__
    backend_type = master_job.backend.__class__
    for position, (backend_id, subjob) in enumerate(zip(dirac_ids, created)):
        assert position == subjob.id, 'ids dont match'
        assert backend_id == subjob.backend.id, 'backend.ids dont match'
        assert isinstance(subjob.application, application_type), 'apps dont match'
        assert subjob.splitter is None, 'splitter not done'
        assert isinstance(subjob.backend, backend_type), 'backend dont match'
Example #41
0
def test_getOutputData(db, tmpdir):
    """Check ``db.getOutputData`` for a master job and for subjobs.

    ``outputfiles_iterator`` in ``GangaDirac.Lib.Backends.DiracBase`` is
    replaced by a fake returning six stand-in files, three of which have a
    non-empty ``lfn``. The test verifies that

    * an invalid output directory (``'/false/dir'``) raises
      ``GangaException``;
    * only files with an LFN are returned, have ``localDir`` set and had
      ``get`` called;
    * a name list restricts the result to the matching ``namePattern`` values;
    * with three subjobs the results are repeated per subjob and a
      ``0.<n>`` directory per subjob exists under the given directory.
    """
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    with pytest.raises(GangaException):
        db.getOutputData('/false/dir')

    #######################
    class TestFile(object):
        def __init__(self, lfn, namePattern):
            self.lfn = lfn
            self.namePattern = namePattern

        def get(self):
            # Marker proving that ``get`` was invoked on this file.
            self.check = 42

    test_files = [TestFile('a', 'alpha'), TestFile('', 'delta'),
                  TestFile('b', 'beta'), TestFile('', 'bravo'),
                  TestFile('c', 'charlie'), TestFile('', 'foxtrot')]

    #######################

    def fake_outputfiles_iterator(job, file_type):
        # ``subjob`` is read from the enclosing scope at call time.
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
            assert file_type == DiracFile
        return test_files

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):

        # master jobs
        #######################
        subjob = False
        assert db.getOutputData() == ['a', 'b', 'c']
        for f in test_files:
            if f.lfn in ['a', 'b', 'c']:
                assert f.localDir == j.getOutputWorkspace().getPath()
                # The comparison belongs in the condition, not in the message.
                assert f.check == 42, "didn't call get"
            else:
                assert not hasattr(f, 'localDir')
                assert not hasattr(f, 'check')
        assert db.getOutputData(None, ['alpha', 'charlie']) == ['a', 'c']
        assert db.getOutputData(tmpdir.dirname, ['alpha', 'charlie']) == ['a', 'c']

        # subjobs
        ########################
        j.subjobs = [Job(), Job(), Job()]
        for i, sj in enumerate(j.subjobs):
            sj._setParent(j)
            sj.id = i

        subjob = True
        assert db.getOutputData() == ['a', 'b', 'c'] * 3
        assert db.getOutputData(None, ['beta']) == ['b'] * 3
        assert db.getOutputData(tmpdir.dirname, ['alpha', 'charlie']) == ['a', 'c'] * 3
        # Each subjob is expected to get its own '0.<id>' directory; clean up.
        for i in range(3):
            assert os.path.isdir(os.path.join(tmpdir.dirname, '0.%d' % i))
            os.rmdir(os.path.join(tmpdir.dirname, '0.%d' % i))
Example #42
0
def test_getOutputData(db, tmpdir):
    """Check ``db.getOutputData`` for a master job and for subjobs.

    ``outputfiles_iterator`` in ``GangaDirac.Lib.Backends.DiracBase`` is
    replaced by a fake returning six stand-in files, three of which have a
    non-empty ``lfn``. The test verifies that

    * an invalid output directory (``'/false/dir'``) raises
      ``GangaException``;
    * only files with an LFN are returned, have ``localDir`` set and had
      ``get`` called;
    * a name list restricts the result to the matching ``namePattern`` values;
    * with three subjobs the results are repeated per subjob and a
      ``0.<n>`` directory per subjob exists under the given directory.
    """
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    with pytest.raises(GangaException):
        db.getOutputData('/false/dir')

    #######################
    class TestFile(object):
        def __init__(self, lfn, namePattern):
            self.lfn = lfn
            self.namePattern = namePattern

        def get(self):
            # Marker proving that ``get`` was invoked on this file.
            self.check = 42

    test_files = [
        TestFile('a', 'alpha'),
        TestFile('', 'delta'),
        TestFile('b', 'beta'),
        TestFile('', 'bravo'),
        TestFile('c', 'charlie'),
        TestFile('', 'foxtrot'),
    ]

    #######################

    def fake_outputfiles_iterator(job, file_type):
        # ``subjob`` is read from the enclosing scope at call time.
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
            assert file_type == DiracFile
        return test_files

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator',
               fake_outputfiles_iterator):

        # master jobs
        #######################
        subjob = False
        assert db.getOutputData() == ['a', 'b', 'c']
        for f in test_files:
            if f.lfn in ['a', 'b', 'c']:
                assert f.localDir == j.getOutputWorkspace().getPath()
                # The comparison belongs in the condition, not in the message.
                assert f.check == 42, "didn't call get"
            else:
                assert not hasattr(f, 'localDir')
                assert not hasattr(f, 'check')
        assert db.getOutputData(None, ['alpha', 'charlie']) == ['a', 'c']
        assert db.getOutputData(tmpdir.dirname,
                                ['alpha', 'charlie']) == ['a', 'c']

        # subjobs
        ########################
        j.subjobs = [Job(), Job(), Job()]
        for i, sj in enumerate(j.subjobs):
            sj._setParent(j)
            sj.id = i

        subjob = True
        assert db.getOutputData() == ['a', 'b', 'c'] * 3
        assert db.getOutputData(None, ['beta']) == ['b'] * 3
        assert db.getOutputData(tmpdir.dirname,
                                ['alpha', 'charlie']) == ['a', 'c'] * 3
        # Each subjob is expected to get its own '0.<id>' directory; clean up.
        for i in range(3):
            assert os.path.isdir(os.path.join(tmpdir.dirname, '0.%d' % i))
            os.rmdir(os.path.join(tmpdir.dirname, '0.%d' % i))