Example 1
    def __make_subjob__(self, mj, guids, names, sjob_evnts=-1, sites=None):
        
        """
        private method to create subjob object
        """
        
        logger.debug('generating subjob to run %d events in-total on files: %s' % (sjob_evnts, repr(guids)))
        j = Job()

        j.name            = mj.name
        j.inputdata       = mj.inputdata

        if j.inputdata.type in ['','DQ2']:
            j.inputdata.guids = guids

        j.inputdata.names = names

        j.outputdata    = mj.outputdata
        j.application   = mj.application
        if sjob_evnts != -1:
            j.application.max_events = sjob_evnts
        j.backend       = mj.backend
        
        if j.backend._name in ['LCG'] and j.backend.requirements._name == 'AtlasLCGRequirements':
            if sites:
                j.backend.requirements.sites = sites
        
        j.inputsandbox  = mj.inputsandbox
        j.outputsandbox = mj.outputsandbox

        return j
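A hypothetical sketch (not part of the listing) of how a splitter method in the same class might drive this helper: chunk the master job's GUID and name lists and delegate subjob creation to __make_subjob__. The chunk size and the split() signature are assumptions.

    def split(self, mj):
        # Illustrative only: assumed caller living in the same splitter class.
        subjobs = []
        chunk = 10  # assumed number of input files per subjob
        guids = mj.inputdata.guids
        names = mj.inputdata.names
        for i in range(0, len(guids), chunk):
            subjobs.append(self.__make_subjob__(mj,
                                                guids[i:i + chunk],
                                                names[i:i + chunk]))
        return subjobs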
Example 2
def test__common_submit(tmpdir, db, mocker):
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')

    from Ganga.Core import BackendError
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))

    with patch('GangaDirac.Lib.Backends.DiracBase.execute', side_effect=GangaDiracError('test Exception')):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(name)

        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'

    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=12345) as execute:
        assert db._common_submit(name)

        execute.assert_called_once_with("execfile('%s')" % name, cred_req=mocker.ANY)

        assert db.id == 12345, 'id not set'

    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=[123, 456]):
        with patch.object(db, '_setup_bulk_subjobs') as _setup_bulk_subjobs:
            db._common_submit(name)
            _setup_bulk_subjobs.assert_called_once_with([123, 456], name)
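Several tests in this listing take a db fixture and reference a script_template; neither is shown here. A minimal sketch of what they might look like, with assumed names and contents (the real definitions live in the test module or its conftest.py):

import pytest
from GangaDirac.Lib.Backends.DiracBase import DiracBase

# Assumed: a DIRAC submit-script skeleton whose placeholder the tests
# replace with parametric input data.
script_template = """
inputdata = ###PARAMETRIC_INPUTDATA###
"""

@pytest.fixture
def db():
    # A bare backend instance; each test attaches it to a Job and patches
    # GangaDirac.Lib.Backends.DiracBase.execute around the calls under test.
    return DiracBase()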
Example 3
    def split(self, job):
        from Ganga.GPIDev.Lib.Job import Job
        logger.debug("AnaTaskSplitterJob split called")
        sjl = []
        transform = stripProxy(job.application.getTransform())
        transform.setAppStatus(job.application, "removed")
        # Do the splitting
        for sj in self.subjobs:
            j = Job()
            j.inputdata = transform.partitions_data[sj-1]
            j.outputdata = job.outputdata
            j.application = job.application
            j.application.atlas_environment.append("OUTPUT_FILE_NUMBER=%i" % sj)
            j.backend = job.backend
            if transform.partitions_sites:
                if hasattr(j.backend.requirements, 'sites'):                
                    j.backend.requirements.sites = transform.partitions_sites[sj-1]                    
                else:
                    j.backend.site = transform.partitions_sites[sj-1]

            j.inputsandbox = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            sjl.append(j)
            # Task handling
            j.application.tasks_id = job.application.tasks_id
            j.application.id = transform.getNewAppID(sj)
            #transform.setAppStatus(j.application, "submitting")
        if not job.application.tasks_id.startswith("00"):
            job.application.tasks_id = "00:%s" % job.application.tasks_id
        return sjl
Example 4
    def split(self, job):
        from Ganga.GPIDev.Lib.Job import Job
        logger.debug("AnaTaskSplitterJob split called")
        sjl = []
        transform = stripProxy(job.application.getTransform())
        transform.setAppStatus(job.application, "removed")
        # Do the splitting
        for sj in self.subjobs:
            j = Job()
            j.inputdata = transform.partitions_data[sj - 1]
            j.outputdata = job.outputdata
            j.application = job.application
            j.application.atlas_environment.append("OUTPUT_FILE_NUMBER=%i" %
                                                   sj)
            j.backend = job.backend
            if transform.partitions_sites:
                if hasattr(j.backend.requirements, 'sites'):
                    j.backend.requirements.sites = transform.partitions_sites[
                        sj - 1]
                else:
                    j.backend.site = transform.partitions_sites[sj - 1]

            j.inputsandbox = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            sjl.append(j)
            # Task handling
            j.application.tasks_id = job.application.tasks_id
            j.application.id = transform.getNewAppID(sj)
            #transform.setAppStatus(j.application, "submitting")
        if not job.application.tasks_id.startswith("00"):
            job.application.tasks_id = "00:%s" % job.application.tasks_id
        return sjl
Example 5
def test_getOutputDataLFNs(db):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    #######################
    class TestFile(object):
        def __init__(self, lfn):
            self.lfn = lfn
    #######################

    def fake_outputfiles_iterator(job, file_type):
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
            assert file_type == DiracFile
        return [TestFile('a'), TestFile(''),
                TestFile('b'), TestFile(''),
                TestFile('c'), TestFile('')]

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):
        subjob = False
        assert db.getOutputDataLFNs() == ['a', 'b', 'c']

        j.subjobs = [Job(), Job(), Job()]
        for sj in j.subjobs:
            sj._setParent(j)

        subjob = True
        assert db.getOutputDataLFNs() == ['a', 'b', 'c'] * 3
Example 6
def test__common_submit(tmpdir, db):
    from Ganga.Core import BackendError
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))

    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={}):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(name)

        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'

    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={'OK': True, 'Value': 12345}) as execute:
        assert db._common_submit(name)

        execute.assert_called_once_with("execfile('%s')" % name)

        assert db.id == 12345, 'id not set'

    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={'OK': True, 'Value': [123, 456]}):
        with patch.object(db, '_setup_bulk_subjobs') as _setup_bulk_subjobs:
            db._common_submit(name)
            _setup_bulk_subjobs.assert_called_once_with([123, 456], name)
Example 7
def test_getOutputDataLFNs(db):
    from GangaDirac.Lib.Files.DiracFile import DiracFile

    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    #######################
    class TestFile(object):
        def __init__(self, lfn):
            self.lfn = lfn
    #######################

    def fake_outputfiles_iterator(job, file_type):
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
            assert file_type == DiracFile
        return [TestFile('a'), TestFile(''),
                TestFile('b'), TestFile(''),
                TestFile('c'), TestFile('')]

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):
        subjob = False
        assert db.getOutputDataLFNs() == ['a', 'b', 'c']

        j.subjobs = [Job(), Job(), Job()]
        for sj in j.subjobs:
            sj._setParent(j)

        subjob = True
        assert db.getOutputDataLFNs() == ['a', 'b', 'c'] * 3
Example 8
def test_reset(db):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.getJobObject().subjobs = [Job(), Job()]
    for subjob in db.getJobObject().subjobs:
        subjob.backend = db
    for j in db.getJobObject().subjobs:
        j.status = 'completing'

    disallowed_status = ['submitting', 'killed']
    for status in disallowed_status:
        db.getJobObject().status = status
        db.reset()
        assert db.getJobObject(
        ).status == status, 'status shouldn\'t have changed'

    db.getJobObject().status = 'completing'
    db.reset()
    assert db.getJobObject().status == 'submitted', 'didn\'t reset job'
    assert [j.status for j in db.getJobObject().subjobs
            ] != ['submitted', 'submitted'], 'subjobs not reset properly'

    db.reset(doSubjobs=True)
    assert [j.status for j in db.getJobObject().subjobs
            ] == ['submitted', 'submitted'], 'subjobs not reset properly'

    for j in db.getJobObject().subjobs:
        j.status = 'completed'
    db.reset(doSubjobs=True)
    assert [j.status for j in db.getJobObject().subjobs
            ] != ['submitted', 'submitted'], 'subjobs not supposed to reset'
Example 9
def test_reset(db):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.getJobObject().subjobs = [Job(), Job()]
    for subjob in db.getJobObject().subjobs:
        subjob.backend = db
    for j in db.getJobObject().subjobs:
        j.status = 'completing'

    disallowed_status = ['submitting', 'killed']
    for status in disallowed_status:
        db.getJobObject().status = status
        db.reset()
        assert db.getJobObject().status == status, 'status shouldn\'t have changed'

    db.getJobObject().status = 'completing'
    db.reset()
    assert db.getJobObject().status == 'submitted', 'didn\'t reset job'
    assert [j.status for j in db.getJobObject().subjobs] != ['submitted', 'submitted'], 'subjobs not reset properly'

    db.reset(doSubjobs=True)
    assert [j.status for j in db.getJobObject().subjobs] == ['submitted', 'submitted'], 'subjobs not reset properly'

    for j in db.getJobObject().subjobs:
        j.status = 'completed'
    db.reset(doSubjobs=True)
    assert [j.status for j in db.getJobObject().subjobs] != ['submitted', 'submitted'], 'subjobs not supposed to reset'
Example 10
def test_getOutputSandbox(db, mocker):
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')

    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.id = 1234

    temp_dir = j.getOutputWorkspace().getPath()
    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               return_value=True) as execute:
        assert db.getOutputSandbox(), 'didn\'t run'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" %
                                        temp_dir,
                                        cred_req=mocker.ANY)

    test_dir = 'test_dir'
    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               return_value=True) as execute:
        assert db.getOutputSandbox(test_dir), 'didn\'t run with modified dir'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" %
                                        test_dir,
                                        cred_req=mocker.ANY)

    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               side_effect=GangaDiracError('test Exception')) as execute:
        assert not db.getOutputSandbox(test_dir), 'didn\'t fail gracefully'
        execute.assert_called_once()
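For reference, a hedged sketch of the user-level calls these mocks stand in for, assuming a Dirac job object j that has already completed; the target directory argument is optional, as exercised above.

# Illustrative usage only.
j.backend.getOutputSandbox()              # unpack into the job's output workspace
j.backend.getOutputSandbox('/tmp/mydir')  # or into a caller-supplied directory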
Example 11
def test__resubmit(db):
    from Ganga.Core import BackendError

    def _common_submit(dirac_script):
        return '_common_submit run ok'

    masterj = Job()
    masterj.id = 0
    masterj.backend = db
    j = Job()
    j.id = 1
    j.backend = db
    db._setParent(masterj)

    with patch.object(db, '_common_submit', return_value='_common_submit run ok'):
        with pytest.raises(BackendError):
            db._resubmit()
Example 12
def test__resubmit(db):
    from Ganga.Core.exceptions import BackendError

    def _common_submit(dirac_script):
        return '_common_submit run ok'

    masterj = Job()
    masterj.id = 0
    masterj.backend = db
    j = Job()
    j.id = 1
    j.backend = db
    db._setParent(masterj)

    with patch.object(db, '_common_submit', return_value='_common_submit run ok'):
        with pytest.raises(BackendError):
            db._resubmit()
Example 13
    def split(self, job):
        from Ganga.GPIDev.Lib.Job import Job
        subjobs = []
        if self.fail == 'exception':
            x = 'triggered failure during splitting'
            raise Exception(x)
        for b in self.backs:
            j = Job()
            j.copyFrom(job)
            j.backend = b
            subjobs.append(j)
        return subjobs
Example 14
    def split(self, job):
        from Ganga.GPIDev.Lib.Job import Job
        subjobs = []
        if self.fail == 'exception':
            x = 'triggered failure during splitting'
            raise Exception(x)
        for b in self.backs:
            j = Job()
            j.copyFrom(job)
            j.backend = b
            subjobs.append(j)
        return subjobs
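A hypothetical driver for this test splitter (the splitter class and backend names are assumed), showing the success path and the deliberate failure switch:

# Illustrative only.
s = TestSplitter()            # assumed name of the class defining split() above
s.backs = [Local(), Local()]  # one subjob is copied from the master per backend
s.fail = ''                   # set to 'exception' to hit the failure branch
subjobs = s.split(master_job)
assert len(subjobs) == 2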
Example 15
def create_gaudi_subjob(job, inputdata):
    j = Job()
    j.name = job.name
    j.application = copy_app(job.application)
    j.backend = job.backend # no need to deepcopy
    if inputdata:
        j.inputdata = inputdata
        if hasattr(j.application,'extra'):
            j.application.extra.inputdata = j.inputdata
    else:
        j.inputdata = None
        if hasattr(j.application,'extra'):
            j.application.extra.inputdata = BesDataset()
    j.outputsandbox = job.outputsandbox[:]
    j.outputdata = job.outputdata
    return j
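A hedged sketch of the call pattern a splitter might use around create_gaudi_subjob; the chunking helper name is an assumption:

def split(self, job):
    # Illustrative only: one subjob per inputdata chunk, or a single subjob
    # with no inputdata when the master job has none.
    chunks = self._chunk_inputdata(job.inputdata)  # assumed helper
    if not chunks:
        return [create_gaudi_subjob(job, None)]
    return [create_gaudi_subjob(job, dataset) for dataset in chunks]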
Example 16
def test__common_submit(tmpdir, db):
    from Ganga.Core import BackendError
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(
            script_template.replace('###PARAMETRIC_INPUTDATA###',
                                    str([['a'], ['b']])))

    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={}):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(name)

        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'

    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               return_value={
                   'OK': True,
                   'Value': 12345
               }) as execute:
        assert db._common_submit(name)

        execute.assert_called_once_with("execfile('%s')" % name)

        assert db.id == 12345, 'id not set'

    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               return_value={
                   'OK': True,
                   'Value': [123, 456]
               }):
        with patch.object(db, '_setup_bulk_subjobs') as _setup_bulk_subjobs:
            db._common_submit(name)
            _setup_bulk_subjobs.assert_called_once_with([123, 456], name)
Example 17
def test__setup_bulk_subjobs(tmpdir, db):
    from Ganga.Core.exceptions import BackendError
    from Ganga.GPIDev.Lib.Dataset.Dataset import Dataset
    from GangaDirac.Lib.Backends import Dirac

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(
            script_template.replace('###PARAMETRIC_INPUTDATA###',
                                    str([['a'], ['b']])))

    with pytest.raises(BackendError):
        db._setup_bulk_subjobs([], name)

    d = Dirac()
    j = Job()
    j.id = 0  # This would normally be set by the registry if this was a proxy job
    j.application = Executable()
    j.splitter = ArgSplitter()
    j.splitter.args = [['a'], ['b'], ['c'], ['d'], ['e']]
    j.inputdata = Dataset()
    j.backend = d
    d._parent = j

    dirac_ids = [123, 456]

    def fake_setup_subjob_dataset(dataset):
        assert dataset in [['a'], ['b']], 'dataset not passed properly'

    with patch.object(d, '_setup_subjob_dataset', fake_setup_subjob_dataset):
        assert d._setup_bulk_subjobs(dirac_ids, name), 'didnt run'

    assert len(j.subjobs) == len(dirac_ids), 'didnt work'
    for id_, backend_id, subjob in zip(range(len(dirac_ids)), dirac_ids,
                                       j.subjobs):
        assert id_ == subjob.id, 'ids dont match'
        assert backend_id == subjob.backend.id, 'backend.ids dont match'
        assert isinstance(subjob.application,
                          j.application.__class__), 'apps dont match'
        assert subjob.splitter is None, 'splitter not done'
        assert isinstance(subjob.backend,
                          j.backend.__class__), 'backend dont match'
Example 18
def test__common_submit(tmpdir, db, mocker):
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')

    from Ganga.Core.exceptions import BackendError

    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(
            script_template.replace('###PARAMETRIC_INPUTDATA###',
                                    str([['a'], ['b']])))

    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               side_effect=GangaDiracError('test Exception')):
        db.id = 1234
        db.actualCE = 'test'
        db.status = 'test'
        with pytest.raises(BackendError):
            db._common_submit(name)

        assert db.id is None, 'id not None'
        assert db.actualCE is None, 'actualCE not None'
        assert db.status is None, 'status not None'

    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               return_value=12345) as execute:
        assert db._common_submit(name)

        execute.assert_called_once_with("execfile('%s')" % name,
                                        cred_req=mocker.ANY)

        assert db.id == 12345, 'id not set'

    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               return_value=[123, 456]):
        with patch.object(db, '_setup_bulk_subjobs') as _setup_bulk_subjobs:
            db._common_submit(name)
            _setup_bulk_subjobs.assert_called_once_with([123, 456], name)
Example 19
def test_getOutputSandbox(db):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.id = 1234

    temp_dir = j.getOutputWorkspace().getPath()
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={'OK': True}) as execute:
        assert db.getOutputSandbox(), 'didn\'t run'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" % temp_dir)

    test_dir = 'test_dir'
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value={'OK': True}) as execute:
        assert db.getOutputSandbox(test_dir), 'didn\'t run with modified dir'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" % test_dir)

    with patch('GangaDirac.Lib.Backends.DiracBase.execute') as execute:
        assert not db.getOutputSandbox(test_dir), 'didn\'t fail gracefully'
        execute.assert_called_once()
Example 20
def test_submit(db):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    file1 = tempfile.NamedTemporaryFile('w')
    file2 = tempfile.NamedTemporaryFile('w')
    file3 = tempfile.NamedTemporaryFile('w')
    sjc = StandardJobConfig(
        exe=script_template,
        inputbox=[File(file1.name),
                  File(file2.name),
                  File(file3.name)],
        outputbox=['d', 'e', 'f'])

    def fake_common_submit(dirac_script):
        with open(dirac_script, 'r') as f:
            script = f.read()
            assert script != script_template, 'script not changed'
            assert script_template.replace(
                '##INPUT_SANDBOX##',
                str(['a', 'b', 'c'] + [
                    os.path.join(j.getInputWorkspace().getPath(),
                                 '_input_sandbox_0.tgz')
                ] + ['g'])) == script, 'script not what it should be'

        return True

    with patch.object(db, '_addition_sandbox_content',
                      return_value=['g']) as _addition_sandbox_content:
        with patch.object(db, '_common_submit',
                          Mock(fake_common_submit)) as _common_submit:
            assert db.submit(sjc, ['a', 'b', 'c'])

            _addition_sandbox_content.assert_called_once_with(sjc)
            _common_submit.assert_called_once()

    file1.close()
    file2.close()
    file3.close()
Example 21
def test_removeOutputData(db):
    from GangaDirac.Lib.Files.DiracFile import DiracFile

    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    #######################

    class TestFile(object):
        def __init__(self):
            pass

        def remove(self):
            return 27

    #######################

    def fake_outputfiles_foreach(job, file_type, func):
        import types
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
            assert file_type == DiracFile
            assert isinstance(func, types.FunctionType)
            assert func(TestFile()) == 27, 'Didn\'t call remove function'

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_foreach',
               fake_outputfiles_foreach):
        subjob = False
        assert db.removeOutputData() is None

        j.subjobs = [Job(), Job(), Job()]
        for sj in j.subjobs:
            sj._setParent(j)

        subjob = True
        assert db.removeOutputData() is None
Example 22
def test_removeOutputData(db):
    from GangaDirac.Lib.Files.DiracFile import DiracFile

    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    #######################

    class TestFile(object):
        def __init__(self):
            pass

        def remove(self):
            return 27

    #######################

    def fake_outputfiles_foreach(job, file_type, func):
        import types
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
            assert file_type == DiracFile
            assert isinstance(func, types.FunctionType)
            assert func(TestFile()) == 27, 'Didn\'t call remove function'

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_foreach', fake_outputfiles_foreach):
        subjob = False
        assert db.removeOutputData() is None

        j.subjobs = [Job(), Job(), Job()]
        for sj in j.subjobs:
            sj._setParent(j)

        subjob = True
        assert db.removeOutputData() is None
Example 23
    def split(self, job):
        from Ganga.GPIDev.Lib.Job import Job
        subjobs = []
        primeTables = job.inputdata.get_dataset()

        ## avoid creating jobs with nothing to do
        if self.numsubjobs > len(primeTables):
            self.numsubjobs = len(primeTables)

        ## create subjobs
        for i in range(self.numsubjobs):
            j = Job()
            j.application = job.application
            j.inputdata = job.inputdata
            j.inputdata.table_id_lower = 1
            j.inputdata.table_id_upper = 1
            j.outputdata = job.outputdata
            j.inputsandbox = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            j.backend = job.backend
            subjobs.append(j)

        ## chunksize of each subjob
        chunksize = len(primeTables) // self.numsubjobs  # integer division: whole tables per subjob

        offset = 0
        for i in range(len(subjobs)):
            my_chunksize = chunksize
            if len(primeTables) % self.numsubjobs >= i + 1: my_chunksize += 1

            ## set lower bound id (inclusive)
            subjobs[i].inputdata.table_id_lower = offset + 1
            ## fill subjob with prime tables
            #for j in range(my_chunksize):
            #    subjobs[i].application.addPrimeTable(primeTables[offset+j])
            offset += my_chunksize
            ## set upper  bound id (inclusive)
            subjobs[i].inputdata.table_id_upper = offset

        return subjobs
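To make the remainder handling above concrete, a small self-contained check (illustrative; it uses divmod instead of the loop, but produces the same inclusive id ranges):

def chunk_ranges(n_tables, n_subjobs):
    # e.g. 10 tables over 3 subjobs -> chunk sizes 4, 3, 3.
    base, rem = divmod(n_tables, n_subjobs)
    ranges, offset = [], 0
    for i in range(n_subjobs):
        size = base + (1 if i < rem else 0)
        ranges.append((offset + 1, offset + size))
        offset += size
    return ranges

assert chunk_ranges(10, 3) == [(1, 4), (5, 7), (8, 10)]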
Example 24
    def split(self,job):
        from Ganga.GPIDev.Lib.Job import Job
        subjobs = []
        primeTables = job.inputdata.get_dataset()

        ## avoid creating jobs with nothing to do
        if self.numsubjobs > len(primeTables):
            self.numsubjobs = len(primeTables)

        ## create subjobs
        for i in range(self.numsubjobs):
            j = Job()
            j.application   = job.application
            j.inputdata     = job.inputdata 
            j.inputdata.table_id_lower = 1 
            j.inputdata.table_id_upper = 1
            j.outputdata    = job.outputdata 
            j.inputsandbox  = job.inputsandbox
            j.outputsandbox = job.outputsandbox
            j.backend       = job.backend
            subjobs.append(j)

        ## chunksize of each subjob
        chunksize = len(primeTables) // self.numsubjobs  # integer division: whole tables per subjob

        offset = 0
        for i in range(len(subjobs)):
            my_chunksize = chunksize
            if len(primeTables) % self.numsubjobs >= i+1: my_chunksize+=1

            ## set lower bound id (inclusive)
            subjobs[i].inputdata.table_id_lower = offset+1
            ## fill subjob with prime tables 
            #for j in range(my_chunksize):
            #    subjobs[i].application.addPrimeTable(primeTables[offset+j])
            offset += my_chunksize
            ## set upper  bound id (inclusive)
            subjobs[i].inputdata.table_id_upper = offset

        return subjobs
Example 25
def test_getOutputSandbox(db, mocker):
    mocker.patch('Ganga.GPIDev.Credentials.credential_store')

    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.id = 1234

    temp_dir = j.getOutputWorkspace().getPath()
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=True) as execute:
        assert db.getOutputSandbox(), 'didn\'t run'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" % temp_dir, cred_req=mocker.ANY)

    test_dir = 'test_dir'
    with patch('GangaDirac.Lib.Backends.DiracBase.execute', return_value=True) as execute:
        assert db.getOutputSandbox(test_dir), 'didn\'t run with modified dir'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" % test_dir, cred_req=mocker.ANY)

    with patch('GangaDirac.Lib.Backends.DiracBase.execute', side_effect=GangaDiracError('test Exception')) as execute:
        assert not db.getOutputSandbox(test_dir), 'didn\'t fail gracefully'
        execute.assert_called_once()
Example 26
def test__setup_bulk_subjobs(tmpdir, db):
    from Ganga.Core import BackendError
    from Ganga.GPIDev.Lib.Dataset.Dataset import Dataset
    from GangaDirac.Lib.Backends import Dirac

    name = str(tmpdir.join('submit_script'))
    with open(name, 'w') as fd:
        fd.write(script_template.replace('###PARAMETRIC_INPUTDATA###', str([['a'], ['b']])))

    with pytest.raises(BackendError):
        db._setup_bulk_subjobs([], name)

    d = Dirac()
    j = Job()
    j.id = 0  # This would normally be set by the registry if this was a proxy job
    j.application = Executable()
    j.splitter = ArgSplitter()
    j.splitter.args = [['a'], ['b'], ['c'], ['d'], ['e']]
    j.inputdata = Dataset()
    j.backend = d
    d._parent = j

    dirac_ids = [123, 456]

    def fake_setup_subjob_dataset(dataset):
        assert dataset in [['a'], ['b']], 'dataset not passed properly'

    with patch.object(d, '_setup_subjob_dataset', fake_setup_subjob_dataset):
        assert d._setup_bulk_subjobs(dirac_ids, name), 'didnt run'

    assert len(j.subjobs) == len(dirac_ids), 'didnt work'
    for id_, backend_id, subjob in zip(range(len(dirac_ids)), dirac_ids, j.subjobs):
        assert id_ == subjob.id, 'ids dont match'
        assert backend_id == subjob.backend.id, 'backend.ids dont match'
        assert isinstance(subjob.application, j.application.__class__), 'apps dont match'
        assert subjob.splitter is None, 'splitter not done'
        assert isinstance(subjob.backend, j.backend.__class__), 'backend dont match'
Example 27
def test_submit(db):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    file1 = tempfile.NamedTemporaryFile('w')
    file2 = tempfile.NamedTemporaryFile('w')
    file3 = tempfile.NamedTemporaryFile('w')
    sjc = StandardJobConfig(exe=script_template,
                            inputbox=[File(file1.name),
                                      File(file2.name),
                                      File(file3.name)],
                            outputbox=['d', 'e', 'f'])

    def fake_common_submit(dirac_script):
        with open(dirac_script, 'r') as f:
            script = f.read()
            assert script != script_template, 'script not changed'
            assert script_template.replace('##INPUT_SANDBOX##',
                                           str(['a', 'b', 'c'] +
                                               [os.path.join(j.getInputWorkspace().getPath(),
                                                             '_input_sandbox_0.tgz')] +
                                               ['g'])) == script, 'script not what it should be'

        return True

    with patch.object(db, '_addition_sandbox_content', return_value=['g']) as _addition_sandbox_content:
        with patch.object(db, '_common_submit', Mock(fake_common_submit)) as _common_submit:
            assert db.submit(sjc, ['a', 'b', 'c'])

            _addition_sandbox_content.assert_called_once_with(sjc)
            _common_submit.assert_called_once()

    file1.close()
    file2.close()
    file3.close()
Example 28
def test_getOutputSandbox(db):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j
    db.id = 1234

    temp_dir = j.getOutputWorkspace().getPath()
    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               return_value={'OK': True}) as execute:
        assert db.getOutputSandbox(), 'didn\'t run'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" %
                                        temp_dir)

    test_dir = 'test_dir'
    with patch('GangaDirac.Lib.Backends.DiracBase.execute',
               return_value={'OK': True}) as execute:
        assert db.getOutputSandbox(test_dir), 'didn\'t run with modified dir'
        execute.assert_called_once_with("getOutputSandbox(1234,'%s')" %
                                        test_dir)

    with patch('GangaDirac.Lib.Backends.DiracBase.execute') as execute:
        assert not db.getOutputSandbox(test_dir), 'didn\'t fail gracefully'
        execute.assert_called_once()
Example 29
def test_getOutputData(db, tmpdir):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    with pytest.raises(GangaException):
        db.getOutputData('/false/dir')

    #######################
    class TestFile(object):
        def __init__(self, lfn, namePattern):
            self.lfn = lfn
            self.namePattern = namePattern

        def get(self):
            self.check = 42

    test_files = [TestFile('a', 'alpha'), TestFile('', 'delta'),
                  TestFile('b', 'beta'), TestFile('', 'bravo'),
                  TestFile('c', 'charlie'), TestFile('', 'foxtrot')]

    #######################

    def fake_outputfiles_iterator(job, file_type):
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
            assert file_type == DiracFile
        return test_files

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator', fake_outputfiles_iterator):

        # master jobs
        #######################
        subjob = False
        assert db.getOutputData() == ['a', 'b', 'c']
        for f in test_files:
            if f.lfn in ['a', 'b', 'c']:
                assert f.localDir == j.getOutputWorkspace().getPath()
                assert f.check == 42, "didn't call get"
            else:
                assert not hasattr(f, 'localDir')
                assert not hasattr(f, 'check')
        assert db.getOutputData(None, ['alpha', 'charlie']) == ['a', 'c']
        assert db.getOutputData(tmpdir.dirname, ['alpha', 'charlie']) == ['a', 'c']

        # subjobs
        ########################
        j.subjobs = [Job(), Job(), Job()]
        i = 0
        for sj in j.subjobs:
            sj._setParent(j)
            sj.id = i
            i += 1

        subjob = True
        assert db.getOutputData() == ['a', 'b', 'c'] * 3
        assert db.getOutputData(None, ['beta']) == ['b'] * 3
        assert db.getOutputData(tmpdir.dirname, ['alpha', 'charlie']) == ['a', 'c'] * 3
        for i in range(3):
            assert os.path.isdir(os.path.join(tmpdir.dirname, '0.%d' % i))
            os.rmdir(os.path.join(tmpdir.dirname, '0.%d' % i))
Example 30
def test_getOutputData(db, tmpdir):
    j = Job()
    j.id = 0
    j.backend = db
    db._parent = j

    with pytest.raises(GangaException):
        db.getOutputData('/false/dir')

    #######################
    class TestFile(object):
        def __init__(self, lfn, namePattern):
            self.lfn = lfn
            self.namePattern = namePattern

        def get(self):
            self.check = 42

    test_files = [
        TestFile('a', 'alpha'),
        TestFile('', 'delta'),
        TestFile('b', 'beta'),
        TestFile('', 'bravo'),
        TestFile('c', 'charlie'),
        TestFile('', 'foxtrot')
    ]

    #######################

    def fake_outputfiles_iterator(job, file_type):
        assert isinstance(job, Job)
        if subjob:
            assert job.master is not None
        else:
            assert job.master is None
            assert file_type == DiracFile
        return test_files

    with patch('GangaDirac.Lib.Backends.DiracBase.outputfiles_iterator',
               fake_outputfiles_iterator):

        # master jobs
        #######################
        subjob = False
        assert db.getOutputData() == ['a', 'b', 'c']
        for f in test_files:
            if f.lfn in ['a', 'b', 'c']:
                assert f.localDir == j.getOutputWorkspace().getPath()
                assert f.check == 42, "didn't call get"
            else:
                assert not hasattr(f, 'localDir')
                assert not hasattr(f, 'check')
        assert db.getOutputData(None, ['alpha', 'charlie']) == ['a', 'c']
        assert db.getOutputData(tmpdir.dirname,
                                ['alpha', 'charlie']) == ['a', 'c']

        # subjobs
        ########################
        j.subjobs = [Job(), Job(), Job()]
        i = 0
        for sj in j.subjobs:
            sj._setParent(j)
            sj.id = i
            i += 1

        subjob = True
        assert db.getOutputData() == ['a', 'b', 'c'] * 3
        assert db.getOutputData(None, ['beta']) == ['b'] * 3
        assert db.getOutputData(tmpdir.dirname,
                                ['alpha', 'charlie']) == ['a', 'c'] * 3
        for i in range(3):
            assert os.path.isdir(os.path.join(tmpdir.dirname, '0.%d' % i))
            os.rmdir(os.path.join(tmpdir.dirname, '0.%d' % i))