Example #1
def testSplitting(repository, LEN):
    # top level splitting
    mj = Job()
    jj = []
    for i in range(LEN):
        sj = Job()
        sj._setParent(mj)
        sj.application.exe = "/bin/myexe" + str(i)
        sj.application.args = 1000 * ["/ab" + str(i)]
        jj.append(sj)
    mj.subjobs = jj

    # check registration
    repository.registerJobs([mj])
    for s in mj.subjobs:
        assert s.master is mj
        assert s.id is not None

    # check ci/co
    # repository.commitJobs([j._impl])
    mid = mj.id
    j = repository.checkoutJobs([mid])[0]
    assert len(j.subjobs) == LEN

    # another ci/co check
    j.subjobs[1].application.exe = "/bin/ls"
    j.application.exe = "/bin/pwd"
    repository.commitJobs([j])
    j = repository.checkoutJobs([mid])[0]

    assert j.subjobs[1].application.exe == "/bin/ls"
    assert j.application.exe == "/bin/pwd"

    # check set status
    status_list = [
        ((mid, j.subjobs[1].id), "running"),
        ((mid, j.subjobs[2].id), "running"),
        ((mid, j.subjobs[3].id), "running"),
    ]
    repository.setJobsStatus(status_list)

    # check get status
    md = repository.getJobsStatus(map(lambda x: x[0], status_list))
    for i in range(len(md)):
        assert md[i][0] == status_list[i][0]
        assert md[i][1] == status_list[i][1]

    # check getting job status in another way
    ttt = {"table_path": repository._getSubJobPath((mid,)), "attributes": {}}
    md = repository.getJobsStatus(ttt)
    for i in range(len(status_list)):
        if md[i][0] == status_list[i][0]:
            assert md[i][1] == status_list[i][1]

    # check delete job
    repository.deleteJobs([mj.id])
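
For context, a minimal driver for testSplitting might look like the sketch below. It assumes the same test module environment, where repositoryFactory and SimpleJobStreamer are already importable (their import paths are not shown on this page) and the "Local" repository flavour used in the later examples is available.

import tempfile

def runSplittingTest():
    # Sketch only: build a throw-away local repository and run the test once.
    repository = repositoryFactory(repositoryType='Local',
                                   root_dir=tempfile.mkdtemp(),
                                   streamer=SimpleJobStreamer(),
                                   local_root='/tmp')
    testSplitting(repository, LEN=10)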
Example #2
def getSplitJob(LEN = 10):
    # top level splitting
    mj = Job()
    jj = []
    for i in range(LEN):
        sj = Job()
        sj.application.exe = '/bin/myexe' + str(i)
        sj.application.args = ['/'+ ARG_LEN*'abc' + str(i)]
        sj._setParent(mj)
        jj.append(sj)
    mj.subjobs = jj
    return mj
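
The helper above only builds the job tree; nothing is registered. A small usage sketch (ARG_LEN is a module-level constant defined elsewhere in the test file, and Job comes from the Ganga GPI as in the other examples):

# Sketch only: inspect the split structure produced by getSplitJob().
mj = getSplitJob(LEN=5)
assert len(mj.subjobs) == 5
for sj in mj.subjobs:
    assert sj.application.exe.startswith('/bin/myexe')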
Example #3
    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        """
        This is the old bulk submit method which is used to construct the subjobs for a parametric job
        Args:
            dirac_ids (list): This is a list of the Dirac ids which have been created
            dirac_script (str): Name of the dirac script which contains the job jdl
        """
        f = open(dirac_script, 'r')
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        f.close()
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError(
                'Dirac',
                'Mismatch between number of datasets defined in dirac API script and those returned by DIRAC'
            )

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        master_job.subjobs = []
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True
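
The body of this method follows a pattern that recurs in several examples below: clone the master job, drop its splitter, give each copy its own backend id and positional index, and mark it submitted. A condensed, backend-agnostic sketch of that pattern (make_subjobs is a hypothetical helper, not part of the Ganga API):

from Ganga.GPIDev.Lib.Job.Job import Job

def make_subjobs(master_job, backend_ids):
    # Hypothetical helper illustrating the clone-and-index pattern.
    master_job.subjobs = []
    for i, bid in enumerate(backend_ids):
        j = Job()
        j.copyFrom(master_job)       # inherit application, backend, etc.
        j.splitter = None            # subjobs must not split again
        j.backend.id = bid           # id handed back by the remote system
        j.id = i                     # positional index within the master
        j.status = 'submitted'
        j.time.timenow('submitted')
        master_job.subjobs.append(j)
    return master_job.subjobs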
Example #4
def runTest(NJOBS, NRUN, rootDir, output_dir, rep_type):
    if DEBUG:
        print 'from runTest: rootDir %s, output_dir %s'%(rootDir, output_dir)
    if rep_type == "Remote":
        repository = repositoryFactory(repositoryType = rep_type,
                                       root_dir  = rootDir,
                                       streamer  = SimpleJobStreamer(),
                                       host      = 'gangamd.cern.ch',
                                       port      = 8822,
                                       login     = os.getlogin(),
                                       keepalive = True)
    elif rep_type == "Local":
        repository = repositoryFactory(repositoryType = rep_type,
                                       root_dir = rootDir,
                                       streamer = SimpleJobStreamer(),
                                       local_root = '/tmp')
    else:
        print "Wrong type of repository..."
        print "Exiting ..."
        return
    nn = tempfile.mktemp(suffix = '.test')
    nn = os.path.join(output_dir, os.path.basename(nn))
    ff = file(nn, 'w')
    try:
        jjj = []
        for n in range(NRUN):
            ff.write("NUMBER of jobs in the repository %d \n" %len(jjj))
            jj = []
            for i in range(NJOBS):
                j = Job()
                #j.application = Gaudi()
                j.name = "MyJob" + str(i)
                jj.append(j)

            jjj.extend(jj)
            t1 = _startText(ff, 'registering %d jobs...' % NJOBS)
            repository.registerJobs(jj)
            if DEBUG:
                print "--->command status", "OK", "\n"
            _endText(ff, t1)


        t1 = _startText(ff, 'deleting jobs...')
        repository.deleteJobs(map(lambda j: j.id, jjj))
        if DEBUG:
            print "--->command status", "OK", "\n"
        _endText(ff, t1)

    finally:
        ff.close()
Example #5
    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        """
        This is the old bulk submit method which is used to construct the subjobs for a parametric job
        Args:
            dirac_ids (list): This is a list of the Dirac ids which have been created
            dirac_script (str): Name of the dirac script which contains the job jdl
        """
        f = open(dirac_script, 'r')
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        f.close()
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError('Dirac', 'Mismatch between number of datasets defined in dirac API script and those returned by DIRAC')

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        master_job.subjobs = []
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True
Example #6
def runTest(NTEST, rootDir, output_dir, rep_type):
    if DEBUG:
        print "from runTest: rootDir %s, output_dir %s" % (rootDir, output_dir)
    if rep_type == "Remote":
        repository = repositoryFactory(
            repositoryType=rep_type,
            root_dir=rootDir,
            streamer=SimpleJobStreamer(),
            host="lxgate41.cern.ch",
            port=8822,
            login=os.getlogin(),
            keepalive=True,
        )
    elif rep_type == "Local":
        repository = repositoryFactory(
            repositoryType=rep_type, root_dir=rootDir, streamer=SimpleJobStreamer(), local_root=os.path.expanduser("~")
        )
    else:
        print "Wrong type of repository..."
        print "Exiting ..."
        return
    nn = tempfile.mktemp(suffix=".test")
    nn = os.path.join(output_dir, os.path.basename(nn))
    ff = file(nn, "w")
    try:
        jj = []
        for i in range(NTEST):
            j = Job()
            # j.application = Gaudi()
            j.name = "MyJob" + str(i)
            j.application.args = 1000 * ["/abc"]
            jj.append(j)

        # ----------------------------------------------------
        t1 = _startText(ff, "registering %d jobs..." % NTEST)
        if DEBUG:
            print "registering %d jobs..." % NTEST
        try:
            repository.registerJobs(jj)
        except Exception, e:
            print "EXCEPTION in registerJobs", str(e)
            if DEBUG:
                print "--->command status", "FAIL", "\n"
        else:
Example #7
    def master_updateMonitoringInformation(jobs):
        """Updates the statuses of the list of jobs provided by issuing crab -status."""
        logger.info('Updating the monitoring information of ' + str(len(jobs)) + ' jobs')
        try:
            from Ganga.GPIDev.Lib.Job.Job import Job
            import sys, traceback
 
            for j in jobs:
                server = CRABServer()
                logger.debug('Updating monitoring information for job %d (%s)' % (j.id, j.status))
                try:
                    dictresult, status, reason = server.status(j)
                    logger.info('CRAB3 server call answer status: %s - reason: %s' % (status, reason))
                    joblist = sorted(dictresult['result'][0]['jobList'], key=lambda x:x[1])
                except KeyError:
                    logger.info('Get status for job %d didn\'t return job list, skipping job for now.' % j.id)
                    
                    continue
                except: 
                    logger.error('Get status for job %d failed, skipping.' % j.id)
                    raise

                if joblist:
                    logger.info('There are subjob statuses for job %s' % j.id)
                    logger.info('j: %s' % dir(j))
                    if not j.subjobs:
                        logger.warning('No subjob object for job %s' % j.id)
                        j.subjobs = []
                        for i in xrange(len(joblist)):
                            subjob = joblist[i]
                            index  = int(subjob[1])
                            logger.info('Processing subjob %d, %s' % (index, subjob))
                            sj = Job()
                            sj.copyFrom(j)
                            sj.backend.crabid = index
                            sj.id = i
                            sj.updateStatus('submitting')
                            sj.backend.checkReport(subjob)
                            sj.backend.checkStatus()
                            j.subjobs.append(sj)
                        #j.subjobs = sorted(j.subjobs, key=lambda x: x.backend.id) 
                        #j._commit()  
                    else:
                        for subjob in joblist:
                            index  = int(subjob[1])
                            logger.debug('Found subjob %s searching with index %s' % (j.subjobs[index-1].backend.crabid, index))
                            j.subjobs[index-1].backend.checkReport(subjob)                   
                            j.subjobs[index-1].backend.checkStatus()

                    j.updateMasterJobStatus()
                else:
                    logger.info('There are no subjobs for job %s' % (j.id))
                    logger.info('checking task status from report: %s' % dictresult['result'][0]['status'])
                    taskstatus = dictresult['result'][0]['status']
                    if taskstatus in ['FAILED']:
                        logger.info('Job failed: %s' % dictresult)
                        j.updateStatus('failed')
        except Exception as e:
            logger.error(e)
            traceback.print_exc(file=sys.stdout)
Example #8
    def master_setup_bulk_subjobs(self, jobs, jdefids):

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        for i in range(len(jdefids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend = Panda()
            j.backend.id = jdefids[i]
            j.id = i
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True
Example #9
    def master_setup_bulk_subjobs(self, jobs, jdefids):

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        for i in range(len(jdefids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend = Panda()
            j.backend.id = jdefids[i]
            j.id = i
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True
Example #10
    def createSubjob(self, job, additional_skip_args=None):
        """ Create a new subjob by copying the master job and setting all fields correctly.
        """
        from Ganga.GPIDev.Lib.Job.Job import Job
        if additional_skip_args is None:
            additional_skip_args = []

        j = Job()
        skipping_args = [
            'splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs'
        ]
        for arg in additional_skip_args:
            skipping_args.append(arg)
        j.copyFrom(job, skipping_args)
        j.splitter = None
        j.inputsandbox = []
        j.inputfiles = []
        j.inputdata = None
        return j
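
A splitter's split() method would typically call createSubjob() once per piece of work and vary only the fields it is responsible for. A rough usage sketch, where splitter is assumed to be an instance of the class defining createSubjob() above and master an existing Job:

# Sketch only: build three subjobs that differ only in application.args.
def split_by_args(splitter, master):
    subjobs = []
    for args in (['a'], ['b'], ['c']):
        sj = splitter.createSubjob(master)
        sj.application.args = args   # vary only this field
        subjobs.append(sj)
    return subjobs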
Example #11
    def createSubjob(self, job, additional_skip_args=None):
        """ Create a new subjob by copying the master job and setting all fields correctly.
        """
        from Ganga.GPIDev.Lib.Job.Job import Job
        if additional_skip_args is None:
            additional_skip_args = []

        j = Job()
        skipping_args = ['splitter', 'inputsandbox', 'inputfiles', 'inputdata', 'subjobs']
        for arg in additional_skip_args:
            skipping_args.append(arg)
        j.copyFrom(job, skipping_args)
        j.splitter = None
        j.inputsandbox = []
        j.inputfiles = []
        j.inputdata = None
        return j
Example #12
    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        f = open(dirac_script, 'r')
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        f.close()
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError('Dirac', 'Mismatch between number of datasets defined in dirac API script and those returned by DIRAC')

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        master_job.subjobs = []
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        master_job._commit()
        return True
Example #13
def submit(N, K):
    jobs = []
    for i in range(K):
        j = Job()
        j._auto__init__()
        j.backend = LCG()
        j.backend.middleware = 'GLITE'
        j.splitter = GenericSplitter()
        j.splitter.attribute = 'application.args'
        j.splitter.values = [['x']] * N
        j.submit()
        jobs.append(j)
    import time

    def finished():
        for j in jobs:
            if j.status not in ['failed', 'completed']:
                return False
        return True

    while not finished():
        time.sleep(1)

    return jobs
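
The function above blocks until every job reaches a terminal state, so a caller simply inspects the returned list. An illustrative call (three jobs, each split into five identical argument sets):

# Sketch only: collect the final state of each job returned by submit().
finished = submit(N=5, K=3)
statuses = [(j.id, j.status) for j in finished]  # 'completed' or 'failed'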
Example #14
    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        f = open(dirac_script, 'r')
        parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        f.close()
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError('Dirac', 'Mismatch between number of datasets defined in dirac API script and those returned by DIRAC')

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        master_job._commit()
        return True
Example #15
def makeRegisteredJob():
    """Makes a new Job and registers it with the Registry"""
    from Ganga.GPIDev.Lib.Job.Job import Job
    j = Job()
    j._auto__init__()
    return j
Example #16
    def createUnits(self):
        """Create new units if required given the inputdata"""

        # call parent for chaining
        super(CoreTransform, self).createUnits()

        # Use the given splitter to create the unit definitions
        if len(self.units) > 0:
            # already have units so return
            return

        if self.unit_splitter is None and len(self.inputdata) == 0:
            raise ApplicationConfigurationError(
                None,
                "No unit splitter or InputData provided for CoreTransform unit creation, Transform %d (%s)"
                % (self.getID(), self.name),
            )

        # -----------------------------------------------------------------
        # split over unit_splitter by preference
        if self.unit_splitter:

            # create a dummy job, assign everything and then call the split
            j = Job()
            j.backend = self.backend.clone()
            j.application = self.application.clone()

            if self.inputdata:
                j.inputdata = self.inputdata.clone()

            subjobs = self.unit_splitter.split(j)

            if len(subjobs) == 0:
                raise ApplicationConfigurationError(
                    None,
                    "Unit splitter gave no subjobs after split for CoreTransform unit creation, Transform %d (%s)"
                    % (self.getID(), self.name),
                )

            # only copy the appropriate elements
            fields = []
            if len(self.fields_to_copy) > 0:
                fields = self.fields_to_copy
            elif isType(self.unit_splitter, GenericSplitter):
                if self.unit_splitter.attribute != "":
                    fields = [self.unit_splitter.attribute.split(".")[0]]
                else:
                    for attr in self.unit_splitter.multi_attrs.keys():
                        fields.append(attr.split(".")[0])

            # now create the units from these jobs
            for sj in subjobs:
                unit = CoreUnit()

                for attr in fields:
                    setattr(unit, attr, copy.deepcopy(getattr(sj, attr)))

                self.addUnitToTRF(unit)

        # -----------------------------------------------------------------
        # otherwise split on inputdata
        elif len(self.inputdata) > 0:

            if self.files_per_unit > 0:

                # combine all files and split accordingly
                filelist = []
                for ds in self.inputdata:

                    if isType(ds, GangaDataset):
                        for f in ds.files:
                            if f.containsWildcards():
                                # we have a wildcard so grab the subfiles
                                for sf in f.getSubFiles(process_wildcards=True):
                                    filelist.append(sf)
                            else:
                                # no wildcards so just add the file
                                filelist.append(f)
                    else:
                        logger.warning("Dataset '%s' doesn't support files" % getName(ds))

                # create DSs and units for this list of files
                fid = 0
                while fid < len(filelist):
                    unit = CoreUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.inputdata = GangaDataset(files=filelist[fid : fid + self.files_per_unit])
                    unit.inputdata.treat_as_inputfiles = self.inputdata[0].treat_as_inputfiles

                    fid += self.files_per_unit

                    self.addUnitToTRF(unit)

            else:
                # just produce one unit per dataset
                for ds in self.inputdata:

                    # avoid splitting over chain inputs
                    if isType(ds, TaskChainInput):
                        continue

                    unit = CoreUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.inputdata = copy.deepcopy(ds)
                    self.addUnitToTRF(unit)
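
In the splitter branch above, only the attributes named in fields_to_copy (or, for a GenericSplitter, the top-level part of the varied attribute) are copied from each dummy subjob into a new CoreUnit. A configuration sketch, assuming a Ganga Tasks session where CoreTransform, GenericSplitter, Executable and LCG are available; createUnits() is normally driven by the Tasks machinery rather than called directly:

# Sketch only: one unit per splitter value, copying the 'application' field.
trf = CoreTransform()
trf.application = Executable()
trf.backend = LCG()
trf.unit_splitter = GenericSplitter()
trf.unit_splitter.attribute = 'application.args'
trf.unit_splitter.values = [['a'], ['b'], ['c']]
trf.createUnits()   # creates three CoreUnit objects via addUnitToTRF()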
Example #17
def makeRegisteredJob():
    """Makes a new Job and registers it with the Registry"""
    from Ganga.GPIDev.Lib.Job.Job import Job
    j = Job()
    j._auto__init__()
    return j
Example #18
    def master_updateMonitoringInformation(jobs):
        """Updates the statuses of the list of jobs provided by issuing crab -status."""
        logger.info('Updating the monitoring information of ' +
                    str(len(jobs)) + ' jobs')
        try:
            from Ganga.GPIDev.Lib.Job.Job import Job
            import sys, traceback

            for j in jobs:
                server = CRABServer()
                logger.debug(
                    'Updating monitoring information for job %d (%s)' %
                    (j.id, j.status))
                try:
                    dictresult, status, reason = server.status(j)
                    logger.info(
                        'CRAB3 server call answer status: %s - reason: %s' %
                        (status, reason))
                    joblist = sorted(dictresult['result'][0]['jobList'],
                                     key=lambda x: x[1])
                except KeyError:
                    logger.info(
                        'Get status for job %d didn\'t return job list, skipping job for now.'
                        % j.id)

                    continue
                except:
                    logger.error('Get status for job %d failed, skipping.' %
                                 j.id)
                    raise

                if joblist:
                    logger.info('There are subjob statuses for job %s' % j.id)
                    logger.info('j: %s' % dir(j))
                    if not j.subjobs:
                        logger.warning('No subjob object for job %s' % j.id)
                        j.subjobs = []
                        for i in xrange(len(joblist)):
                            subjob = joblist[i]
                            index = int(subjob[1])
                            logger.info('Processing subjob %d, %s' %
                                        (index, subjob))
                            sj = Job()
                            sj.copyFrom(j)
                            sj.backend.crabid = index
                            sj.id = i
                            sj.updateStatus('submitting')
                            sj.backend.checkReport(subjob)
                            sj.backend.checkStatus()
                            j.subjobs.append(sj)
                        #j.subjobs = sorted(j.subjobs, key=lambda x: x.backend.id)
                        #j._commit()
                    else:
                        for subjob in joblist:
                            index = int(subjob[1])
                            logger.debug(
                                'Found subjob %s searching with index %s' %
                                (j.subjobs[index - 1].backend.crabid, index))
                            j.subjobs[index - 1].backend.checkReport(subjob)
                            j.subjobs[index - 1].backend.checkStatus()

                    j.updateMasterJobStatus()
                else:
                    logger.info('There are no subjobs for job %s' % (j.id))
                    logger.info('checking task status from report: %s' %
                                dictresult['result'][0]['status'])
                    taskstatus = dictresult['result'][0]['status']
                    if taskstatus in ['FAILED']:
                        logger.info('Job failed: %s' % dictresult)
                        j.updateStatus('failed')
        except Exception as e:
            logger.error(e)
            traceback.print_exc(file=sys.stdout)
Example #19
    def createUnits(self):
        """Create new units if required given the inputdata"""

        # call parent for chaining
        super(CoreTransform, self).createUnits()

        # Use the given splitter to create the unit definitions
        if len(self.units) > 0:
            # already have units so return
            return

        if self.unit_splitter is None and len(self.inputdata) == 0:
            raise ApplicationConfigurationError(
                "No unit splitter or InputData provided for CoreTransform unit creation, Transform %d (%s)"
                % (self.getID(), self.name))

        # -----------------------------------------------------------------
        # split over unit_splitter by preference
        if self.unit_splitter:

            # create a dummy job, assign everything and then call the split
            j = Job()
            j.backend = self.backend.clone()
            j.application = self.application.clone()

            if self.inputdata:
                j.inputdata = self.inputdata.clone()

            subjobs = self.unit_splitter.split(j)

            if len(subjobs) == 0:
                raise ApplicationConfigurationError(
                    "Unit splitter gave no subjobs after split for CoreTransform unit creation, Transform %d (%s)"
                    % (self.getID(), self.name))

            # only copy the appropriate elements
            fields = []
            if len(self.fields_to_copy) > 0:
                fields = self.fields_to_copy
            elif isType(self.unit_splitter, GenericSplitter):
                if self.unit_splitter.attribute != "":
                    fields = [self.unit_splitter.attribute.split(".")[0]]
                else:
                    for attr in self.unit_splitter.multi_attrs.keys():
                        fields.append(attr.split(".")[0])

            # now create the units from these jobs
            for sj in subjobs:
                unit = CoreUnit()

                for attr in fields:
                    setattr(unit, attr, copy.deepcopy(getattr(sj, attr)))

                self.addUnitToTRF(unit)

        # -----------------------------------------------------------------
        # otherwise split on inputdata
        elif len(self.inputdata) > 0:

            if self.files_per_unit > 0:

                # combine all files and split accordingly
                filelist = []
                for ds in self.inputdata:

                    if isType(ds, GangaDataset):
                        for f in ds.files:
                            if f.containsWildcards():
                                # we have a wildcard so grab the subfiles
                                for sf in f.getSubFiles(
                                        process_wildcards=True):
                                    filelist.append(sf)
                            else:
                                # no wildcards so just add the file
                                filelist.append(f)
                    else:
                        logger.warning("Dataset '%s' doesn't support files" %
                                       getName(ds))

                # create DSs and units for this list of files
                fid = 0
                while fid < len(filelist):
                    unit = CoreUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.inputdata = GangaDataset(
                        files=filelist[fid:fid + self.files_per_unit])
                    unit.inputdata.treat_as_inputfiles = self.inputdata[
                        0].treat_as_inputfiles

                    fid += self.files_per_unit

                    self.addUnitToTRF(unit)

            else:
                # just produce one unit per dataset
                for ds in self.inputdata:

                    # avoid splitting over chain inputs
                    if isType(ds, TaskChainInput):
                        continue

                    unit = CoreUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.inputdata = copy.deepcopy(ds)
                    self.addUnitToTRF(unit)
Example #20
    def master_updateMonitoringInformation(jobs):
        """Updates the statuses of the list of jobs provided by issuing crab -status."""
        logger.info('Updating the monitoring information of ' + str(len(jobs)) + ' jobs')

        from CRABAPI.RawCommand import crabCommand
        from CRABClient.ClientExceptions import ConfigurationException
        import httplib

        for j in jobs:

            logger.info('Updating monitoring information for job %d (%s)' % (j.id, j.status))
            if not j.backend.requestname:
                logger.warning("Couldn't find request name for job %s. Skipping" % j.id)
                continue
            crab_work_dir = os.path.join(j.outputdir, j.backend.requestname)
            logger.info('crab_work_dir: %s' % crab_work_dir)

            statusresult = {}
            try:
                statusresult = crabCommand('status', dir = crab_work_dir, proxy = '/data/hc/apps/cms/config/x509up_production2', long=True)
                logger.info("CRAB3 Status result: %s" % statusresult)
            except httplib.HTTPException as e:
                logger.error(e.result)
            except ConfigurationException as ce:
                # From CRAB3 error message: Error loading CRAB cache file. Try to do 'rm -rf /root/.crab3' and run the crab command again.
                import subprocess
                import uuid
                randomstring = str(uuid.uuid4().get_hex().upper()[0:6])
                subprocess.call(["mv", "/root/.crab3", "/tmp/.crab3."+randomstring])
                try:
                    statusresult = crabCommand('status', dir = crab_work_dir, proxy = '/data/hc/apps/cms/config/x509up_production2', long=True)
                    logger.info("CRAB3 Status result: %s" % statusresult)
                except httplib.HTTPException as e:
                    logger.error(e.headers)
                    logger.error(e.result)            

            try:
                jobsdict = statusresult['jobs']
            except KeyError:
                jobsdict = {}

            if jobsdict:
                logger.info('There are subjob statuses for job %s' % j.id)
                if not j.subjobs:
                    logger.warning('No subjob object for job %s' % j.id)
                    subjoblist = [None] * len(jobsdict)
                    #j.subjobs = [None] * len(jobsdict)
                    #subjob_index = 0
                    for crabid, status in jobsdict.items():
                        crabid = int(crabid)
                        jobstatus = status['State']
                        logger.info('Creating subjob')
                        sj = Job()
                        sj.copyFrom(j)
                        sj.backend.crabid = crabid
                        sj.inputdata = None
                        sj.id = crabid-1
                        sj.updateStatus('submitting')
                        sj.backend.updateSubjobStatus(status)
                        subjoblist[crabid-1] = sj

                    for newsubjob in subjoblist:
                        j.subjobs.append(newsubjob)
                    logger.info('New subjobs for job %s: %s' % (j.id, j.subjobs))

                    #j.subjobs.sort(key=lambda subjob: subjob.id)

                else:
                    for crabid, status in jobsdict.items():
                        crabid = int(crabid)
                        j.subjobs[crabid-1].backend.updateSubjobStatus(status)

                #j.updateStatus('running')

            else:
                logger.info('There are no subjobs for job %s' % (j.id))
                #logger.info('Checking task status from report: %s' % statusresult['status'])
                logger.info('Checking task status from report')
                try:
                    taskstatus = statusresult['status']
                    if taskstatus in ['FAILED', 'SUBMITFAILED']:
                        logger.info('Job failed: %s' % taskstatus)
                        j.updateStatus('failed')
                except KeyError:
                    pass
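
The ConfigurationException branch above works around a stale CRAB cache by moving /root/.crab3 aside and retrying once. That workaround could be factored into a small helper along these lines (a sketch; crab_status_with_retry is hypothetical and the proxy path is site-specific):

import subprocess
import uuid

def crab_status_with_retry(crab_work_dir, proxy):
    # Sketch of the move-cache-and-retry workaround used above.
    from CRABAPI.RawCommand import crabCommand
    from CRABClient.ClientExceptions import ConfigurationException
    try:
        return crabCommand('status', dir=crab_work_dir, proxy=proxy, long=True)
    except ConfigurationException:
        # Stale CRAB cache: move it out of the way and try once more.
        suffix = uuid.uuid4().hex[:6].upper()
        subprocess.call(['mv', '/root/.crab3', '/tmp/.crab3.' + suffix])
        return crabCommand('status', dir=crab_work_dir, proxy=proxy, long=True)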
Example #21
def runTest(NTEST, rootDir, output_dir, rep_type):
    logger.debug('from runTest: rootDir %s, output_dir %s' %
                 (rootDir, output_dir))
    if rep_type == "Remote":
        repository = repositoryFactory(repositoryType=rep_type,
                                       root_dir=rootDir,
                                       streamer=SimpleJobStreamer(),
                                       host='lxgate41.cern.ch',
                                       port=8822,
                                       login=os.getlogin(),
                                       keepalive=True)
    elif rep_type == "Local":
        repository = repositoryFactory(repositoryType=rep_type,
                                       root_dir=rootDir,
                                       streamer=SimpleJobStreamer(),
                                       local_root=os.path.expanduser('~'))
    else:
        logger.error("Wrong type of repository...")
        logger.error("Exiting ...")
        return
    nn = tempfile.mktemp(suffix='.test')
    nn = os.path.join(output_dir, os.path.basename(nn))
    ff = file(nn, 'w')
    try:
        jj = []
        for i in range(NTEST):
            j = Job()
            #j.application = Gaudi()
            j.name = "MyJob" + str(i)
            j.application.args = 1000 * ['/abc']
            jj.append(j)

        #----------------------------------------------------
        t1 = _startText(ff, 'registering %d jobs...' % NTEST)
        logger.debug('registering %d jobs...' % NTEST)
        try:
            repository.registerJobs(jj)
        except Exception as e:
            logger.error("EXCEPTION in registerJobs " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug("--->command status OK")
        _endText(ff, t1)

        #----------------------------------------------------
        t1 = _startText(ff, 'testing splitting of %d jobs...' % NTEST)
        logger.debug('testing splitting of %d jobs...' % NTEST)
        try:
            for i in range(NTEST):
                testSplitting(repository, LEN=10)
        except Exception as e:
            logger.error("EXCEPTION in testSplitting " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug("--->command status OK")
        _endText(ff, t1)

        #----------------------------------------------------
        t1 = _startText(ff, 'retrieving info about first 10 jobs...')
        logger.debug('retrieving info about first 10 jobs...')
        try:
            rjj = repository.checkoutJobs(map(lambda j: j.id, jj[:10]))
        except Exception as e:
            logger.error("EXCEPTION in checkoutJobs " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug("--->checkout jobs " + str(map(lambda j: j.id, rjj)))
        _endText(ff, t1)

        #----------------------------------------------------
        t1 = _startText(ff, 'retrieving info about ALL jobs')
        logger.debug('retrieving info about ALL jobs')
        try:
            rjj = repository.checkoutJobs({})
        except Exception as e:
            logger.error("EXCEPTION in checkoutJobs " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug(
                "--->checkout jobs " + str(len(rjj)) + " " + str(map(lambda j: j.id, rjj)))
        _endText(ff, t1)

        for j in jj:
            j.application = Executable()
            try:
                j.updateStatus('submitting')
            except:
                pass

        #----------------------------------------------------
        t1 = _startText(ff, 'committing %d jobs...' % NTEST)
        logger.debug('committing %d jobs...' % NTEST)
        try:
            repository.commitJobs(jj)
        except Exception as e:
            logger.error("EXCEPTION in commitJobs " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug("--->command status OK")
        _endText(ff, t1)

        #----------------------------------------------------
        t1 = _startText(ff, 'setting status for %d jobs...' % NTEST)
        logger.debug('setting status for %d jobs...' % NTEST)
        try:
            repository.setJobsStatus(map(lambda j: (j.id, 'submitted'), jj))
        except Exception as e:
            logger.error("EXCEPTION in setJobsStatus " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug("--->command status OK")
        _endText(ff, t1)

        #----------------------------------------------------
        t1 = _startText(ff, 'getting status of first 10 jobs...')
        logger.debug('getting status of first 10 jobs...')
        try:
            rjj = repository.getJobsStatus(map(lambda j: j.id, jj[:10]))
        except Exception as e:
            logger.error("EXCEPTION in getJobsStatus " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug("--->command output " + str(len(rjj)) + str(rjj))
        _endText(ff, t1)

        #----------------------------------------------------
        t1 = _startText(ff, 'getting id of jobs with particular attributes...')
        logger.debug('getting id of jobs with particular attributes...')
        try:
            rjj = repository.getJobIds(
                {'status': 'submitted', 'application': 'Executable'})
        except Exception as e:
            logger.error("EXCEPTION in getJobIds " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug("--->command output" + str(len(rjj)) + str(rjj))
        _endText(ff, t1)

        t1 = _startText(ff, 'retrieving info about ALL jobs')
        rjj = repository.checkoutJobs({})
        if DEBUG:
            logger.debug('retrieving info about ALL jobs')
            jj_id = map(lambda j: j.id, jj)
            st_lst = []
            for j in rjj:
                if j.id in jj_id:
                    st_lst.append((j.id, j.status))
            logger.error(
                "--->command output " + str(len(st_lst)) + ' ' + str(st_lst))
        _endText(ff, t1)

        t1 = _startText(ff, 'deleting %d jobs...' % NTEST)
        logger.debug('deleting %d jobs...' % NTEST)
        try:
            repository.deleteJobs(map(lambda j: j.id, jj))
        except Exception as e:
            logger.error("EXCEPTION in deleteJobs " + str(e))
            logger.debug("--->command status FAIL")
        else:
            logger.debug("--->command status OK")
        _endText(ff, t1)

    finally:
        ff.close()