Example #1
    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        """
        The old bulk-submit method, used to construct the subjobs for a parametric job.
        Args:
            dirac_ids (list): List of the DIRAC ids that have been created
            dirac_script (str): Name of the DIRAC script that contains the job JDL
        """
        with open(dirac_script, 'r') as f:
            parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError(
                'Dirac',
                'Mismatch between the number of datasets defined in the DIRAC API script and those returned by DIRAC'
            )

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        master_job.subjobs = []
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True
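A minimal sketch of the dataset/id invariant this method enforces, assuming a hypothetical DIRAC API script in which the parametric datasets appear on a single setParametricInputData([...]) line; the stand-in parser below is illustrative only, not Ganga's real get_parametric_datasets helper.

    import ast

    def get_parametric_datasets(script_lines):
        # Hypothetical stand-in: pull the list literal out of a
        # 'setParametricInputData([...])' line in the API script.
        prefix = 'setParametricInputData('
        for line in script_lines:
            line = line.strip()
            if line.startswith(prefix):
                return ast.literal_eval(line[len(prefix):-1])
        return []

    script = '\n'.join([
        "j = Job()",
        "setParametricInputData([['LFN:/a/1'], ['LFN:/a/2'], ['LFN:/a/3']])",
        "j.submit()",
    ])
    dirac_ids = [101, 102, 103]  # ids as returned by DIRAC

    datasets = get_parametric_datasets(script.split('\n'))
    # Mirrors the BackendError check: one dataset per DIRAC id.
    assert len(datasets) == len(dirac_ids)
    for i, (dirac_id, dataset) in enumerate(zip(dirac_ids, datasets)):
        print(i, dirac_id, dataset)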
Example #2
    def master_updateMonitoringInformation(jobs):
        """Updates the statuses of the list of jobs provided by issuing crab -status."""
        logger.info('Updating the monitoring information of ' + str(len(jobs)) + ' jobs')
        try:
            from Ganga.GPIDev.Lib.Job.Job import Job
            import sys, traceback
 
            for j in jobs:
                server = CRABServer()
                logger.debug('Updating monitoring information for job %d (%s)' % (j.id, j.status))
                try:
                    dictresult, status, reason = server.status(j)
                    logger.info('CRAB3 server call answer status: %s - reason: %s' % (status, reason))
                    joblist = sorted(dictresult['result'][0]['jobList'], key=lambda x: int(x[1]))
                except KeyError:
                    logger.info('Get status for job %d didn\'t return job list, skipping job for now.' % j.id)
                    continue
                except Exception:
                    logger.error('Get status for job %d failed.' % j.id)
                    raise

                if joblist:
                    logger.info('There are subjob statuses for job %s' % j.id)
                    logger.debug('j: %s' % dir(j))
                    if not j.subjobs:
                        logger.warning('No subjob object for job %s' % j.id)
                        j.subjobs = []
                        for i, subjob in enumerate(joblist):
                            index  = int(subjob[1])
                            logger.info('Processing subjob %d, %s' % (index, subjob))
                            sj = Job()
                            sj.copyFrom(j)
                            sj.backend.crabid = index
                            sj.id = i
                            sj.updateStatus('submitting')
                            sj.backend.checkReport(subjob)
                            sj.backend.checkStatus()
                            j.subjobs.append(sj)
                        #j.subjobs = sorted(j.subjobs, key=lambda x: x.backend.id) 
                        #j._commit()  
                    else:
                        for subjob in joblist:
                            index  = int(subjob[1])
                            logger.debug('Found subjob %s searching with index %s' % (j.subjobs[index-1].backend.crabid, index))
                            j.subjobs[index-1].backend.checkReport(subjob)                   
                            j.subjobs[index-1].backend.checkStatus()

                    j.updateMasterJobStatus()
                else:
                    logger.info('There are no subjobs for job %s' % (j.id))
                    logger.info('checking task status from report: %s' % dictresult['result'][0]['status'])
                    taskstatus = dictresult['result'][0]['status']
                    if taskstatus in ['FAILED']:
                        logger.info('Job failed: %s' % dictresult)
                        j.updateStatus('failed')
        except Exception as e:
            logger.error(e)
            traceback.print_exc(file=sys.stdout)
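The loop above assumes dictresult['result'][0]['jobList'] is a list of [state, index] pairs with 1-based CRAB indices; the payload below is a hypothetical illustration of that shape and of the index - 1 mapping into j.subjobs, not a real CRAB3 server response.

    # Hypothetical CRAB3 status payload, shaped as the code expects.
    dictresult = {
        'result': [{
            'status': 'SUBMITTED',
            'jobList': [['finished', '2'], ['running', '1'], ['failed', '3']],
        }]
    }

    joblist = sorted(dictresult['result'][0]['jobList'], key=lambda x: int(x[1]))
    for state, crabid in joblist:
        index = int(crabid)
        # CRAB ids are 1-based, subjob slots are 0-based, hence index - 1.
        print('subjob slot %d has state %s' % (index - 1, state))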
Example #3
    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        """
        The old bulk-submit method, used to construct the subjobs for a parametric job.
        Args:
            dirac_ids (list): List of the DIRAC ids that have been created
            dirac_script (str): Name of the DIRAC script that contains the job JDL
        """
        with open(dirac_script, 'r') as f:
            parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError('Dirac', 'Mismatch between the number of datasets defined in the DIRAC API script and those returned by DIRAC')

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        master_job.subjobs = []
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True
Example #4
    def master_setup_bulk_subjobs(self, jobs, jdefids):

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        for i in range(len(jdefids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend = Panda()
            j.backend.id = jdefids[i]
            j.id = i
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True
Example #5
    def master_setup_bulk_subjobs(self, jobs, jdefids):

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        for i in range(len(jdefids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend = Panda()
            j.backend.id = jdefids[i]
            j.id = i
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        return True
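Unlike the DIRAC variants, the PanDA versions above replace each subjob's backend with a fresh Panda() instance before attaching the job-definition id. A minimal sketch of the master/subjob structure that results, using plain hypothetical stand-ins rather than the real Ganga classes:

    class FakeBackend(object):
        def __init__(self):
            self.id = None

    class FakeJob(object):
        def __init__(self):
            self.id = None
            self.backend = FakeBackend()
            self.subjobs = []

    master = FakeJob()
    jdefids = [5551, 5552, 5553]  # hypothetical PanDA job-definition ids

    for i, jdefid in enumerate(jdefids):
        sub = FakeJob()
        sub.id = i                # subjob ids are the 0-based loop index
        sub.backend.id = jdefid   # backend id is the PanDA-assigned id
        master.subjobs.append(sub)

    print([(s.id, s.backend.id) for s in master.subjobs])
    # [(0, 5551), (1, 5552), (2, 5553)]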
Example #6
    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        with open(dirac_script, 'r') as f:
            parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError('Dirac', 'Mismatch between the number of datasets defined in the DIRAC API script and those returned by DIRAC')

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        master_job._commit()
        return True
Example #7
    def _setup_bulk_subjobs(self, dirac_ids, dirac_script):
        with open(dirac_script, 'r') as f:
            parametric_datasets = get_parametric_datasets(f.read().split('\n'))
        if len(parametric_datasets) != len(dirac_ids):
            raise BackendError('Dirac', 'Mismatch between the number of datasets defined in the DIRAC API script and those returned by DIRAC')

        from Ganga.GPIDev.Lib.Job.Job import Job
        master_job = self.getJobObject()
        master_job.subjobs = []
        for i in range(len(dirac_ids)):
            j = Job()
            j.copyFrom(master_job)
            j.splitter = None
            j.backend.id = dirac_ids[i]
            j.id = i
            j.inputdata = self._setup_subjob_dataset(parametric_datasets[i])
            j.status = 'submitted'
            j.time.timenow('submitted')
            master_job.subjobs.append(j)
        master_job._commit()
        return True
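Examples #6 and #7 both finish with master_job._commit(), which flushes the modified master job to Ganga's job repository; #7 additionally resets master_job.subjobs to an empty list first. A small sketch of why that reset matters if the method runs twice for the same master job (plain lists stand in for the subjob registry; this is a toy model, not Ganga code):

    def setup_subjobs(subjobs, new_ids, reset=False):
        # Toy model of the two variants: reset mirrors Example #7's
        # 'master_job.subjobs = []', no reset mirrors Example #6.
        if reset:
            subjobs = []
        for i, new_id in enumerate(new_ids):
            subjobs.append((i, new_id))
        return subjobs

    first = setup_subjobs([], [1, 2])
    print(setup_subjobs(first, [3, 4]))              # stale entries linger
    print(setup_subjobs(first, [3, 4], reset=True))  # only the fresh subjobs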
Example #8
    def master_updateMonitoringInformation(jobs):
        """Updates the statuses of the list of jobs provided by issuing crab -status."""
        logger.info('Updating the monitoring information of ' +
                    str(len(jobs)) + ' jobs')
        try:
            from Ganga.GPIDev.Lib.Job.Job import Job
            import sys, traceback

            for j in jobs:
                server = CRABServer()
                logger.debug(
                    'Updating monitoring information for job %d (%s)' %
                    (j.id, j.status))
                try:
                    dictresult, status, reason = server.status(j)
                    logger.info(
                        'CRAB3 server call answer status: %s - reason: %s' %
                        (status, reason))
                    joblist = sorted(dictresult['result'][0]['jobList'],
                                     key=lambda x: int(x[1]))
                except KeyError:
                    logger.info(
                        'Get status for job %d didn\'t return job list, skipping job for now.'
                        % j.id)
                    continue
                except Exception:
                    logger.error('Get status for job %d failed.' % j.id)
                    raise

                if joblist:
                    logger.info('There are subjob statuses for job %s' % j.id)
                    logger.debug('j: %s' % dir(j))
                    if not j.subjobs:
                        logger.warning('No subjob object for job %s' % j.id)
                        j.subjobs = []
                        for i, subjob in enumerate(joblist):
                            index = int(subjob[1])
                            logger.info('Processing subjob %d, %s' %
                                        (index, subjob))
                            sj = Job()
                            sj.copyFrom(j)
                            sj.backend.crabid = index
                            sj.id = i
                            sj.updateStatus('submitting')
                            sj.backend.checkReport(subjob)
                            sj.backend.checkStatus()
                            j.subjobs.append(sj)
                        #j.subjobs = sorted(j.subjobs, key=lambda x: x.backend.id)
                        #j._commit()
                    else:
                        for subjob in joblist:
                            index = int(subjob[1])
                            logger.debug(
                                'Found subjob %s searching with index %s' %
                                (j.subjobs[index - 1].backend.crabid, index))
                            j.subjobs[index - 1].backend.checkReport(subjob)
                            j.subjobs[index - 1].backend.checkStatus()

                    j.updateMasterJobStatus()
                else:
                    logger.info('There are no subjobs for job %s' % (j.id))
                    logger.info('checking task status from report: %s' %
                                dictresult['result'][0]['status'])
                    taskstatus = dictresult['result'][0]['status']
                    if taskstatus in ['FAILED']:
                        logger.info('Job failed: %s' % dictresult)
                        j.updateStatus('failed')
        except Exception as e:
            logger.error(e)
            traceback.print_exc(file=sys.stdout)
Example #9
    def master_updateMonitoringInformation(jobs):
        """Updates the statuses of the list of jobs provided by issuing crab -status."""
        logger.info('Updating the monitoring information of ' + str(len(jobs)) + ' jobs')

        from Ganga.GPIDev.Lib.Job.Job import Job
        from CRABAPI.RawCommand import crabCommand
        from CRABClient.ClientExceptions import ConfigurationException
        import httplib

        for j in jobs:

            logger.info('Updating monitoring information for job %d (%s)' % (j.id, j.status))
            if not j.backend.requestname:
                logger.warning("Couldn't find request name for job %s. Skipping" % s)
                continue
            crab_work_dir = os.path.join(j.outputdir, j.backend.requestname)
            logger.info('crab_work_dir: %s' % crab_work_dir)

            statusresult = {}
            try:
                statusresult = crabCommand('status', dir=crab_work_dir, proxy='/data/hc/apps/cms/config/x509up_production2', long=True)
                logger.info("CRAB3 Status result: %s" % statusresult)
            except httplib.HTTPException as e:
                logger.error(e.result)
            except ConfigurationException as ce:
                # From CRAB3 error message: Error loading CRAB cache file. Try to do 'rm -rf /root/.crab3' and run the crab command again.
                import subprocess
                import uuid
                randomstring = uuid.uuid4().hex.upper()[0:6]
                subprocess.call(["mv", "/root/.crab3", "/tmp/.crab3."+randomstring])
                try:
                    statusresult = crabCommand('status', dir=crab_work_dir, proxy='/data/hc/apps/cms/config/x509up_production2', long=True)
                    logger.info("CRAB3 Status result: %s" % statusresult)
                except httplib.HTTPException as e:
                    logger.error(e.headers)
                    logger.error(e.result)            

            try:
                jobsdict = statusresult['jobs']
            except KeyError:
                jobsdict = {}

            if jobsdict:
                logger.info('There are subjob statuses for job %s' % j.id)
                if not j.subjobs:
                    logger.warning('No subjob object for job %s' % j.id)
                    subjoblist = [None] * len(jobsdict)
                    #j.subjobs = [None] * len(jobsdict)
                    #subjob_index = 0
                    for crabid, status in jobsdict.items():
                        crabid = int(crabid)
                        jobstatus = status['State']
                        logger.info('Creating subjob')
                        sj = Job()
                        sj.copyFrom(j)
                        sj.backend.crabid = crabid
                        sj.inputdata = None
                        sj.id = crabid-1
                        sj.updateStatus('submitting')
                        sj.backend.updateSubjobStatus(status)
                        subjoblist[crabid-1] = sj

                    for newsubjob in subjoblist:
                        j.subjobs.append(newsubjob)
                    logger.info('New subjobs for job %s: %s' % (j.id, j.subjobs))

                    #j.subjobs.sort(key=lambda subjob: subjob.id)

                else:
                    for crabid, status in jobsdict.items():
                        crabid = int(crabid)
                        j.subjobs[crabid-1].backend.updateSubjobStatus(status)

                #j.updateStatus('running')

            else:
                logger.info('There are no subjobs for job %s' % (j.id))
                #logger.info('Checking task status from report: %s' % statusresult['status'])
                logger.info('Checking task status from report')
                try:
                    taskstatus = statusresult['status']
                    if taskstatus in ['FAILED', 'SUBMITFAILED']:
                        logger.info('Job failed: %s' % taskstatus)
                        j.updateStatus('failed')
                except KeyError:
                    pass
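The creation branch above assumes crabCommand('status', ...) returns a dict whose 'jobs' entry maps 1-based CRAB ids (as strings) to per-job state dicts; the payload below is a hypothetical illustration of that shape and of the slot-filling into subjoblist, not real CRAB3 client output.

    # Hypothetical crabCommand('status', ...) result, shaped as the code expects.
    statusresult = {
        'status': 'SUBMITTED',
        'jobs': {
            '2': {'State': 'running'},
            '1': {'State': 'finished'},
            '3': {'State': 'failed'},
        },
    }

    jobsdict = statusresult.get('jobs', {})
    subjoblist = [None] * len(jobsdict)
    for crabid, status in jobsdict.items():
        # CRAB ids are 1-based strings; slots are 0-based, hence crabid - 1.
        subjoblist[int(crabid) - 1] = status['State']

    print(subjoblist)  # ['finished', 'running', 'failed'], ordered by crabid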