def run(self):
    try:
        while True:
            _logger.debug('%s start' % self.pandaID)
            # query job
            job = self.taskBuffer.peekJobs([self.pandaID], fromDefined=False,
                                           fromArchived=False, fromWaiting=False)[0]
            # check job existence before touching its attributes
            if job is None:
                _logger.debug('%s escape : not found' % self.pandaID)
                return
            _logger.debug('%s in %s' % (self.pandaID, job.jobStatus))
            # check job status
            if job.jobStatus not in ['running', 'sent', 'starting', 'holding',
                                     'stagein', 'stageout']:
                if job.jobStatus == 'transferring' and \
                        (job.prodSourceLabel in ['user', 'panda'] or
                         job.jobSubStatus not in [None, 'NULL', '']):
                    pass
                else:
                    _logger.debug('%s escape : %s' % (self.pandaID, job.jobStatus))
                    return
            # time limit
            timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=self.sleepTime)
            if job.modificationTime < timeLimit or \
                    (job.endTime != 'NULL' and job.endTime < timeLimit):
                _logger.debug('%s %s lastmod:%s endtime:%s' % (job.PandaID, job.jobStatus,
                                                               str(job.modificationTime),
                                                               str(job.endTime)))
                destDBList = []
                if job.jobStatus == 'sent':
                    # sent job didn't receive reply from pilot within 30 min
                    job.jobDispatcherErrorCode = ErrorCode.EC_SendError
                    job.jobDispatcherErrorDiag = "Sent job didn't receive reply from pilot within 30 min"
                elif job.exeErrorDiag == 'NULL' and job.pilotErrorDiag == 'NULL':
                    # lost heartbeat
                    job.jobDispatcherErrorCode = ErrorCode.EC_Watcher
                    if job.jobDispatcherErrorDiag == 'NULL':
                        if job.endTime == 'NULL':
                            # normal lost heartbeat
                            job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(job.modificationTime)
                        else:
                            # job recovery failed
                            job.jobDispatcherErrorDiag = 'lost heartbeat : %s' % str(job.endTime)
                            if job.jobStatus == 'transferring':
                                job.jobDispatcherErrorDiag += ' in transferring'
                    # get worker
                    workerSpecs = self.taskBuffer.getWorkersForJob(job.PandaID)
                    if len(workerSpecs) > 0:
                        workerSpec = workerSpecs[0]
                        if workerSpec.status in ['finished', 'failed', 'cancelled', 'missed']:
                            job.supErrorCode = SupErrors.error_codes['WORKER_ALREADY_DONE']
                            job.supErrorDiag = 'worker already {0} at {1} with {2}'.format(
                                workerSpec.status, str(workerSpec.endTime), workerSpec.diagMessage)
                            job.supErrorDiag = JobSpec.truncateStringAttr('supErrorDiag', job.supErrorDiag)
                else:
                    # job recovery failed
                    job.jobDispatcherErrorCode = ErrorCode.EC_Recovery
                    job.jobDispatcherErrorDiag = 'job recovery failed for %s hours' % (self.sleepTime / 60)
                # set job status
                job.jobStatus = 'failed'
                # set endTime for lost heartbeat
                if job.endTime == 'NULL':
                    # normal lost heartbeat
                    job.endTime = job.modificationTime
                # set files status
                for tmpFile in job.Files:
                    if tmpFile.type in ['output', 'log']:
                        tmpFile.status = 'failed'
                        if tmpFile.destinationDBlock not in destDBList:
                            destDBList.append(tmpFile.destinationDBlock)
                # event service
                if EventServiceUtils.isEventServiceJob(job) and not EventServiceUtils.isJobCloningJob(job):
                    eventStat = self.taskBuffer.getEventStat(job.jediTaskID, job.PandaID)
                    # set sub status when there are no successful events
                    if EventServiceUtils.ST_finished not in eventStat:
                        job.jobSubStatus = 'es_heartbeat'
                # update job
                self.taskBuffer.updateJobs([job], False)
                # apply retry rules for the failed job
                if job.jobStatus == 'failed':
                    source = 'jobDispatcherErrorCode'
                    error_code = job.jobDispatcherErrorCode
                    error_diag = job.jobDispatcherErrorDiag
                    try:
                        _logger.debug("Watcher will call apply_retrial_rules")
                        retryModule.apply_retrial_rules(self.taskBuffer, job.PandaID, source,
                                                        error_code, error_diag, job.attemptNr)
                        _logger.debug("apply_retrial_rules is back")
                    except Exception as e:
                        _logger.error("apply_retrial_rules excepted and needs to be investigated (%s): %s" %
                                      (e, traceback.format_exc()))
                    # updateJobs was successful and it failed a job with taskBufferErrorCode
                    try:
                        _logger.debug("Watcher.run will peek the job")
                        job_tmp = self.taskBuffer.peekJobs([job.PandaID], fromDefined=False,
                                                           fromArchived=True, fromWaiting=False)[0]
                        if job_tmp.taskBufferErrorCode:
                            source = 'taskBufferErrorCode'
                            error_code = job_tmp.taskBufferErrorCode
                            error_diag = job_tmp.taskBufferErrorDiag
                            _logger.debug("Watcher.run 2 will call apply_retrial_rules")
                            retryModule.apply_retrial_rules(self.taskBuffer, job_tmp.PandaID, source,
                                                            error_code, error_diag, job_tmp.attemptNr)
                            _logger.debug("apply_retrial_rules 2 is back")
                    except IndexError:
                        pass
                    except Exception as e:
                        _logger.error("apply_retrial_rules 2 excepted and needs to be investigated (%s): %s" %
                                      (e, traceback.format_exc()))
                # start closer
                cThr = Closer(self.taskBuffer, destDBList, job)
                cThr.start()
                cThr.join()
                _logger.debug('%s end' % job.PandaID)
                return
            # single action
            if self.single:
                return
            # sleep
            time.sleep(60 * self.sleepTime)
    except Exception:
        errtype, errvalue = sys.exc_info()[:2]
        _logger.error("run() : %s %s" % (errtype, errvalue))
        return
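# A minimal standalone sketch, not part of the original module, of the
# staleness test run() applies above: a job whose last modificationTime is
# older than sleepTime minutes is treated as having lost its heartbeat.
# The function name is_stale and its parameters are illustrative assumptions.
import datetime

def is_stale(modification_time, sleep_time_minutes=60):
    """Return True when the last heartbeat is older than the limit."""
    time_limit = datetime.datetime.utcnow() - datetime.timedelta(minutes=sleep_time_minutes)
    return modification_time < time_limit

if __name__ == '__main__':
    # a heartbeat from two hours ago is stale against a 60-minute limit
    print(is_stale(datetime.datetime.utcnow() - datetime.timedelta(hours=2)))  # True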
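# A hedged usage sketch, not from the original file: run() is written as a
# thread body with start()/join() used on Closer in the same style, so the
# enclosing class is presumably a threading.Thread subclass. The constructor
# arguments below (taskBuffer, pandaID, single, sleepTime) are inferred from
# the attributes run() reads and are an assumed signature, not a confirmed one.
#
#     watcher = Watcher(taskBuffer, pandaID, single=True, sleepTime=60)
#     watcher.start()   # executes run() in its own thread
#     watcher.join()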