def canSubmit(self, neededTime, canCurrentlySubmit):
    # Refuse outright if the token is already below the hard lower lifetime limit
    if not self._checkTimeleft(self._lowerLimit):
        raise UserError('Your access token (%s) only has %d seconds left! (Required are %s)' % (
            self.getObjectName(), self._getTimeleft(cached = True), strTime(self._lowerLimit)))
    if self._ignoreTime or (neededTime < 0):
        return True
    # Disable submission if the token cannot cover the lower limit plus the requested walltime
    if not self._checkTimeleft(self._lowerLimit + neededTime) and canCurrentlySubmit:
        self._logUser.warning('Access token (%s) lifetime (%s) does not meet the access and walltime (%s) requirements!',
            self.getObjectName(), strTime(self._getTimeleft(cached = False)), strTime(self._lowerLimit + neededTime))
        self._logUser.warning('Disabling job submission')
        return False
    return True

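# Hedged, self-contained sketch of the lifetime guard above - all names here
# (_DemoToken, expiry, lower_limit, can_submit) are hypothetical illustrations,
# not the original API: submission is allowed only while the token lifetime
# still covers the safety margin plus the requested walltime.
import time

class _DemoToken(object):
    def __init__(self, expiry, lower_limit):
        self._expiry = expiry  # absolute expiry timestamp (seconds since epoch)
        self._lower_limit = lower_limit  # hard safety margin in seconds

    def can_submit(self, needed_time):
        timeleft = self._expiry - time.time()
        if timeleft < self._lower_limit:
            raise RuntimeError('token below hard lower lifetime limit')
        return timeleft >= self._lower_limit + needed_time

# Example: 1h of lifetime left, 10min safety margin, 30min of walltime needed
_token = _DemoToken(expiry = time.time() + 3600, lower_limit = 600)
print(_token.can_submit(needed_time = 1800))  # -> True (3600 >= 600 + 1800)
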
def _update(self, jobObj, jobNum, state, showWMS = False):
    if jobObj.state == state:
        return
    # Persist the state transition before reporting it
    oldState = jobObj.state
    jobObj.update(state)
    self.jobDB.commit(jobNum, jobObj)
    # Pad the job number so log lines align across the whole job range
    jobNumLen = int(math.log10(max(1, len(self.jobDB))) + 1)
    jobStatus = ['Job %s state changed from %s to %s ' % (str(jobNum).ljust(jobNumLen),
        Job.enum2str(oldState), Job.enum2str(state))]
    if showWMS and jobObj.wmsId:
        jobStatus.append('(WMS:%s)' % jobObj.wmsId.split('.')[1])
    # Append state specific details
    if (state == Job.SUBMITTED) and (jobObj.attempt > 1):
        jobStatus.append('(retry #%s)' % (jobObj.attempt - 1))
    elif (state == Job.QUEUED) and (jobObj.get('dest') != 'N/A'):
        jobStatus.append('(%s)' % jobObj.get('dest'))
    elif (state in [Job.WAITING, Job.ABORTED, Job.DISABLED]) and jobObj.get('reason'):
        jobStatus.append('(%s)' % jobObj.get('reason'))
    elif (state == Job.SUCCESS) and (jobObj.get('runtime', None) is not None):
        jobStatus.append('(runtime %s)' % strTime(jobObj.get('runtime') or 0))
    elif state == Job.FAILED:
        msg = []
        retCode = jobObj.get('retcode')
        if retCode:
            msg.append('error code: %d' % retCode)
            if (utils.verbosity() > 0) and (retCode in self._task.errorDict):
                msg.append(self._task.errorDict[retCode])
        if jobObj.get('dest'):
            msg.append(jobObj.get('dest'))
        if len(msg):
            jobStatus.append('(%s)' % str.join(' - ', msg))
    self._log_user_time.info(str.join(' ', jobStatus))

def canSubmit(self, neededTime, canCurrentlySubmit):
    if not self._checkTimeleft(self._lowerLimit):
        raise UserError('Your access token (%s) only has %d seconds left! (Required are %s)' % (
            self.getObjectName(), self._getTimeleft(cached=True), strTime(self._lowerLimit)))
    if self._ignoreTime:
        return True
    if not self._checkTimeleft(self._lowerLimit + neededTime) and canCurrentlySubmit:
        self._logUser.warning('Access token (%s) lifetime (%s) does not meet the access and walltime (%s) requirements!',
            self.getObjectName(), strTime(self._getTimeleft(cached=False)), strTime(self._lowerLimit + neededTime))
        self._logUser.warning('Disabling job submission')
        return False
    return True

def display(self):
    cpuTime = 0
    for jobNum in self._jobs:
        jobObj = self._jobDB.get(jobNum)
        if jobObj:
            cpuTime += jobObj.get('runtime', 0)
    sys.stdout.write('Consumed wall time: %-20s' % strTime(cpuTime))
    sys.stdout.write('Estimated cost: $%.2f\n' % ((cpuTime / 60. / 60.) * self._dollar_per_hour))

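# Worked example of the cost formula used in display() above (the numbers are
# made-up illustrations): the runtime sum in seconds is converted to hours and
# multiplied by the configured dollar-per-hour rate.
cpuTime = 90000  # seconds of consumed wall time
dollar_per_hour = 0.05  # hypothetical rate
print('$%.2f' % ((cpuTime / 60. / 60.) * dollar_per_hour))  # 25h * 0.05 $/h -> $1.25
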
def _checkTimeleft(self, neededTime):
    # Estimate remaining lifetime from the cached value and the time since the last query
    delta = time.time() - self._lastUpdate
    timeleft = max(0, self._getTimeleft(cached = True) - delta)
    # Recheck token => after > 30min have passed or when time is running out (max every 5 minutes)
    if (delta > self._minQueryTime) or (timeleft < neededTime and delta > self._maxQueryTime):
        self._lastUpdate = time.time()
        timeleft = self._getTimeleft(cached = False)
        self._logUser.info('Time left for access token "%s": %s',
            self.getObjectName(), strTime(timeleft))
    return timeleft >= neededTime

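# Self-contained sketch of the throttled recheck pattern in _checkTimeleft
# (hypothetical names, not the original implementation): the expensive lifetime
# query runs routinely only after min_query_time has passed, and at most every
# max_query_time when the cached estimate no longer covers the requirement.
import time

class _CachedLifetimeCheck(object):
    def __init__(self, query_fn, min_query_time = 30 * 60, max_query_time = 5 * 60):
        self._query_fn = query_fn  # expensive query returning seconds of lifetime
        self._min_query_time = min_query_time  # routine refresh interval
        self._max_query_time = max_query_time  # forced refresh interval when running low
        self._last_update = 0
        self._cached = 0

    def check(self, needed_time):
        delta = time.time() - self._last_update
        timeleft = max(0, self._cached - delta)
        if (delta > self._min_query_time) or (timeleft < needed_time and delta > self._max_query_time):
            self._last_update = time.time()
            self._cached = timeleft = self._query_fn()
        return timeleft >= needed_time

_check = _CachedLifetimeCheck(lambda: 7200)  # pretend the token has 2h left
print(_check.check(needed_time = 3600))  # -> True, via a single real query
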
def display(self):
    # imap/ifilter are the lazy map/filter variants (from itertools or a compatibility module)
    job_runtimes = imap(lambda jobNum: self._jobDB.getJobTransient(jobNum).get('runtime', 0), self._jobs)
    cpuTime = sum(ifilter(lambda rt: rt > 0, job_runtimes))
    msg = 'Consumed wall time: %-20s' % strTime(cpuTime)
    msg += 'Estimated cost: $%.2f\n' % ((cpuTime / 60. / 60.) * self._dollar_per_hour)
    sys.stdout.write(msg)
    sys.stdout.flush()

def _setupJobParameters(self, config):
    data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
    self.dataSplitter = None
    self._data_refresh = -1

    def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
        if (old_obj == '') and (cur_obj != ''):
            raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
        self._log.info('Dataset setup was changed - forcing resync...')
        config.setState(True, 'resync', detail = 'dataset')
        config.setState(True, 'init', detail = 'config')  # This will trigger a write of the new options
        return cur_obj

    dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
        cls = DataProvider, requirePlugin = False, onChange = userRefresh)
    self._forceRefresh = config.getState('resync', detail = 'dataset')
    config.setState(False, 'resync', detail = 'dataset')
    if not dataProvider:
        return

    tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
        setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
    tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
    tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
        setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
    tmp_config.set('default lookup', 'DATASETNICK')

    splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
    splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
    self.dataSplitter = splitterClass(data_config)

    # Create and register dataset parameter source
    partProcessor = data_config.getCompositePlugin('partition processor',
        'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
        'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
    DataParameterSource = ParameterSource.getClass('DataParameterSource')
    self._dataPS = DataParameterSource(data_config.getWorkPath(), 'data',
        dataProvider, self.dataSplitter, partProcessor)
    DataParameterSource.datasetsAvailable['data'] = self._dataPS

    # Select dataset refresh rate
    self._data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
    if self._data_refresh > 0:
        self._dataPS.resyncSetup(interval = max(self._data_refresh, dataProvider.queryLimit()))
        utils.vprint('Dataset source will be queried every %s' % strTime(self._data_refresh), -1)
    else:
        self._dataPS.resyncSetup(interval = 0)
    if self._forceRefresh:
        self._dataPS.resyncSetup(force = True)

    def externalRefresh(sig, frame):
        self._dataPS.resyncSetup(force = True)
    signal.signal(signal.SIGUSR2, externalRefresh)

    if self.dataSplitter.getMaxJobs() == 0:
        raise UserError('There are no events to process')

def setupJobParameters(self, config, pm):
    config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
    self.dataSplitter = None
    self.dataRefresh = -1

    def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
        if (old_obj == '') and (cur_obj != ''):
            raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
        self._log.info('Dataset setup was changed - forcing resync...')
        config.setState(True, 'resync', detail = 'dataset')
        config.setState(True, 'init', detail = 'config')  # This will trigger a write of the new options
        return cur_obj

    dataProvider = config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
        cls = DataProvider, requirePlugin = False, onChange = userRefresh)
    self._forceRefresh = config.getState('resync', detail = 'dataset')
    config.setState(False, 'resync', detail = 'dataset')
    if not dataProvider:
        return

    tmp_config = config.changeView(viewClass = 'TaggedConfigView',
        setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
    tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
    tmp_config = config.changeView(viewClass = 'TaggedConfigView',
        setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
    tmp_config.set('default lookup', 'DATASETNICK')

    splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
    splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
    self.dataSplitter = splitterClass(config)

    # Create and register dataset parameter source
    partProcessor = config.getCompositePlugin('partition processor',
        'BasicPartitionProcessor LocationPartitionProcessor',
        'MultiPartitionProcessor', cls = PartitionProcessor)
    DataParameterSource = ParameterSource.getClass('DataParameterSource')
    self._dataPS = DataParameterSource(config.getWorkPath(), 'data',
        dataProvider, self.dataSplitter, partProcessor)
    DataParameterSource.datasetsAvailable['data'] = self._dataPS

    # Select dataset refresh rate
    self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
    if self.dataRefresh > 0:
        self._dataPS.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
        utils.vprint('Dataset source will be queried every %s' % strTime(self.dataRefresh), -1)
    else:
        self._dataPS.resyncSetup(interval = 0)
    if self._forceRefresh:
        self._dataPS.resyncSetup(force = True)

    def externalRefresh(sig, frame):
        self._dataPS.resyncSetup(force = True)
    signal.signal(signal.SIGUSR2, externalRefresh)

    if self.dataSplitter.getMaxJobs() == 0:
        raise UserError('There are no events to process')

def _setupJobParameters(self, config, psrc_repository):
    TaskModule._setupJobParameters(self, config, psrc_repository)
    data_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['dataset'])
    self._dataSplitter = None
    dataProvider = data_config.getCompositePlugin('dataset', '', ':MultiDatasetProvider:',
        cls = DataProvider, requirePlugin = False, onChange = triggerResync(['datasets', 'parameters']))
    self._forceRefresh = config.getState('resync', detail = 'datasets')
    config.setState(False, 'resync', detail = 'datasets')
    if not dataProvider:
        return

    tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
        setClasses = None, setNames = None, setTags = [], addSections = ['storage'])
    tmp_config.set('se output pattern', '@NICK@_job_@GC_JOB_ID@_@X@')
    tmp_config = data_config.changeView(viewClass = 'TaggedConfigView',
        setClasses = None, setNames = None, setTags = [], addSections = ['parameters'])
    tmp_config.set('default lookup', 'DATASETNICK')

    splitterName = data_config.get('dataset splitter', 'FileBoundarySplitter')
    splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
    self._dataSplitter = splitterClass(data_config)

    # Create and register dataset parameter source
    self._partProcessor = data_config.getCompositePlugin('partition processor',
        'TFCPartitionProcessor LocationPartitionProcessor MetaPartitionProcessor BasicPartitionProcessor',
        'MultiPartitionProcessor', cls = PartitionProcessor, onChange = triggerResync(['parameters']))
    dataPS = ParameterSource.createInstance('DataParameterSource', data_config.getWorkPath(),
        'data', dataProvider, self._dataSplitter, self._partProcessor, psrc_repository)

    # Select dataset refresh rate
    data_refresh = data_config.getTime('dataset refresh', -1, onChange = None)
    if data_refresh >= 0:
        data_refresh = max(data_refresh, dataProvider.queryLimit())
        self._log.info('Dataset source will be queried every %s', strTime(data_refresh))
    dataPS.resyncSetup(interval = data_refresh, force = self._forceRefresh)

    def externalRefresh(sig, frame):
        self._log.info('External signal triggered resync of dataset source')
        dataPS.resyncSetup(force = True)
    signal.signal(signal.SIGUSR2, externalRefresh)

    if self._dataSplitter.getMaxJobs() == 0:
        if data_refresh < 0:
            raise UserError('Currently used dataset does not provide jobs to process')
        self._log.warning('Currently used dataset does not provide jobs to process')

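# Self-contained sketch of the SIGUSR2 hook registered above (hypothetical
# handler, not the original implementation): 'kill -USR2 <pid>' sets a flag
# that the main loop turns into a forced resync; the handler itself stays minimal.
import os
import signal
import time

_force_resync = {'flag': False}

def _externalRefresh(sig, frame):
    _force_resync['flag'] = True  # only flip a flag inside the signal handler

signal.signal(signal.SIGUSR2, _externalRefresh)
print('Send "kill -USR2 %d" within 30s to trigger a resync' % os.getpid())
for _ in range(30):
    if _force_resync['flag']:
        _force_resync['flag'] = False
        print('External signal triggered resync')
    time.sleep(1)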