def display(self):
    """Show a table of all task/job configuration variables for the selected jobs."""
    task_config = self._task.getTaskConfig()
    # column headers: plain task keys plus '<key>' markers for transient variables
    column_list = lzip(task_config, task_config)
    column_list.extend(imap(lambda key: (key, '<%s>' % key), self._task.getTransientVars()))
    seen_variables = set()
    row_list = []
    for jobNum in self._jobDB.getJobs(self._selector):
        job_config = self._task.getJobConfig(jobNum)
        seen_variables.update(job_config)
        # row = task config, overridden by transient vars, overridden by job config
        row = dict(task_config)
        row.update(self._task.getTransientVars())
        row.update(job_config)
        row_list.append(row)
    printTabular(sorted(column_list + lzip(seen_variables, seen_variables)), row_list)
def _submit_jobs(self, jobnum_list, task):
    # submit_jobs: Submit a number of jobs and yield (jobnum, WMS ID, other data) sequentially
    # >>jobnum: internal ID of the Job
    # JobNum is linked to the actual *task* here
    (jdl_fn, submit_jdl_fn) = self._submit_jobs_prepare(jobnum_list, task)
    try:
        # submit all jobs simultaneously and temporarily store verbose (ClassAdd) output
        activity = Activity('queuing jobs at scheduler')
        submit_args = ' -verbose -batch-name ' + task.get_description().task_name + ' ' + submit_jdl_fn
        proc = self._proc_factory.logged_execute(self._submit_exec, submit_args)
        try:
            # extract the Condor ID (WMS ID) of the jobs from output ClassAds
            jobnum_gc_id_list = []
            for line in proc.iter():
                if 'GridControl_GCIDtoWMSID' in line:
                    jobnum_wms_id = line.split('=')[1].strip(' "\n').split('@')
                    jobnum, wms_id = int(jobnum_wms_id[0]), jobnum_wms_id[1].strip()
                    # Condor creates a default job then overwrites settings on any subsequent job
                    # i.e. skip every second, but better be sure
                    if (not jobnum_gc_id_list) or (jobnum not in lzip(*jobnum_gc_id_list)[0]):
                        jobnum_gc_id_list.append((jobnum, self._create_gc_id(wms_id)))
            exit_code = proc.wait()
        finally:
            # fix: finish the activity even when reading the process output raises
            activity.finish()
        if (exit_code != 0) or (len(jobnum_gc_id_list) < len(jobnum_list)):
            if not self._explain_error(proc, exit_code):
                self._log.error('Submitted %4d jobs of %4d expected',
                    len(jobnum_gc_id_list), len(jobnum_list))
                proc.log_error(self._error_log_fn, jdl=jdl_fn)
    finally:
        remove_files([jdl_fn])
    for (jobnum, gc_id) in jobnum_gc_id_list:
        yield (jobnum, gc_id, {})
def draw_pie(ax, breakdown, pos, size, piecolor=None):
    """Draw a pie-chart marker on axis ``ax`` at position ``pos``.

    breakdown: raw per-slice weights; pos: (x, y) data coordinates;
    size: scatter marker size; piecolor: optional list of slice colors.
    """
    piecolor = piecolor or ['red', 'orange', 'green', 'blue', 'purple']
    # normalized cumulative slice boundaries in [0, 1]
    breakdown = [0] + list(numpy.cumsum(breakdown) * 1.0 / sum(breakdown))
    for i in irange(len(breakdown) - 1):
        # fix: compute the angular samples once per slice (was duplicated for x and y)
        fracs = numpy.linspace(2 * math.pi * breakdown[i], 2 * math.pi * breakdown[i + 1], 20)
        x = [0] + numpy.cos(fracs).tolist()
        y = [0] + numpy.sin(fracs).tolist()
        ax.scatter(pos[0], pos[1], marker=(lzip(x, y), 0), s=size,
            facecolor=piecolor[i % len(piecolor)])
def display(self):
    """Render one table row per selected job, listing every configuration variable."""
    base_dict = self._task.getTaskConfig()
    header = lzip(base_dict, base_dict)
    header.extend(imap(lambda var: (var, '<%s>' % var), self._task.getTransientVars()))
    (var_names, rows) = (set(), [])
    for jobNum in self._jobDB.getJobs(self._selector):
        job_dict = self._task.getJobConfig(jobNum)
        var_names.update(job_dict)
        # merge order: task config < transient variables < per-job config
        merged = dict(base_dict)
        merged.update(self._task.getTransientVars())
        merged.update(job_dict)
        rows.append(merged)
    printTabular(sorted(header + lzip(var_names, var_names)), rows)
def show_report(self, job_db, jobnum_list):
    """Print hierarchical job-state statistics with one column per job state."""
    state_map = dict(self._state_map)

    def _iter_rows(node, label, depth):
        # emit a 'Total' line (plus '=' separator) when this level has several entries
        if None in node:
            total = node.pop(None)
            if len(node) > 1:
                for row in self._get_entry(state_map, total, ['Total']):
                    yield row
                yield '='
        for (pos, key) in enumerate(sorted(node)):
            if depth == 1:
                for row in self._get_entry(state_map, node[key], [key] + label):
                    yield row
            else:
                for row in _iter_rows(node[key], [key] + label, depth - 1):
                    yield row
            if pos != len(node) - 1:
                yield '-'  # separator between sibling categories
    stats = self._get_hierachical_stats_dict(job_db, jobnum_list)
    state_name_list = lmap(itemgetter(1), self._state_map)
    header = [('', 'Category')] + lzip(state_name_list, state_name_list)
    self._show_table(header, _iter_rows(stats, [], len(self._idx_list)),
        align_str='l' + 'c' * len(state_map), fmt_dict={'': lambda x: str.join(' ', x)})
def _parseParameterTuple(self, varName, tupleValue, tupleType, varType, varIndex):
    """Parse component ``varIndex`` of a tuple-valued parameter string.

    tupleType selects the decomposition: 'tuple' splits on whitespace and a
    configurable delimeter, 'binning' pairs consecutive tokens.
    Raises ConfigError on malformed input or an unknown tupleType.
    """
    tupleStrings = None  # only filled in the 'tuple' branch; shown in error messages
    if tupleType == 'tuple':
        tupleDelimeter = self.get(self._getParameterOption(varName), 'delimeter', ',')
        tupleStrings = lmap(str.strip,
            utils.split_advanced(tupleValue, lambda tok: tok in ' \n', lambda tok: False))
        tupleList = lmap(lambda t: parseTuple(t, tupleDelimeter), tupleStrings)
    elif tupleType == 'binning':
        tupleList = lzip(tupleValue.split(), tupleValue.split()[1:])
    else:
        # fix: an unknown tuple type previously crashed with a NameError on tupleList
        raise ConfigError('[Variable: %s] Unknown tuple type %r' % (varName, tupleType))
    result = []
    for tupleEntry in tupleList:
        try:
            tmp = self._parseParameter(varName, tupleEntry[varIndex], varType)
        except Exception:
            raise ConfigError('Unable to parse %r' % repr((tupleEntry, tupleStrings)))
        if isinstance(tmp, list):
            if len(tmp) != 1:
                raise ConfigError('[Variable: %s] Tuple entry (%s) expands to multiple variable entries (%s)!' % (
                    varName, tupleEntry[varIndex], tmp))
            result.append(tmp[0])
        else:
            result.append(tmp)
    return result
def replaceDict(result, allVars, varMapping=None):
    """Substitute @key@ and __key__ markers in result with values from allVars."""
    key_pairs = QM(varMapping, varMapping, lzip(allVars.keys(), allVars.keys()))
    for (virtual, real) in key_pairs:
        replacement = str(allVars.get(real, ''))
        for delim in ['@', '__']:
            result = result.replace(delim + virtual + delim, replacement)
    return result
def __init__(self, head, data, fmt=None, top=True):
    """Build an HTML table string from header definitions and row dictionaries.

    head: list of (key, title) pairs; data: iterable of row dicts;
    fmt: optional per-key formatter callables (default str);
    top: if False the table is transposed (header becomes the first column).
    """
    self.table = """ <style type="text/css"> table {font-size:12px;color:#333333;border-width: 1px;border-color: #7799aa;border-collapse: collapse;} th {font-size:12px;background-color:#aacccc;border-width: 1px;padding: 8px;border-style: solid;border-color: #7799aa;text-align:left;} tr {background-color:#ffffff;} td {font-size:12px;border-width: 1px;padding: 8px;border-style: solid;border-color: #7799aa;} </style>"""
    fmt = fmt or {}
    formatter_list = lmap(lambda id_name: (id_name[0], fmt.get(id_name[0], str)), head)
    header_cells = lmap(lambda id_name: '<th>%s</th>' % id_name[1], head)

    def _row_cells(entry):
        return lmap(lambda id_fmt: '<td>%s</td>' % id_fmt[1](entry.get(id_fmt[0])), formatter_list)
    all_rows = [header_cells] + lmap(_row_cells, data)
    if not top:
        all_rows = lzip(*all_rows)  # transpose
    row_html = lmap(lambda row: '\t<tr>%s</tr>\n' % str.join('', row), all_rows)
    if top:
        width_str = 'width:100%;'
    else:
        width_str = ''
    self.table += '<table style="%s" border="1">\n%s</table>' % (width_str, str.join('', row_html))
def jobs(self, *args, **kw):
    """Render the job overview page; with ?job=N also show that job's configuration."""
    result = '<body>'
    result += str(CPProgressBar(0, min(100, self.counter), 100, 300))
    if 'job' in kw:
        jobNum = int(kw['job'])
        info = self.task.getJobConfig(jobNum)
        result += str(TabularHTML(lzip(sorted(info), sorted(info)), [info], top=False))

    def getJobObjs():
        # one dict per job, merging job attributes with the stored 'dict' entries
        for jobNum in self.jobMgr.jobDB.getJobs():
            job_dict = self.jobMgr.jobDB.get(jobNum).__dict__
            job_dict['jobNum'] = jobNum
            job_dict.update(job_dict['dict'])
            yield job_dict
    fmtTime = lambda t: time.strftime('%Y-%m-%d %T', time.localtime(t))
    header = [('jobNum', 'Job'), ('state', 'Status'), ('attempt', 'Attempt'),
        ('wmsId', 'WMS ID'), ('dest', 'Destination'), ('submitted', 'Submitted')]
    fmt = {
        'jobNum': lambda x: '<a href="jobs?job=%s">%s</a>' % (x, x),
        'state': Job.enum2str,
        'submitted': fmtTime,
    }
    result += str(TabularHTML(header, getJobObjs(), fmt=fmt, top=True))
    result += '</body>'
    return result
def getStatusDirect(wmsId):
    """Fetch the status of one job and pair lower-cased attribute names with values."""
    wrStatus.getStatusDirect(wmsId, 0)
    (err, apiMsg) = wrStatus.get_error()
    if err:
        raise BackendError(apiMsg)
    status_values = wrStatus.loadStatus()
    attr_names = imap(str.lower, jobStatus.states_names)
    return lzip(attr_names, status_values[:jobStatus.ATTR_MAX])
def jobs(self, *args, **kw):
    """Build the job overview page; a 'job' query parameter adds that job's variable table."""
    element_list = [CPProgressBar(0, min(100, self._counter), 100, 300)]
    if 'job' in kw:
        jobnum = int(kw['job'])
        job_info = self._workflow.task.get_job_dict(jobnum)
        element_list.append(CPTable(lzip(sorted(job_info), sorted(job_info)),
            [job_info], pivot=False))

    def _fmt_time(value):
        return time.strftime('%Y-%m-%d %T', time.localtime(value))

    def _iter_job_objs():
        # merge each transient job object's attributes with its stored 'dict' entries
        for jobnum in self._workflow.job_manager.job_db.get_job_list():
            job_dict = self._workflow.job_manager.job_db.get_job_transient(jobnum).__dict__
            job_dict['jobnum'] = jobnum
            job_dict.update(job_dict['dict'])
            yield job_dict
    header_list = [('jobnum', 'Job'), ('state', 'Status'), ('attempt', 'Attempt'),
        ('gc_id', 'WMS ID'), ('SITE', 'Site'), ('QUEUE', 'Queue'), ('submitted', 'Submitted')]
    fmt_dict = {
        'jobnum': lambda x: '<a href="jobs?job=%s">%s</a>' % (x, x),
        'state': Job.enum2str,
        'submitted': _fmt_time,
    }
    element_list.append(CPTable(header_list, _iter_job_objs(), fmt_dict=fmt_dict, pivot=True))
    return _get_html_page(element_list)
def display(self):
    """Print hierarchical job-state statistics; returns 0 as the caller's exit code."""
    stateMap = dict(self._stateMap)

    def _rows(node, label, depth):
        # emit a 'Total' line with '=' separator for multi-entry levels
        if None in node:
            total = node.pop(None)
            if len(node) > 1:
                for row in self._get_entry(stateMap, total, ['Total']):
                    yield row
                yield '='
        for (pos, key) in enumerate(sorted(node)):
            if depth == 1:
                for row in self._get_entry(stateMap, node[key], [key] + label):
                    yield row
            else:
                for row in _rows(node[key], [key] + label, depth - 1):
                    yield row
            if pos != len(node) - 1:
                yield '-'  # separator between sibling categories
    stats = self._getHierachicalStats()
    state_names = lmap(itemgetter(1), self._stateMap)
    header = [('', 'Category')] + lzip(state_names, state_names)
    printTabular(header, _rows(stats, [], len(self._idxList)),
        fmtString='l' + 'c' * len(stateMap), fmt={'': lambda x: str.join(' ', x)})
    return 0
def replace_with_dict(value, mapping_values, mapping_keys=None):
    """Replace @key@ and __key__ markers in value using mapping_values.

    mapping_keys optionally maps marker names to mapping_values keys;
    by default every key maps to itself. Missing keys substitute as ''.
    """
    mapping_keys = mapping_keys or lzip(mapping_values.keys(), mapping_values.keys())
    for (marker_name, source_key) in mapping_keys:
        replacement = str(mapping_values.get(source_key, ''))
        for delim in ['@', '__']:
            value = value.replace(delim + marker_name + delim, replacement)
    return value
def draw_pie(ax, breakdown, pos, size, piecolor=None):
    """Draw a pie-chart marker at position pos with the given slice weights."""
    piecolor = piecolor or ['red', 'orange', 'green', 'blue', 'purple']
    # normalized cumulative slice boundaries in [0, 1]
    boundary_list = [0] + list(numpy.cumsum(breakdown) * 1.0 / sum(breakdown))
    for idx in irange(len(boundary_list) - 1):
        angles = numpy.linspace(2 * math.pi * boundary_list[idx],
            2 * math.pi * boundary_list[idx + 1], 20)
        wedge_x = [0] + numpy.cos(angles).tolist()
        wedge_y = [0] + numpy.sin(angles).tolist()
        ax.scatter(pos[0], pos[1], marker=(lzip(wedge_x, wedge_y), 0),
            s=size, facecolor=piecolor[idx % len(piecolor)])
def _get_status_direct(wms_id):
    """Query one job's status and return (lower-cased attribute name, value) pairs."""
    wrapper_status.getStatus(wms_id, 0)
    (err, api_msg) = wrapper_status.get_error()
    if err:
        raise BackendError(api_msg)
    status_values = wrapper_status.loadStatus()
    name_iter = imap(str.lower, job_status.states_names)
    return lzip(name_iter, status_values[:job_status.ATTR_MAX])
def display_metadata(dataset_list, block, metadata_key_list, metadata_list, base_header_list=None):
    """Show a pivoted table of metadata entries belonging to a single block."""
    sorted_key_list = sorted(metadata_key_list)
    header_list = [(DataProvider.BlockName, 'Block')] + (base_header_list or [])
    header_list += lzip(sorted_key_list, sorted_key_list)
    for metadata in metadata_list:
        # tag every metadata row with its dataset / block origin
        metadata[DataProvider.Dataset] = block[DataProvider.Dataset]
        metadata[DataProvider.BlockName] = block.get(DataProvider.BlockName)
    title = get_title_update_header(dataset_list, header_list)
    ConsoleTable.create(header_list, metadata_list, title=title, pivot=True)
def getStatusDirect(wmsID):
    """Poll the wrapper status object for wmsID and return name/value pairs."""
    wrStatus.getStatus(wmsID, 0)
    (err, apiMsg) = wrStatus.get_error()
    if err:
        raise BackendError(apiMsg)
    raw_info = wrStatus.loadStatus()
    lowered_names = imap(str.lower, jobStatus.states_names)
    return lzip(lowered_names, raw_info[0:jobStatus.ATTR_MAX])
def show_report(self, job_db, jobnum_list):
    """Tabulate all job variables plus '<var>' markers for transient variables."""
    (env_dict_list, vn_set) = ([], set())
    for jobnum in jobnum_list:
        env_dict = self._task.get_job_dict(jobnum)
        vn_set.update(env_dict)
        env_dict.update(self._task.get_transient_variables())
        env_dict_list.append(env_dict)
    # transient variables are displayed as '<name>' placeholder columns
    header_list = [(vn, '<%s>' % vn) for vn in self._task.get_transient_variables()]
    self._show_table(sorted(header_list + lzip(vn_set, vn_set)), env_dict_list)
def show_report(self, job_db, jobnum_list):
    """List job number, state and backend id for every job that left the INIT state."""
    report_dict_list = []
    for jobnum in jobnum_list:
        job_obj = job_db.get_job_transient(jobnum)
        if job_obj.state == Job.INIT:
            continue  # freshly initialized jobs carry no useful information
        report_dict_list.append({0: jobnum, 1: Job.enum2str(job_obj.state), 2: job_obj.gc_id})
        self._fill_report_dict_list(report_dict_list, job_obj)
    header_list = ['Job', 'Status / Attempt', 'Id / Destination']
    self._show_table(lzip(irange(3), header_list), report_dict_list, 'rcl')
def list_metadata(datasets, blocks):
    """Print every file of each block together with its metadata key/value pairs."""
    print('')
    for block in blocks:
        if len(datasets) > 1:
            print('Dataset: %s' % block[DataProvider.Dataset])
        print('Blockname: %s' % block[DataProvider.BlockName])
        # fix: guard against a present-but-empty metadata key list
        # (max() of an empty sequence raises ValueError)
        mk_len = max(imap(len, block.get(DataProvider.Metadata, ['']) or ['']))
        for f in block[DataProvider.FileList]:
            print('%s [%d events]' % (f[DataProvider.URL], f[DataProvider.NEntries]))
            print_metadata(lzip(block.get(DataProvider.Metadata, []),
                f.get(DataProvider.Metadata, [])), mk_len)
        print('')
def show_report(self, job_db, jobnum_list):
    """Show a sorted variable table for the given jobs, including transient variables."""
    vn_set = set()
    job_env_dict_list = []
    for jobnum in jobnum_list:
        job_env_dict = self._task.get_job_dict(jobnum)
        vn_set.update(job_env_dict)
        job_env_dict.update(self._task.get_transient_variables())
        job_env_dict_list.append(job_env_dict)
    # transient variables appear as '<name>' placeholder columns
    transient_header = [(key, '<%s>' % key) for key in self._task.get_transient_variables()]
    self._show_table(sorted(transient_header + lzip(vn_set, vn_set)), job_env_dict_list)
def cancelJobs(self, wmsJobIdList):
    """Cancel the given jobs; yields (jobNum, wmsID) for each successfully removed job."""
    if len(wmsJobIdList) == 0:
        return  # fix: 'raise StopIteration' in a generator is a RuntimeError since PEP 479
    self.debugOut("Started canceling: %s" % set(lzip(*wmsJobIdList)[0]))
    self.debugPool()
    wmsIdList = list(self._getRawIDs(wmsJobIdList))
    wmsIdArgument = " ".join(wmsIdList)
    wmsToJobMap = dict(wmsJobIdList)
    activity = utils.ActivityLog('cancelling jobs')
    cancelProcess = self.Pool.LoggedExecute(self.cancelExec,
        '%(jobIDs)s' % {"jobIDs": wmsIdArgument})
    # check if canceling actually worked
    for cancelReturnLine in cancelProcess.iter():
        if (cancelReturnLine != '\n') and ('marked for removal' in cancelReturnLine):
            try:
                wmsID = cancelReturnLine.split()[1]
                wmsIdList.remove(wmsID)
                wmsID = self._createId(wmsID)
                jobNum = wmsToJobMap[wmsID]
                yield (jobNum, wmsID)
            except KeyError:  # mismatch in GC<->Condor mapping
                self._log.error('Error with canceled condor job %s', wmsID)
                self._log.error('\tCondor IDs: %s', wmsIdList)
                self._log.error('\tProcess message: %s', cancelProcess.getMessage())
                raise BackendError('Error while cancelling job %s' % wmsID)
            # clean up remote work dir
            if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
                cleanupProcess = self.Pool.LoggedExecute('rm -rf %s' % self.getWorkdirPath(jobNum))
                self.debugOut("Cleaning up remote workdir:\n " + cleanupProcess.cmd)
                if cleanupProcess.wait() != 0:
                    if self.explainError(cleanupProcess, cleanupProcess.wait()):
                        pass
                    else:
                        cleanupProcess.logError(self.errorLog)
    retCode = cancelProcess.wait()
    if retCode != 0:
        if self.explainError(cancelProcess, retCode):
            pass
        else:
            cancelProcess.logError(self.errorLog)
    # clean up if necessary
    activity.finish()
    self._tidyUpWorkingDirectory()
    self.debugFlush()
def show_report(self, job_db, jobnum_list):
    """Report state / attempt / destination details for all jobs past INIT."""
    rows = []
    for jobnum in jobnum_list:
        job_obj = job_db.get_job_transient(jobnum)
        if job_obj.state != Job.INIT:
            rows.append({0: jobnum, 1: Job.enum2str(job_obj.state), 2: job_obj.gc_id})
            self._fill_report_dict_list(rows, job_obj)
    self._show_table(lzip(irange(3), ['Job', 'Status / Attempt', 'Id / Destination']),
        rows, 'rcl')
def display(self):
    """Print job number, state and backend id for each selected, non-initial job."""
    report_rows = []
    for jobNum in self._jobs:
        jobObj = self._jobDB.getJob(jobNum)
        if (not jobObj) or (jobObj.state == Job.INIT):
            continue  # skip unknown or freshly initialized jobs
        report_rows.append({0: jobNum, 1: Job.enum2str(jobObj.state), 2: jobObj.gcID})
        self._add_details(report_rows, jobObj)
    utils.printTabular(lzip(irange(3), ['Job', 'Status / Attempt', 'Id / Destination']),
        report_rows, 'rcl')
def backend_list(finder_name):
    """Discover backend entries via the given BackendDiscovery plugin and tabulate them."""
    finder = Plugin.get_class('BackendDiscovery').create_instance(finder_name, gc_create_config())
    item_dict_list = []
    item_key_set = set()
    for item_dict in finder.discover():
        nice_item_dict = {}
        for (key, value) in item_dict.items():
            # integer keys are WMS enum values - translate them to readable names
            if isinstance(key, int):
                key = WMS.enum2str(key)
            nice_item_dict[key] = value
            item_key_set.add(key)
        item_dict_list.append(nice_item_dict)
    # fix: remove() raised KeyError when no entry carried a 'name' key
    item_key_set.discard('name')
    item_key_list = sorted(item_key_set)
    ConsoleTable.create([('name', 'Name')] + lzip(item_key_list, item_key_list), item_dict_list)
def _getJobsOutput(self, wmsJobIdList):
    """Retrieve output sandboxes; yields (jobNum, sandbox path or None) per job."""
    if not len(wmsJobIdList):
        return  # fix: 'raise StopIteration' in a generator is a RuntimeError since PEP 479
    self.debugOut("Started retrieving: %s" % set(lzip(*wmsJobIdList)[0]))
    activity = utils.ActivityLog('retrieving job outputs')
    for wmsId, jobNum in wmsJobIdList:
        sandpath = self.getSandboxPath(jobNum)
        if sandpath is None:
            yield (jobNum, None)
            continue
        # when working with a remote spool schedd, tell condor to return files
        if self.remoteType == PoolType.SPOOL:
            transferProcess = self.Pool.LoggedExecute(self.transferExec,
                '%(jobID)s' % {"jobID": self._splitId(wmsId)})
            if transferProcess.wait() != 0:
                if self.explainError(transferProcess, transferProcess.wait()):
                    pass
                else:
                    transferProcess.logError(self.errorLog)
        # when working with a remote [gsi]ssh schedd, manually return files
        elif self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
            transferProcess = self.Pool.LoggedCopyFromRemote(
                self.getWorkdirPath(jobNum), self.getSandboxPath())
            if transferProcess.wait() != 0:
                if self.explainError(transferProcess, transferProcess.wait()):
                    pass
                else:
                    transferProcess.logError(self.errorLog)
            # clean up remote working directory
            cleanupProcess = self.Pool.LoggedExecute('rm -rf %s' % self.getWorkdirPath(jobNum))
            self.debugOut("Cleaning up remote workdir: JobID %s\n %s" % (jobNum, cleanupProcess.cmd))
            if cleanupProcess.wait() != 0:
                if self.explainError(cleanupProcess, cleanupProcess.wait()):
                    pass
                else:
                    cleanupProcess.logError(self.errorLog)
        yield (jobNum, sandpath)
    # clean up if necessary
    activity.finish()
    self._tidyUpWorkingDirectory()
    self.debugFlush()
def display(self):
    """Print job state table; with verbosity > 0 also show the destination history."""
    reports = []
    for jobNum in self._jobs:
        jobObj = self._jobDB.get(jobNum)
        if not jobObj or (jobObj.state == Job.INIT):
            continue
        reports.append({0: jobNum, 1: Job.enum2str(jobObj.state), 2: jobObj.wmsId})
        if utils.verbosity() > 0:
            # fix: dict.items() returns a view in Python 3 - materialize before reversing
            history = list(jobObj.history.items())
            history.reverse()
            for at, dest in history:
                if dest != 'N/A':
                    reports.append({1: at, 2: ' -> ' + dest})
        elif jobObj.get('dest', 'N/A') != 'N/A':
            reports.append({2: ' -> ' + jobObj.get('dest')})
    utils.printTabular(lzip(irange(3), ['Job', 'Status / Attempt', 'Id / Destination']),
        reports, 'rcl')
def cancelJobs(self, wmsJobIdList):
    """Cancel jobs via the condor cancel executable; yields (jobNum, wmsID) per removal."""
    if len(wmsJobIdList) == 0:
        return  # fix: 'raise StopIteration' in a generator is a RuntimeError since PEP 479
    self.debugOut("Started canceling: %s" % set(lzip(*wmsJobIdList)[0]))
    self.debugPool()
    wmsIdList = list(self._getRawIDs(wmsJobIdList))
    wmsIdArgument = " ".join(wmsIdList)
    wmsToJobMap = dict(wmsJobIdList)
    activity = utils.ActivityLog('cancelling jobs')
    cancelProcess = self.Pool.LoggedExecute(self.cancelExec,
        '%(jobIDs)s' % {"jobIDs": wmsIdArgument})
    # check if canceling actually worked
    for cancelReturnLine in cancelProcess.iter():
        if (cancelReturnLine != '\n') and ('marked for removal' in cancelReturnLine):
            try:
                wmsID = cancelReturnLine.split()[1]
                wmsIdList.remove(wmsID)
                wmsID = self._createId(wmsID)
                jobNum = wmsToJobMap[wmsID]
                yield (jobNum, wmsID)
            except KeyError:  # mismatch in GC<->Condor mapping
                self._log.error('Error with canceled condor job %s', wmsID)
                self._log.error('\tCondor IDs: %s', wmsIdList)
                self._log.error('\tProcess message: %s', cancelProcess.getMessage())
                raise BackendError('Error while cancelling job %s' % wmsID)
            # clean up remote work dir
            if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
                cleanupProcess = self.Pool.LoggedExecute('rm -rf %s' % self.getWorkdirPath(jobNum))
                self.debugOut("Cleaning up remote workdir:\n " + cleanupProcess.cmd)
                if cleanupProcess.wait() != 0:
                    if self.explainError(cleanupProcess, cleanupProcess.wait()):
                        pass
                    else:
                        cleanupProcess.logError(self.errorLog)
    retCode = cancelProcess.wait()
    if retCode != 0:
        if self.explainError(cancelProcess, retCode):
            pass
        else:
            cancelProcess.logError(self.errorLog)
    # clean up if necessary
    activity.finish()
    self._tidyUpWorkingDirectory()
    self.debugFlush()
def backend_list(finder_name):
    """Instantiate the named BackendDiscovery plugin and print its discovered entries."""
    finder = Plugin.get_class('BackendDiscovery').create_instance(
        finder_name, gc_create_config())
    item_dict_list = []
    item_key_set = set()
    for item_dict in finder.discover():
        nice_item_dict = {}
        for (key, value) in item_dict.items():
            # integer keys are WMS enum values - translate them to readable names
            if isinstance(key, int):
                key = WMS.enum2str(key)
            nice_item_dict[key] = value
            item_key_set.add(key)
        item_dict_list.append(nice_item_dict)
    # fix: remove() raised KeyError when no discovered entry carried a 'name' key
    item_key_set.discard('name')
    item_key_list = sorted(item_key_set)
    ConsoleTable.create([('name', 'Name')] + lzip(item_key_list, item_key_list),
        item_dict_list)
def _draw_pie(numpy, axis, js_dict, pos, size, piecolor=None):
    """Draw a job-state pie marker; slice weights come from the job-state counters."""
    def _sum(job_class):
        # fix: default missing states to 0 - dict.get returning None made sum() raise
        return sum(imap(lambda job_state: js_dict.get(job_state, 0), job_class.state_list))
    piecolor = piecolor or ['red', 'orange', 'green', 'blue', 'purple']
    breakdown = lmap(_sum, [JobClass.FAILING, JobClass.RUNNING, JobClass.SUCCESS,
        JobClass.DONE, JobClass.ATWMS])
    # normalized cumulative slice boundaries in [0, 1]
    breakdown = [0] + list(numpy.cumsum(breakdown) * 1.0 / sum(breakdown))
    for idx in irange(len(breakdown) - 1):
        fracs = numpy.linspace(2 * math.pi * breakdown[idx],
            2 * math.pi * breakdown[idx + 1], 20)
        loc_x = [0] + numpy.cos(fracs).tolist()
        loc_y = [0] + numpy.sin(fracs).tolist()
        axis.scatter(pos[0], pos[1], marker=(lzip(loc_x, loc_y), 0), s=size,
            facecolor=piecolor[idx % len(piecolor)])
def _parseParameterTuple(self, varName, tupleValue, tupleType, varType, varIndex):
    """Parse component ``varIndex`` of a tuple-valued parameter string.

    'tuple' splits on whitespace and a configurable delimeter; 'binning'
    pairs consecutive tokens. Raises ConfigError on bad input or type.
    """
    tupleStrings = None  # only set in the 'tuple' branch; referenced in error messages
    if tupleType == 'tuple':
        tupleDelimeter = self.get(self._getParameterOption(varName), 'delimeter', ',')
        tupleStrings = lmap(str.strip,
            utils.split_advanced(tupleValue, lambda tok: tok in ' \n', lambda tok: False))
        tupleList = lmap(lambda t: parseTuple(t, tupleDelimeter), tupleStrings)
    elif tupleType == 'binning':
        tupleList = lzip(tupleValue.split(), tupleValue.split()[1:])
    else:
        # fix: an unknown tuple type previously crashed with a NameError on tupleList
        raise ConfigError('[Variable: %s] Unknown tuple type %r' % (varName, tupleType))
    result = []
    for tupleEntry in tupleList:
        try:
            tmp = self._parseParameter(varName, tupleEntry[varIndex], varType)
        except Exception:
            raise ConfigError('Unable to parse %r' % repr((tupleEntry, tupleStrings)))
        if isinstance(tmp, list):
            if len(tmp) != 1:
                raise ConfigError('[Variable: %s] Tuple entry (%s) expands to multiple variable entries (%s)!' % (
                    varName, tupleEntry[varIndex], tmp))
            result.append(tmp[0])
        else:
            result.append(tmp)
    return result
def _getJobsOutput(self, wmsJobIdList):
    """Retrieve output sandboxes; yields (jobNum, sandbox path or None) per job."""
    if not len(wmsJobIdList):
        return  # fix: 'raise StopIteration' in a generator is a RuntimeError since PEP 479
    self.debugOut("Started retrieving: %s" % set(lzip(*wmsJobIdList)[0]))
    activity = Activity('retrieving job outputs')
    for gcID, jobNum in wmsJobIdList:
        sandpath = self.getSandboxPath(jobNum)
        if sandpath is None:
            yield (jobNum, None)
            continue
        # when working with a remote spool schedd, tell condor to return files
        if self.remoteType == PoolType.SPOOL:
            transferProcess = self.Pool.LoggedExecute(self.transferExec,
                '%(jobID)s' % {"jobID": self._splitId(gcID)})
            if transferProcess.wait() != 0:
                if self.explainError(transferProcess, transferProcess.wait()):
                    pass
                else:
                    transferProcess.logError(self.errorLog)
        # when working with a remote [gsi]ssh schedd, manually return files
        elif self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
            transferProcess = self.Pool.LoggedCopyFromRemote(
                self.getWorkdirPath(jobNum), self.getSandboxPath())
            if transferProcess.wait() != 0:
                if self.explainError(transferProcess, transferProcess.wait()):
                    pass
                else:
                    transferProcess.logError(self.errorLog)
            # clean up remote working directory
            cleanupProcess = self.Pool.LoggedExecute('rm -rf %s' % self.getWorkdirPath(jobNum))
            self.debugOut("Cleaning up remote workdir: JobID %s\n %s" % (jobNum, cleanupProcess.cmd))
            if cleanupProcess.wait() != 0:
                if self.explainError(cleanupProcess, cleanupProcess.wait()):
                    pass
                else:
                    cleanupProcess.logError(self.errorLog)
        yield (jobNum, sandpath)
    # clean up if necessary
    activity.finish()
    self._tidyUpWorkingDirectory()
    self.debugFlush()
def display(self):
    """Print job state table; with verbosity > 0 also show the destination history."""
    reports = []
    for jobNum in self._jobs:
        jobObj = self._jobDB.get(jobNum)
        if not jobObj or (jobObj.state == Job.INIT):
            continue
        reports.append({0: jobNum, 1: Job.enum2str(jobObj.state), 2: jobObj.wmsId})
        if utils.verbosity() > 0:
            # fix: dict.items() returns a view in Python 3 - materialize before reversing
            history = list(jobObj.history.items())
            history.reverse()
            for at, dest in history:
                if dest != 'N/A':
                    reports.append({1: at, 2: ' -> ' + dest})
        elif jobObj.get('dest', 'N/A') != 'N/A':
            reports.append({2: ' -> ' + jobObj.get('dest')})
    utils.printTabular(
        lzip(irange(3), ['Job', 'Status / Attempt', 'Id / Destination']),
        reports, 'rcl')
def __init__(self, head, data, fmt=None, top=True):
    """Assemble an HTML table from (key, title) headers and row dictionaries.

    fmt maps keys to formatter callables (default str); top=False
    transposes the table so the header becomes the first column.
    """
    self.table = """ <style type="text/css"> table {font-size:12px;color:#333333;border-width: 1px;border-color: #7799aa;border-collapse: collapse;} th {font-size:12px;background-color:#aacccc;border-width: 1px;padding: 8px;border-style: solid;border-color: #7799aa;text-align:left;} tr {background-color:#ffffff;} td {font-size:12px;border-width: 1px;padding: 8px;border-style: solid;border-color: #7799aa;} </style>"""
    fmt = fmt or {}
    cell_formatters = lmap(lambda id_name: (id_name[0], fmt.get(id_name[0], str)), head)
    header_cells = lmap(lambda id_name: '<th>%s</th>' % id_name[1], head)

    def _cells_for(entry):
        return lmap(lambda id_fmt: '<td>%s</td>' % id_fmt[1](entry.get(id_fmt[0])), cell_formatters)
    table_rows = [header_cells] + lmap(_cells_for, data)
    if not top:
        table_rows = lzip(*table_rows)  # transpose
    row_markup = lmap(lambda row: '\t<tr>%s</tr>\n' % str.join('', row), table_rows)
    if top:
        width_str = 'width:100%;'
    else:
        width_str = ''
    self.table += '<table style="%s" border="1">\n%s</table>' % (width_str, str.join('', row_markup))
def get_body(self):
    """Return the HTML <table> markup for the stored header, rows and formatters."""
    fmt_dict = self._fmt_dict or {}
    cell_fmt_list = lmap(lambda id_name: (id_name[0], fmt_dict.get(id_name[0], str)), self._head)
    header_row = lmap(lambda id_name: _tag('th', id_name[1]), self._head)

    def _render_row(entry):
        return lmap(lambda id_fmt: _tag('td', id_fmt[1](entry.get(id_fmt[0]))), cell_fmt_list)
    row_list = [header_row] + lmap(_render_row, self._data)
    width_str = 'width:100%;'
    if not self._pivot:
        row_list = lzip(*row_list)  # transpose so the header becomes the first column
        width_str = ''
    table_body = str.join('', lmap(lambda row: _tag('tr', str.join('', row)), row_list))
    return _tag('table', table_body, style=width_str, border=1)
def jobs(self, *args, **kw):
    """Serve the job overview page; the optional 'job' parameter adds a config table."""
    page = '<body>'
    page += str(CPProgressBar(0, min(100, self.counter), 100, 300))
    if 'job' in kw:
        jobNum = int(kw['job'])
        info = self.task.getJobConfig(jobNum)
        page += str(TabularHTML(lzip(sorted(info), sorted(info)), [info], top=False))

    def getJobObjs():
        # yield one dict per job, merging attributes with the stored 'dict' entries
        for jobNum in self.jobMgr.jobDB.getJobs():
            job_dict = self.jobMgr.jobDB.get(jobNum).__dict__
            job_dict['jobNum'] = jobNum
            job_dict.update(job_dict['dict'])
            yield job_dict
    fmtTime = lambda t: time.strftime('%Y-%m-%d %T', time.localtime(t))
    page += str(TabularHTML([
        ('jobNum', 'Job'), ('state', 'Status'), ('attempt', 'Attempt'),
        ('wmsId', 'WMS ID'), ('dest', 'Destination'), ('submitted', 'Submitted'),
    ], getJobObjs(), fmt={
        'jobNum': lambda x: '<a href="jobs?job=%s">%s</a>' % (x, x),
        'state': Job.enum2str,
        'submitted': fmtTime,
    }, top=True))
    page += '</body>'
    return page
if len(args) != 1: utils.exitWithUsage(parser.usage('part')) splitter = DataSplitter.loadPartitionsForScript(args[0]) if opts.partition_list_invalid: utils.printTabular([(0, 'Job')], partition_invalid(splitter)) if opts.partition_list is not None: if opts.partition_list in ('', 'all'): keyStrings = DataSplitter.enumNames else: keyStrings = opts.partition_list.split(',') keyList = lmap(DataSplitter.str2enum, keyStrings) if None in keyList: logging.warning('Available keys: %r', DataSplitter.enumNames) utils.printTabular([('jobNum', 'Job')] + lzip(keyList, keyStrings), partition_list(splitter, keyList)) if opts.partition_check: logging.info('Checking %d jobs...', splitter.getMaxJobs()) partition_check(splitter) ######################################################## # JOBS def jobs_reset_attempts(jobDB, selected): for jobNum in jobDB.getJobsIter(selected): logging.info('Resetting attempts for job %d', jobNum) jobObj = jobDB.getJob(jobNum) jobObj.attempt = 0 jobObj.history = {} for key in jobObj.dict.keys():
if len(args) != 1: utils.exitWithUsage(parser.usage('part')) splitter = DataSplitter.loadStateForScript(args[0]) if opts.partition_list_invalid: utils.printTabular([(0, 'Job')], partition_invalid(splitter)) if opts.partition_list is not None: if opts.partition_list: keyStrings = opts.partition_list.split(',') else: keyStrings = DataSplitter.enumNames keyList = lmap(DataSplitter.str2enum, keyStrings) if None in keyList: logging.warning('Available keys: %r', DataSplitter.enumNames) utils.printTabular([('jobNum', 'Job')] + lzip(keyList, keyStrings), partition_list(splitter, keyList)) if opts.partition_check: logging.info('Checking %d jobs...', splitter.getMaxJobs()) partition_check(splitter) ######################################################## # JOBS def jobs_reset_attempts(jobDB, selected): for jobNum in jobDB.getJobsIter(selected): logging.info('Resetting attempts for job %d', jobNum) jobinfo = jobDB.get(jobNum) jobinfo.attempt = 0
def checkJobs(self, wmsJobIdList):
    """Query condor for job status; yields (jobNum, wmsID, status, jobinfo) per job."""
    if len(wmsJobIdList) == 0:
        return  # fix: 'raise StopIteration' in a generator is a RuntimeError since PEP 479
    self.debugOut('Started checking: %s' % set(lzip(*wmsJobIdList)[0]))
    self.debugPool()
    wmsIdList = list(self._getRawIDs(wmsJobIdList))
    wmsIdArgument = ' '.join(wmsIdList)
    wmsToJobMap = dict(wmsJobIdList)
    activity = utils.ActivityLog('fetching job status')
    statusProcess = self.Pool.LoggedExecute(self.statusExec, '%(format)s %(jobIDs)s' % {
        "jobIDs": wmsIdArgument, "format": self.statusReturnFormat})
    activity.finish()
    activity = utils.ActivityLog('checking job status')
    # process all lines of the status executable output
    utils.vprint('querrying condor_q', 2)
    for statusReturnLine in statusProcess.iter():
        try:
            # test if wmsID job was requested, then extract data and remove from check list
            if statusReturnLine.split()[0] in wmsIdList:
                (jobID, wmsID, status, jobinfo) = self._statusReturnLineRead(statusReturnLine)
                wmsIdList.remove(wmsID)
                yield (jobID, self._createId(wmsID), status, jobinfo)
        except Exception:
            raise BackendError('Error reading job status info:\n%s' % statusReturnLine)
    # cleanup after final yield
    retCode = statusProcess.wait()
    if retCode != 0:
        if self.explainError(statusProcess, retCode):
            pass
        else:
            statusProcess.logError(self.errorLog, brief=True)
    activity.finish()
    self.debugOut("Remaining after condor_q: %s" % wmsIdList)
    # jobs not in queue have either succeeded or failed - both is considered 'Done' for GC
    # if no additional information is required, consider everything we couldn't find as done
    if retCode == 0:
        for wmsID in list(wmsIdList):
            wmsIdList.remove(wmsID)
            wmsID = self._createId(wmsID)
            yield (wmsToJobMap[wmsID], wmsID, Job.DONE, {})
    # TODO: querry log on properly configured pool
    # querying the history can be SLOW! only do when necessary and possible
    if False and len(wmsIdList) > 0 and self.remoteType != PoolType.SPOOL:
        utils.vprint('querrying condor_history', 2)
        # querying the history can be VERY slow! Only do so bit by bit if possible
        if self.historyFile:
            # fix: renamed loop variable 'file' (shadowed the builtin)
            historyList = sorted(["-f " + history_fn for history_fn in
                ifilter(os.path.isfile, glob.glob(self.historyFile + "*"))])
        else:
            historyList = [""]
        # query the history file by file until no more jobs need updating
        for historyFile in historyList:
            if len(wmsIdList) > 0:
                statusArgs = '%(fileQuery)s %(format)s %(jobIDs)s' % {
                    "fileQuery": historyFile, "jobIDs": " ", "format": self.statusReturnFormat}
                statusProcess = self.Pool.LoggedExecute(self.historyExec, statusArgs)
                for statusReturnLine in statusProcess.iter():
                    # test if line starts with a number and was requested
                    try:
                        # test if wmsID job was requested, then extract data and remove from check list
                        if statusReturnLine.split()[0] in wmsIdList:
                            (jobID, wmsID, status, jobinfo) = self._statusReturnLineRead(statusReturnLine)
                            wmsIdList.remove(wmsID)
                            yield (jobID, self._createId(wmsID), status, jobinfo)
                    except Exception:
                        raise BackendError('Error reading job status info:\n%s' % statusReturnLine)
                # cleanup after final yield
                retCode = statusProcess.wait()
                if retCode != 0:
                    if self.explainError(statusProcess, retCode):
                        pass
                    else:
                        statusProcess.logError(self.errorLog, brief=True)
    self.debugFlush()
def submitJobs(self, jobNumListFull, module):
	"""Submit jobs to the Condor scheduler in batches of 25.

	jobNumListFull: list of internal job numbers to submit.
	module: task object providing JDL and job configuration data.
	Yields (job number, gc id, {}) for every successfully queued job.
	Raises BackendError if JDL or job config data cannot be written.
	"""
	submitBatch = 25
	for index in irange(0, len(jobNumListFull), submitBatch):
		jobNumList = jobNumListFull[index:index + submitBatch]
		self.debugOut("\nStarted submitting: %s" % jobNumList)
		self.debugPool()

		# get the full job config path and basename
		def _getJobCFG(jobNum):
			return os.path.join(self.getSandboxPath(jobNum), 'job_%d.var' % jobNum), 'job_%d.var' % jobNum

		activity = Activity('preparing jobs')
		# construct a temporary JDL for this batch of jobs
		jdlDescriptor, jdlFilePath = tempfile.mkstemp(suffix='.jdl')
		jdlSubmitPath = jdlFilePath
		self.debugOut("Writing temporary jdl to: " + jdlSubmitPath)
		try:
			data = self.makeJDLdata(jobNumList, module)
			utils.safeWrite(os.fdopen(jdlDescriptor, 'w'), data)
		except Exception:
			utils.removeFiles([jdlFilePath])
			raise BackendError('Could not write jdl data to %s.' % jdlFilePath)
		# create the _jobconfig.sh file containing the actual data
		for jobNum in jobNumList:
			try:
				self._writeJobConfig(_getJobCFG(jobNum)[0], jobNum, module, {})
			except Exception:
				raise BackendError('Could not write _jobconfig data for %s.' % jobNum)

		self.debugOut("Copying to remote")
		# copy infiles to ssh/gsissh remote pool if required
		if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
			activity = Activity('preparing remote scheduler')

			def _copyWithCheck(fileSource, fileTarget):
				# copy one file to the remote pool; log (or explain) failures
				copyProcess = self.Pool.LoggedCopyToRemote(fileSource, fileTarget)
				if copyProcess.wait() != 0:
					if not self.explainError(copyProcess, copyProcess.wait()):
						copyProcess.logError(self.errorLog, brief=True)
				self.debugFlush()

			self.debugOut("Copying to sandbox")
			workdirBase = self.getWorkdirPath()
			# TODO: check whether shared remote files already exist and copy otherwise
			for _, fileSource, fileTarget in self._getSandboxFilesIn(module):
				_copyWithCheck(fileSource, os.path.join(workdirBase, fileTarget))
			# copy job config files
			self.debugOut("Copying job configs")
			for jobNum in jobNumList:
				fileSource, fileTarget = _getJobCFG(jobNum)
				_copyWithCheck(fileSource, os.path.join(self.getWorkdirPath(jobNum), fileTarget))
			# copy jdl - submission happens against the remote copy from here on
			self.debugOut("Copying jdl")
			jdlSubmitPath = os.path.join(workdirBase, os.path.basename(jdlFilePath))
			_copyWithCheck(jdlFilePath, jdlSubmitPath)
			# copy proxy
			for authFile in self._token.getAuthFiles():
				self.debugOut("Copying proxy")
				_copyWithCheck(authFile, os.path.join(self.getWorkdirPath(), os.path.basename(authFile)))

		self.debugOut("Starting jobs")
		try:
			# submit all jobs simultaneously and temporarily store verbose (ClassAd) output
			activity = Activity('queuing jobs at scheduler')
			proc = self.Pool.LoggedExecute(self.submitExec, ' -verbose %(JDL)s' % {"JDL": jdlSubmitPath})
			# extract the Condor ID (WMS ID) of the jobs from output ClassAds
			wmsJobIdList = []
			for line in proc.iter():
				if "GridControl_GCIDtoWMSID" in line:
					GCWMSID = line.split('=')[1].strip(' "\n').split('@')
					GCID, WMSID = int(GCWMSID[0]), GCWMSID[1].strip()
					# Condor creates a default job then overwrites settings on any
					# subsequent job - i.e. skip every second, but better be sure
					# BUGFIX: entries are (gc id, GCID) tuples, so already-seen GCIDs
					# are in column 1 - the old check against column 0 never matched
					if (not wmsJobIdList) or (GCID not in lzip(*wmsJobIdList)[1]):
						wmsJobIdList.append((self._createId(WMSID), GCID))
				if "GridControl_GCtoWMSID" in line:
					self.debugOut("o : %s" % line)
					self.debugOut("o : %s" % wmsJobIdList)
			retCode = proc.wait()
			activity.finish()
			if (retCode != 0) or (len(wmsJobIdList) < len(jobNumList)):
				if not self.explainError(proc, retCode):
					self._log.error('Submitted %4d jobs of %4d expected', len(wmsJobIdList), len(jobNumList))
					proc.logError(self.errorLog, jdl=jdlFilePath)
		finally:
			utils.removeFiles([jdlFilePath])
		self.debugOut("Done Submitting")
		# yield the (jobNum, WMS ID, other data) of each job successively
		for (gcId, jobNum) in wmsJobIdList:
			yield (jobNum, gcId, {})
			self.debugOut("Yielded submitted job")
		self.debugFlush()
def checkJobs(self, wmsJobIdList):
	"""Query condor_q for the status of the given jobs.

	wmsJobIdList: list of (gc id, job number) pairs to check.
	Yields (job number, gc id, status, job info dict) for each job found in
	the queue; jobs that have left the queue are reported as Job.DONE.
	Raises BackendError if a status line cannot be parsed.
	"""
	if len(wmsJobIdList) == 0:
		# BUGFIX (PEP 479): 'raise StopIteration' inside a generator becomes a
		# RuntimeError on Python 3.7+ - a plain return ends the generator safely
		return
	self.debugOut('Started checking: %s' % set(lzip(*wmsJobIdList)[0]))
	self.debugPool()
	wmsIdList = list(self._getRawIDs(wmsJobIdList))
	wmsIdArgument = ' '.join(wmsIdList)
	wmsToJobMap = dict(wmsJobIdList)
	activity = utils.ActivityLog('fetching job status')
	statusProcess = self.Pool.LoggedExecute(self.statusExec,
		'%(format)s %(jobIDs)s' % {"jobIDs": wmsIdArgument, "format": self.statusReturnFormat})
	activity.finish()
	activity = utils.ActivityLog('checking job status')
	# process all lines of the status executable output
	utils.vprint('querrying condor_q', 2)
	for statusReturnLine in statusProcess.iter():
		try:
			# test if wmsID job was requested, then extract data and remove from check list
			if statusReturnLine.split()[0] in wmsIdList:
				(jobID, wmsID, status, jobinfo) = self._statusReturnLineRead(statusReturnLine)
				wmsIdList.remove(wmsID)
				yield (jobID, self._createId(wmsID), status, jobinfo)
		except Exception:
			raise BackendError('Error reading job status info:\n%s' % statusReturnLine)
	# cleanup after final yield
	retCode = statusProcess.wait()
	if retCode != 0:
		if not self.explainError(statusProcess, retCode):
			statusProcess.logError(self.errorLog, brief=True)
	activity.finish()
	self.debugOut("Remaining after condor_q: %s" % wmsIdList)
	# jobs not in queue have either succeeded or failed - both is considered 'Done' for GC
	# if no additional information is required, consider everything we couldn't find as done
	if retCode == 0:
		for wmsID in list(wmsIdList):
			wmsIdList.remove(wmsID)
			wmsID = self._createId(wmsID)
			yield (wmsToJobMap[wmsID], wmsID, Job.DONE, {})
	# TODO: query log on properly configured pool
	# querying the history can be SLOW! only do when necessary and possible
	# NOTE(review): deliberately disabled via 'False and ...' - kept for reference
	if False and len(wmsIdList) > 0 and self.remoteType != PoolType.SPOOL:
		utils.vprint('querrying condor_history', 2)
		# querying the history can be VERY slow! Only do so bit by bit if possible
		if self.historyFile:
			historyList = sorted(["-f " + history_fn
				for history_fn in ifilter(os.path.isfile, glob.glob(self.historyFile + "*"))])
		else:
			historyList = [""]
		# query the history file by file until no more jobs need updating
		for historyFile in historyList:
			if len(wmsIdList) > 0:
				# NOTE(review): 'jobIDs' is a blank string here - presumably the
				# remaining wmsIdList was intended; dead code, so left untouched
				statusArgs = '%(fileQuery)s %(format)s %(jobIDs)s' % {
					"fileQuery": historyFile, "jobIDs": " ", "format": self.statusReturnFormat}
				statusProcess = self.Pool.LoggedExecute(self.historyExec, statusArgs)
				for statusReturnLine in statusProcess.iter():
					try:
						# test if wmsID job was requested, then extract data and remove from check list
						if statusReturnLine.split()[0] in wmsIdList:
							(jobID, wmsID, status, jobinfo) = self._statusReturnLineRead(statusReturnLine)
							wmsIdList.remove(wmsID)
							yield (jobID, self._createId(wmsID), status, jobinfo)
					except Exception:
						raise BackendError('Error reading job status info:\n%s' % statusReturnLine)
				# cleanup after final yield
				retCode = statusProcess.wait()
				if retCode != 0:
					if not self.explainError(statusProcess, retCode):
						statusProcess.logError(self.errorLog, brief=True)
	self.debugFlush()
def submitJobs(self, jobNumListFull, module):
	"""Submit jobs to the Condor scheduler in batches of 25.

	jobNumListFull: list of internal job numbers to submit.
	module: task object providing JDL and job configuration data.
	Yields (job number, gc id, {}) for every successfully queued job.
	Raises BackendError if JDL or job config data cannot be written.
	"""
	submitBatch = 25
	for index in irange(0, len(jobNumListFull), submitBatch):
		jobNumList = jobNumListFull[index:index + submitBatch]
		self.debugOut("\nStarted submitting: %s" % jobNumList)
		self.debugPool()

		# get the full job config path and basename
		def _getJobCFG(jobNum):
			return os.path.join(self.getSandboxPath(jobNum), 'job_%d.var' % jobNum), 'job_%d.var' % jobNum

		activity = utils.ActivityLog('preparing jobs')
		# construct a temporary JDL for this batch of jobs
		jdlDescriptor, jdlFilePath = tempfile.mkstemp(suffix='.jdl')
		jdlSubmitPath = jdlFilePath
		self.debugOut("Writing temporary jdl to: " + jdlSubmitPath)
		try:
			data = self.makeJDLdata(jobNumList, module)
			utils.safeWrite(os.fdopen(jdlDescriptor, 'w'), data)
		except Exception:
			utils.removeFiles([jdlFilePath])
			raise BackendError('Could not write jdl data to %s.' % jdlFilePath)
		# create the _jobconfig.sh file containing the actual data
		for jobNum in jobNumList:
			try:
				self._writeJobConfig(_getJobCFG(jobNum)[0], jobNum, module, {})
			except Exception:
				raise BackendError('Could not write _jobconfig data for %s.' % jobNum)

		self.debugOut("Copying to remote")
		# copy infiles to ssh/gsissh remote pool if required
		if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
			activity = utils.ActivityLog('preparing remote scheduler')

			def _copyWithCheck(fileSource, fileTarget):
				# copy one file to the remote pool; log (or explain) failures
				copyProcess = self.Pool.LoggedCopyToRemote(fileSource, fileTarget)
				if copyProcess.wait() != 0:
					if not self.explainError(copyProcess, copyProcess.wait()):
						copyProcess.logError(self.errorLog, brief=True)
				self.debugFlush()

			self.debugOut("Copying to sandbox")
			workdirBase = self.getWorkdirPath()
			# TODO: check whether shared remote files already exist and copy otherwise
			for _, fileSource, fileTarget in self._getSandboxFilesIn(module):
				_copyWithCheck(fileSource, os.path.join(workdirBase, fileTarget))
			# copy job config files
			self.debugOut("Copying job configs")
			for jobNum in jobNumList:
				fileSource, fileTarget = _getJobCFG(jobNum)
				_copyWithCheck(fileSource, os.path.join(self.getWorkdirPath(jobNum), fileTarget))
			# copy jdl - submission happens against the remote copy from here on
			self.debugOut("Copying jdl")
			jdlSubmitPath = os.path.join(workdirBase, os.path.basename(jdlFilePath))
			_copyWithCheck(jdlFilePath, jdlSubmitPath)
			# copy proxy
			for authFile in self._token.getAuthFiles():
				self.debugOut("Copying proxy")
				_copyWithCheck(authFile, os.path.join(self.getWorkdirPath(), os.path.basename(authFile)))

		self.debugOut("Starting jobs")
		try:
			# submit all jobs simultaneously and temporarily store verbose (ClassAd) output
			activity = utils.ActivityLog('queuing jobs at scheduler')
			proc = self.Pool.LoggedExecute(self.submitExec, ' -verbose %(JDL)s' % {"JDL": jdlSubmitPath})
			# extract the Condor ID (WMS ID) of the jobs from output ClassAds
			wmsJobIdList = []
			for line in proc.iter():
				if "GridControl_GCIDtoWMSID" in line:
					GCWMSID = line.split('=')[1].strip(' "\n').split('@')
					GCID, WMSID = int(GCWMSID[0]), GCWMSID[1].strip()
					# Condor creates a default job then overwrites settings on any
					# subsequent job - i.e. skip every second, but better be sure
					# BUGFIX: entries are (gc id, GCID) tuples, so already-seen GCIDs
					# are in column 1 - the old check against column 0 never matched
					if (not wmsJobIdList) or (GCID not in lzip(*wmsJobIdList)[1]):
						wmsJobIdList.append((self._createId(WMSID), GCID))
				if "GridControl_GCtoWMSID" in line:
					self.debugOut("o : %s" % line)
					self.debugOut("o : %s" % wmsJobIdList)
			retCode = proc.wait()
			activity.finish()
			if (retCode != 0) or (len(wmsJobIdList) < len(jobNumList)):
				if not self.explainError(proc, retCode):
					utils.eprint("Submitted %4d jobs of %4d expected" % (len(wmsJobIdList), len(jobNumList)))
					proc.logError(self.errorLog, jdl=jdlFilePath)
		finally:
			utils.removeFiles([jdlFilePath])
		self.debugOut("Done Submitting")
		# yield the (jobNum, WMS ID, other data) of each job successively
		for (gcId, jobNum) in wmsJobIdList:
			yield (jobNum, gcId, {})
			self.debugOut("Yielded submitted job")
		self.debugFlush()
def show_report(self, job_db, jobnum_list):
	# Render one pivoted table row per job state: (state value, state name)
	# header columns against the aggregated per-state counts.
	state_header = lzip(Job.enum_value_list, Job.enum_name_list)
	state_counts = self._get_job_state_dict(job_db, jobnum_list)
	self._show_table(state_header, [state_counts], pivot=True)
def parse_tuples(self, pconfig, varexpr, output_vn, value): # eg. '11 12 13 14' -> [(11, 12), (12, 13), (13, 14)] -> [12, 13, 14] tuple_token_list = value.split() return lzip(tuple_token_list, tuple_token_list[1:])
def replaceDict(result, allVars, varMapping = None):
	"""Substitute '@name@' and '__name__' placeholders in *result*.

	allVars: dict of variable values; unknown variables expand to ''.
	varMapping: optional list of (placeholder name, variable name) pairs;
	a falsy value maps every variable in allVars onto itself.
	Returns the substituted string.
	"""
	# identity mapping built directly instead of the old lzip(keys, keys) self-zip;
	# 'or' (not 'is None') keeps the original behavior for an empty varMapping
	for (virtual, real) in (varMapping or [(vn, vn) for vn in allVars]):
		# hoist the loop-invariant replacement value out of the delimiter loop
		replacement = str(allVars.get(real, ''))
		for delim in ('@', '__'):
			result = result.replace(delim + virtual + delim, replacement)
	return result