def split_brackets(tokens, brackets=None, exType=Exception):
	"""Yield groups of tokens, joining bracketed regions into single strings.

	Tokens inside a (possibly nested) bracket pair are concatenated and yielded
	as one string; tokens outside any bracket are yielded unchanged.
	*brackets* is a list of two-character open/close pairs (default: '()', '{}', '[]').
	Mismatched or unclosed brackets raise *exType*.
	"""
	if brackets is None:
		brackets = ['()', '{}', '[]']
	map_close_to_open = dict((pair[1], pair[0]) for pair in brackets)
	open_token_set = set(map_close_to_open.values())  # hoisted - O(1) membership test per token
	buffer = ''
	stack_bracket = []
	position = 0
	for token in tokens:
		position += len(token)  # store position for proper error messages
		if token in open_token_set:
			stack_bracket.append((token, position))
		if token in map_close_to_open:
			if not stack_bracket:
				raise exType('Closing bracket %r at position %d is without opening bracket' % (
					token, position))
			elif stack_bracket[-1][0] == map_close_to_open[token]:
				stack_bracket.pop()
				if not stack_bracket:  # outermost bracket closed - emit the buffered group
					buffer += token
					yield buffer
					buffer = ''
					continue
			else:
				raise exType('Closing bracket %r at position %d does not match bracket %r at position %d' % (
					token, position, stack_bracket[-1][0], stack_bracket[-1][1]))
		if stack_bracket:  # inside a bracketed region - keep buffering
			buffer += token
		else:
			yield token
	if stack_bracket:
		raise exType('Unclosed brackets %s' % str.join(', ',
			('%r at position %d' % open_pos for open_pos in stack_bracket)))
def _resyncInternal(self):  # This function is _VERY_ time critical!
	# Resync the parameter space against the raw parameter source and derive which
	# jobs have to be redone or disabled; result is stored in self._resyncState.
	tmp = self._rawSource.resync()  # First ask about psource changes
	(redoNewPNum, disableNewPNum, sizeChange) = (set(tmp[0]), set(tmp[1]), tmp[2])
	hashNew = self._rawSource.getHash()
	hashChange = self._storedHash != hashNew  # a changed source hash also triggers a resync
	self._storedHash = hashNew
	if not (redoNewPNum or disableNewPNum or sizeChange or hashChange):
		self._resyncState = None  # nothing changed - no intervention necessary
		return
	# Adapters for the old parameter space (read back from the dump file) and the new one
	psource_old = ParameterAdapter(None, ParameterSource.createInstance('GCDumpParameterSource', self._pathParams))
	psource_new = ParameterAdapter(None, self._rawSource)
	mapJob2PID = {}
	(pAdded, pMissing, _) = self._diffParams(psource_old, psource_new, mapJob2PID, redoNewPNum, disableNewPNum)
	self._source = self._getResyncSource(psource_old, psource_new, mapJob2PID, pAdded, pMissing, disableNewPNum)
	self._mapJob2PID = mapJob2PID  # Update Job2PID map
	redoNewPNum = redoNewPNum.difference(disableNewPNum)  # disabling a parameter wins over redoing it
	if redoNewPNum or disableNewPNum:
		# translate parameter numbers into job numbers for the resync result
		mapPID2Job = dict(ismap(utils.swap, self._mapJob2PID.items()))
		translate = lambda pNum: mapPID2Job.get(pNum, pNum)  # unmapped pNums stay unchanged
		self._resyncState = (set(imap(translate, redoNewPNum)), set(imap(translate, disableNewPNum)), sizeChange)
	elif sizeChange:
		self._resyncState = (set(), set(), sizeChange)
	# Write resynced state - write to temp files first, then rename, so the
	# on-disk state files are never left half-written
	self._writeJob2PID(self._pathJob2PID + '.tmp')
	ParameterSource.getClass('GCDumpParameterSource').write(self._pathParams + '.tmp', self)
	os.rename(self._pathJob2PID + '.tmp', self._pathJob2PID)
	os.rename(self._pathParams + '.tmp', self._pathParams)
def collapse_psp_list(psp_list, tracked_list, opts):
	"""Group equal parameter space points and collate their job / nickname counts.

	Returns (header_list, collapsed_psp_list); psp entries that agree on all
	tracked variables are merged into one entry with COLLATE_* counters.
	"""
	psp_dict = {}
	psp_dict_nicks = {}
	header_list = [('COLLATE_JOBS', '# of jobs')]
	if 'DATASETSPLIT' in tracked_list:
		tracked_list.remove('DATASETSPLIT')
		if opts.collapse == 1:  # collapse dataset splittings, but keep the nickname visible
			tracked_list.append('DATASETNICK')
			header_list.append(('DATASETNICK', 'DATASETNICK'))
		elif opts.collapse == 2:  # also collapse nicknames - show their count instead
			header_list.append(('COLLATE_NICK', '# of nicks'))
	for pset in psp_list:
		if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
			pset.pop('DATASETSPLIT')
		nickname = None
		if ('DATASETNICK' in pset) and (opts.collapse == 2):
			nickname = pset.pop('DATASETNICK')
		# identical psp entries (wrt. the tracked variables) share the same hash
		hash_str = md5_hex(repr([pset.get(str(key)) for key in tracked_list]))
		psp_dict.setdefault(hash_str, []).append(pset)
		psp_dict_nicks.setdefault(hash_str, set()).add(nickname)

	def _do_collate(hash_str):  # merge all psp entries sharing the same hash
		psp = psp_dict[hash_str][0]
		psp['COLLATE_JOBS'] = len(psp_dict[hash_str])
		psp['COLLATE_NICK'] = len(psp_dict_nicks[hash_str])
		return psp
	psp_list = sorted((_do_collate(hash_str) for hash_str in psp_dict),
		key=lambda psp: tuple(str(value) for value in psp.values()))
	return (header_list, psp_list)
def get_job_dict(self, jobnum):  # Get job dependent environment variables
	"""Extend the SCRAMTask job environment with CMSSW specific variables."""
	job_env_dict = SCRAMTask.get_job_dict(self, jobnum)
	if not self._has_dataset:
		job_env_dict['MAX_EVENTS'] = self._events_per_job
	job_env_dict.update(dict(self._cmssw_search_dict))
	# translate boolean configuration switches into 'yes' marker variables
	for (env_vn, enabled) in [('GZIP_OUT', self._do_gzip_std_output),
			('SE_RUNTIME', self._project_area_tarball_on_se),
			('HAS_RUNTIME', self._project_area)]:
		if enabled:
			job_env_dict[env_vn] = 'yes'
	job_env_dict['CMSSW_EXEC'] = 'cmsRun'
	job_env_dict['CMSSW_CONFIG'] = str.join(' ',
		(os.path.basename(config_fn) for config_fn in self._config_fn_list))
	job_env_dict['CMSSW_OLD_RELEASETOP'] = self._old_release_top
	# prolog / epilog scripts share the same set of environment variables
	for (prefix, proc) in [('PROLOG', self.prolog), ('EPILOG', self.epilog)]:
		if proc.is_active():
			job_env_dict['CMSSW_%s_EXEC' % prefix] = proc.get_command()
			job_env_dict['CMSSW_%s_SB_IN_FILES' % prefix] = str.join(' ',
				(fpi.path_rel for fpi in proc.get_sb_in_fpi_list()))
			job_env_dict['CMSSW_%s_ARGS' % prefix] = proc.get_arguments()
	return job_env_dict
def _resync_adapter(self, pa_old, pa_new, result_redo, result_disable, size_change): (map_jobnum2pnum, pspi_list_added, pspi_list_missing) = _diff_pspi_list(pa_old, pa_new, result_redo, result_disable) # Reorder and reconstruct parameter space with the following layout: # NNNNNNNNNNNNN OOOOOOOOO | source: NEW (==self) and OLD (==from file) # <same><added> <missing> | same: both in NEW and OLD, added: only in NEW, missing: only in OLD if pspi_list_added: _extend_map_jobnum2pnum(map_jobnum2pnum, pa_old.get_job_len(), pspi_list_added) if pspi_list_missing: # extend the parameter source by placeholders for the missing parameter space points psrc_missing = _create_placeholder_psrc(pa_old, pa_new, map_jobnum2pnum, pspi_list_missing, result_disable) self._psrc = ParameterSource.create_instance('ChainParameterSource', self._psrc_raw, psrc_missing) self._map_jobnum2pnum = map_jobnum2pnum # Update Job2PID map # Write resynced state self._write_jobnum2pnum(self._path_jobnum2pnum + '.tmp') ParameterSource.get_class('GCDumpParameterSource').write(self._path_params + '.tmp', self.get_job_len(), self.get_job_metadata(), self.iter_jobs()) os.rename(self._path_jobnum2pnum + '.tmp', self._path_jobnum2pnum) os.rename(self._path_params + '.tmp', self._path_params) result_redo = result_redo.difference(result_disable) if result_redo or result_disable: map_pnum2jobnum = reverse_dict(self._map_jobnum2pnum) def _translate_pnum(pnum): return map_pnum2jobnum.get(pnum, pnum) result_redo = set(imap(_translate_pnum, result_redo)) result_disable = set(imap(_translate_pnum, result_disable)) return (result_redo, result_disable, size_change) return (set(), set(), size_change)
def __init__(self, jobDB, task, jobs = None, configString = ''):
	# Report that groups jobs into categories based on their job variables
	# (using the dataset nickname instead of the split number if available)
	Report.__init__(self, jobDB, task, jobs, configString)
	catJobs = {}
	catDescDict = {}
	# Assignment of jobs to categories (depending on variables and using datasetnick if available)
	jobConfig = {}
	for jobNum in self._jobs:
		if task:
			jobConfig = task.getJobConfig(jobNum)
		# skip variables whose repr contains '!' (internal / non-printable markers)
		varList = sorted(ifilter(lambda var: '!' not in repr(var), jobConfig.keys()))
		if 'DATASETSPLIT' in varList:
			varList.remove('DATASETSPLIT')  # split number would make every job its own category
			varList.append('DATASETNICK')
		catKey = str.join('|', imap(lambda var: '%s=%s' % (var, jobConfig[var]), varList))
		catJobs.setdefault(catKey, []).append(jobNum)
		if catKey not in catDescDict:
			catDescDict[catKey] = dict(imap(lambda var: (var, jobConfig[var]), varList))
	# Kill redundant keys from description
	commonVars = dict(imap(lambda var: (var, jobConfig[var]), varList))  # seed with last varList
	for catKey in catDescDict:
		for key in list(commonVars.keys()):
			if key not in catDescDict[catKey].keys():
				commonVars.pop(key)
			elif commonVars[key] != catDescDict[catKey][key]:
				commonVars.pop(key)
	# variables with a common value in every category carry no information - drop them
	for catKey in catDescDict:
		for commonKey in commonVars:
			catDescDict[catKey].pop(commonKey)
	# Generate job-category map with efficient int keys - catNum becomes the new catKey
	self._job2cat = {}
	self._catDescDict = {}
	for catNum, catKey in enumerate(sorted(catJobs)):
		self._catDescDict[catNum] = catDescDict[catKey]
		self._job2cat.update(dict.fromkeys(catJobs[catKey], catNum))
def __init__(self, config, job_db, task):
	# Categorize jobs by their job variables - jobs with identical values
	# for all (non-ignored) variables end up in the same category
	map_cat2jobs = {}
	map_cat2desc = {}
	job_config_dict = {}
	vn_list = []
	for jobnum in job_db.get_job_list():
		if task:
			job_config_dict = task.get_job_dict(jobnum)
		vn_list = lfilter(self._is_not_ignored_vn, sorted(job_config_dict.keys()))
		cat_key = str.join('|', imap(lambda vn: '%s=%s' % (vn, job_config_dict[vn]), vn_list))
		map_cat2jobs.setdefault(cat_key, []).append(jobnum)
		if cat_key not in map_cat2desc:
			map_cat2desc[cat_key] = dict(imap(lambda var: (var, job_config_dict[var]), vn_list))
	# Kill redundant keys from description - seed with last vn_list
	common_var_dict = dict(imap(lambda var: (var, job_config_dict[var]), vn_list))
	for cat_key in map_cat2desc:
		for key in list(common_var_dict.keys()):
			if key not in map_cat2desc[cat_key].keys():
				common_var_dict.pop(key)
			elif common_var_dict[key] != map_cat2desc[cat_key][key]:
				common_var_dict.pop(key)
	# variables shared by all categories carry no information - remove them
	for cat_key in map_cat2desc:
		for common_key in common_var_dict:
			map_cat2desc[cat_key].pop(common_key)
	# Generate job-category map with efficient int keys - catNum becomes the new cat_key
	self._job2cat = {}
	self._map_cat2desc = {}
	for cat_num, cat_key in enumerate(sorted(map_cat2jobs)):
		self._map_cat2desc[cat_num] = map_cat2desc[cat_key]
		self._job2cat.update(dict.fromkeys(map_cat2jobs[cat_key], cat_num))
def _get_section_key(self, section):
	# Parse a section header like 'section name tag:value' and translate it into
	# an ordering key (class idx, section idx, name idx tuple, tag idx tuple);
	# returns None if the section is not selected by the current view
	tmp = section.split()
	if not tmp:
		raise ConfigError('Invalid config section %r' % section)
	(cur_section, cur_name_list, cur_tag_map) = (tmp[0], [], {})
	for token in tmp[1:]:
		if ':' in token:  # tokens containing a colon are 'tag:value' entries
			tag_entry = token.split(':')
			if len(tag_entry) != 2:
				raise ConfigError('Invalid config tag in section %r' % section)
			cur_tag_map[tag_entry[0]] = tag_entry[1]
		elif token:
			cur_name_list.append(token)
	class_section_idx = safe_index(self._class_section_list, cur_section)
	section_idx = safe_index(self._section_list, cur_section)
	if (not self._class_section_list) and (not self._section_list):
		section_idx = 0  # no section filter configured - every section matches
	if (class_section_idx is not None) or (section_idx is not None):
		# Section is selected by class or manually
		name_idx_tuple = tuple(imap(lambda n: safe_index(self._section_name_list, n), cur_name_list))
		if None not in name_idx_tuple:
			# All names in current section are selected
			cur_tag_name_list = lfilter(cur_tag_map.__contains__, self._section_tag_order)
			# tags that are not part of the known tag order disqualify the section
			left_tag_name_list = lfilter(lambda tn: tn not in self._section_tag_order, cur_tag_map)
			tag_tuple_list = imap(lambda tn: (tn, cur_tag_map[tn]), cur_tag_name_list)
			tag_idx_tuple = tuple(imap(lambda tt: safe_index(self._section_tag_list, tt), tag_tuple_list))
			if (None not in tag_idx_tuple) and not left_tag_name_list:
				return (class_section_idx, section_idx, name_idx_tuple, tag_idx_tuple)
def _get_sandbox_file_list(self, task, sm_list):
	# Prepare all input files
	dep_list = set(ichain(imap(lambda x: x.get_dependency_list(), [task] + sm_list)))
	# resolve each dependency to its 'env.<dep>.sh' script in the share directories
	dep_fn_list = lmap(lambda dep: resolve_path('env.%s.sh' % dep,
		lmap(lambda pkg: get_path_share('', pkg=pkg), os.listdir(get_path_pkg()))), dep_list)
	# merge monitoring and task environment into the '_config.sh' export script
	task_config_dict = dict_union(self._remote_event_handler.get_mon_env_dict(),
		*imap(lambda x: x.get_task_dict(), [task] + sm_list))
	task_config_dict.update({'GC_DEPFILES': str.join(' ', dep_list),
		'GC_USERNAME': self._token.get_user_name(), 'GC_WMS_NAME': self._name})
	task_config_str_list = DictFormat(escape_strings=True).format(
		task_config_dict, format='export %s%s%s\n')
	# variable alias map written to '_varmap.dat' (monitoring vars map to themselves)
	vn_alias_dict = dict(izip(self._remote_event_handler.get_mon_env_dict().keys(),
		self._remote_event_handler.get_mon_env_dict().keys()))
	vn_alias_dict.update(task.get_var_alias_map())
	vn_alias_str_list = DictFormat(delimeter=' ').format(vn_alias_dict, format='%s%s%s\n')

	# Resolve wildcards in task input files
	def _get_task_fn_list():
		for fpi in task.get_sb_in_fpi_list():
			matched = glob.glob(fpi.path_abs)
			if matched != []:
				for match in matched:
					yield match
			else:
				yield fpi.path_abs  # no wildcard match - keep the unresolved path
	return lchain([self._remote_event_handler.get_file_list(), dep_fn_list, _get_task_fn_list(), [
		VirtualFile('_config.sh', sorted(task_config_str_list)),
		VirtualFile('_varmap.dat', sorted(vn_alias_str_list))]])
def _parseTime(self, time_str): result = 0 entry_map = {'yea': 365 * 24 * 60 * 60, 'day': 24 * 60 * 60, 'hou': 60 * 60, 'min': 60, 'sec': 1} tmp = time_str.split() for (entry, value) in izip(imap(lambda x: x[:3], tmp[1::2]), imap(int, tmp[::2])): result += entry_map[entry] * value return result
def __init__(self, head, data, delimeter='|'):
	"""Write a delimiter separated table: header line first, then one line per dict."""
	ConsoleTable.__init__(self)
	head = list(head)
	self._delimeter = delimeter
	# head entries are (key, title) pairs - titles form the header line
	self._write_line(str.join(self._delimeter, (title for (_, title) in head)))
	for entry in data:
		if isinstance(entry, dict):  # non-dict entries are silently skipped
			value_iter = (str(entry.get(key, '')) for (key, _) in head)
			self._write_line(str.join(self._delimeter, value_iter))
def process(self, pNum, splitInfo, result):
	# Fill result['LUMI_RANGE'] with the lumi section ranges relevant for this partition
	if not self._lumi_filter.empty():
		lumi_filter = self._lumi_filter.lookup(splitInfo[DataSplitter.Nickname], is_selector = False)
		if lumi_filter:
			idxRuns = splitInfo[DataSplitter.MetadataHeader].index("Runs")
			iterRuns = ichain(imap(lambda m: m[idxRuns], splitInfo[DataSplitter.Metadata]))
			# restrict the lumi filter to the runs actually present in the partition
			short_lumi_filter = filterLumiFilter(list(iterRuns), lumi_filter)
			result['LUMI_RANGE'] = str.join(',', imap(lambda lr: '"%s"' % lr, formatLumi(short_lumi_filter)))
def _parse_status(self, value, default):
	# Map batch system status characters onto grid-control job states
	if ('E' in value) or ('e' in value):  # error markers
		return Job.UNKNOWN
	for queued_char in ['h', 's', 'S', 'T', 'w']:  # hold / suspend / wait markers
		if queued_char in value:
			return Job.QUEUED
	if ('r' in value) or ('t' in value):  # running / transferring markers
		return Job.RUNNING
	return Job.READY
def _parse_status(self, value, default):
	"""Translate batch system status characters into grid-control job states."""
	status_char_map = [
		(Job.UNKNOWN, ['E', 'e']),  # error markers
		(Job.QUEUED, ['h', 's', 'S', 'T', 'w']),  # hold / suspend / wait markers
		(Job.RUNNING, ['r', 't']),  # running / transferring markers
	]
	for (job_state, char_list) in status_char_map:
		if any(status_char in value for status_char in char_list):
			return job_state
	return Job.READY
def process(self, pnum, partition, result):
	# Fill result['LUMI_RANGE'] with the lumi section ranges relevant for this partition
	if self.enabled():
		lumi_filter = self._lumi_filter.lookup(partition[DataSplitter.Nickname], is_selector=False)
		if lumi_filter:
			idx_runs = partition[DataSplitter.MetadataHeader].index('Runs')
			iter_run = ichain(imap(lambda m: m[idx_runs], partition[DataSplitter.Metadata]))
			# restrict the lumi filter to the runs actually contained in the partition
			short_lumi_filter = filter_lumi_filter(list(iter_run), lumi_filter)
			iter_lumi_range_str = imap(lambda lr: '"%s"' % lr, format_lumi(short_lumi_filter))
			result['LUMI_RANGE'] = str.join(',', iter_lumi_range_str)
def _readJob2PID(self):
	# Load the job number -> parameter id mapping from the zipped state file
	fp = ZipFile(self._pathJob2PID, 'r')
	try:
		self.maxN = int(fp.readline())  # first line: maximum number of jobs (0 -> None)
		if not self.maxN:
			self.maxN = None
		# second line: comma separated list of '<jobnum>:<pid>' entries;
		# empty entries are dropped by the identity filter
		mapInfo = ifilter(identity, imap(str.strip, fp.readline().split(',')))
		# entries may carry a '!' prefix which is stripped before int conversion
		self._mapJob2PID = dict(imap(lambda x: tuple(imap(lambda y: int(y.lstrip('!')), x.split(':'))), mapInfo))
		self._activeMap = {}
	finally:
		fp.close()
def getEntries(self, path, metadata, events, seList, objStore):
	# Attach the parent dataset paths for the files referenced in the metadata
	datacachePath = os.path.join(objStore.get('GC_WORKDIR', ''), 'datacache.dat')
	# prefer the local datacache file when no explicit source is configured
	source = utils.QM((self._source == '') and os.path.exists(datacachePath), datacachePath, self._source)
	if source and (source not in self._lfnMap):
		# build (translated lfn -> dataset name) lookup table for this source once
		pSource = DataProvider.createInstance('ListProvider', createConfig(), source)
		for (n, fl) in imap(lambda b: (b[DataProvider.Dataset], b[DataProvider.FileList]), pSource.getBlocks()):
			self._lfnMap.setdefault(source, {}).update(dict(imap(lambda fi: (self.lfnTrans(fi[DataProvider.URL]), n), fl)))
	pList = set()
	for key in ifilter(lambda k: k in metadata, self._parentKeys):
		pList.update(imap(lambda pPath: self._lfnMap.get(source, {}).get(self.lfnTrans(pPath)), metadata[key]))
	metadata['PARENT_PATH'] = lfilter(identity, pList)  # drop unresolved (None) parents
	yield (path, metadata, events, seList, objStore)
def _read_jobnum2pnum(self):
	# Load the job number -> parameter number mapping from the gzipped state file
	fp = GZipTextFile(self._path_jobnum2pnum, 'r')
	try:
		def _translate_info(jobnum_pnum_info):
			# entries look like '<jobnum>:<pnum>'; a '!' prefix is stripped before parsing
			return tuple(imap(lambda x: int(x.lstrip('!')), jobnum_pnum_info.split(':', 1)))
		int(fp.readline())  # max number of jobs - value is validated but not stored
		# second line: comma separated entries; empty ones are dropped by iidfilter
		jobnum_pnum_info_iter = iidfilter(imap(str.strip, fp.readline().split(',')))
		self._map_jobnum2pnum = dict(imap(_translate_info, jobnum_pnum_info_iter))
		self._can_submit_map = {}
	finally:
		fp.close()
def download_job(opts, work_dn, status_mon, job_db, job_obj, jobnum):
	"""Download and verify the output files of a single job.

	Registers the per-job outcome with *status_mon* and updates the job
	database (retry counter, download / failure markers) as configured in *opts*.
	"""
	if job_obj.get('download') == 'True' and not opts.mark_ignore_dl:
		return status_mon.register_job_result(jobnum, 'All files already downloaded',
			JobDownloadStatus.JOB_ALREADY)
	# Read the file hash entries from job info file
	fi_list = FileInfoProcessor().process(os.path.join(work_dn, 'output', 'job_%d' % jobnum)) or []
	is_download_failed = False
	if not fi_list:
		if opts.mark_empty_fail:
			is_download_failed = True
		else:
			return status_mon.register_job_result(jobnum, 'Job has no output files',
				JobDownloadStatus.JOB_NO_OUTPUT)
	download_result_list = []
	progress = ProgressActivity('Processing output files', len(fi_list))
	for (fi_idx, fi) in enumerate(fi_list):
		progress.update_progress(fi_idx, msg='Processing output file %r' % fi[FileInfo.NameDest])
		download_result_list.append(download_single_file(opts, jobnum, fi_idx, fi, status_mon))
	progress.finish()
	is_download_failed = is_download_failed or any(imap(download_result_list.__contains__, [
		FileDownloadStatus.FILE_TIMEOUT, FileDownloadStatus.FILE_HASH_FAILED,
		FileDownloadStatus.FILE_TRANSFER_FAILED, FileDownloadStatus.FILE_MKDIR_FAILED]))
	is_download_success = all(imap([FileDownloadStatus.FILE_OK,
		FileDownloadStatus.FILE_EXISTS].__contains__, download_result_list))
	# Ignore the first opts.retry number of failed jobs
	retry_count = int(job_obj.get('download attempt', 0))
	if fi_list and is_download_failed and opts.retry and (retry_count < int(opts.retry)):
		set_job_prop(job_db, jobnum, job_obj, 'download attempt', str(retry_count + 1))
		# FIX: parenthesize (retry_count + 1) - '%' binds tighter than '+', so the
		# old expression tried to add 1 to a string and raised TypeError at runtime
		return status_mon.register_job_result(jobnum,
			'Download attempt #%d failed' % (retry_count + 1), JobDownloadStatus.RETRY)
	delete_files(opts, jobnum, fi_list, is_download_failed)
	if is_download_failed:
		if opts.mark_fail:
			# Mark job as failed to trigger resubmission
			job_obj.state = Job.FAILED
			job_db.commit(jobnum, job_obj)
		status_mon.register_job_result(jobnum, 'Download failed', JobDownloadStatus.JOB_FAILED)
	elif is_download_success:
		if opts.mark_dl:
			# Mark as downloaded
			set_job_prop(job_db, jobnum, job_obj, 'download', 'True')
		status_mon.register_job_result(jobnum, 'Download successful', JobDownloadStatus.JOB_OK)
	else:
		# eg. because of SE blacklist
		status_mon.register_job_result(jobnum, 'Download incomplete', JobDownloadStatus.JOB_INCOMPLETE)
def _getPartition(self, key):
	# Read the data of one partition from the nested tar structure - partitions
	# are grouped into sub-tarballs of self._keySize entries ('%03dXX.tgz')
	if not self._cacheKey == key / self._keySize:
		self._cacheKey = key / self._keySize
		subTarFileObj = self._tar.extractfile('%03dXX.tgz' % (key / self._keySize))
		subTarFileObj = BytesBuffer(gzip.GzipFile(fileobj = subTarFileObj).read())  # 3-4x speedup for sequential access
		self._cacheTar = tarfile.open(mode = 'r', fileobj = subTarFileObj)
	fullData = lmap(bytes2str, self._cacheTar.extractfile('%05d' % key).readlines())
	# lines starting with '=' carry the file list - everything else is key/value data
	data = self._fmt.parse(lfilter(lambda x: not x.startswith('='), fullData),
		keyParser = {None: int}, valueParser = self._parserMap)
	fileList = imap(lambda x: x[1:], ifilter(lambda x: x.startswith('='), fullData))
	if DataSplitter.CommonPrefix in data:
		# stored file names are relative to the common prefix - expand them again
		fileList = imap(lambda x: '%s/%s' % (data[DataSplitter.CommonPrefix], x), fileList)
	data[DataSplitter.FileList] = lmap(str.strip, fileList)
	return data
def process_intervention(opts, psource):
	"""Log the redo/disable job intervention requested by the parameter source."""
	log.info('')
	tmp = psource.getJobIntervention()
	if tmp:
		(redo_list, disable_list) = (tmp[0], tmp[1])
		if opts.displaymode == 'parseable':  # machine readable output
			log.info('R: %s', str.join(',', (str(jobnum) for jobnum in redo_list)))
			log.info('D: %s', str.join(',', (str(jobnum) for jobnum in disable_list)))
		else:
			log.info(' Redo: %s', repr(redo_list))
			log.info('Disable: %s', repr(disable_list))
	elif opts.displaymode == 'parseable':
		log.info('NOINT')
	else:
		log.info('No intervention')
def process_intervention(opts, psource):
	"""Print the redo/disable job intervention requested by the parameter source."""
	utils.vprint('')
	intervention = psource.getJobIntervention()
	if intervention:
		if opts.displaymode == 'parseable':  # machine readable output
			utils.vprint('R: %s' % str.join(',', (str(jobnum) for jobnum in intervention[0])))
			utils.vprint('D: %s' % str.join(',', (str(jobnum) for jobnum in intervention[1])))
		else:
			utils.vprint(' Redo: %r' % intervention[0])
			utils.vprint('Disable: %r' % intervention[1])
	elif opts.displaymode == 'parseable':
		utils.vprint('NOINT')
	else:
		utils.vprint('No intervention')
def _iter_datasource_items(self, item, metadata_dict, entries, location_list, obj_dict):
	# Enrich dataset items with information from the 'cmssw.dbs.tar.gz' job report
	jobnum = metadata_dict['GC_JOBNUM']
	cms_log_fn = os.path.join(item, 'cmssw.dbs.tar.gz')
	if os.path.exists(cms_log_fn):
		tar = tarfile.open(cms_log_fn, 'r')
		# Collect infos about transferred files
		file_summary_map = {}
		try:
			# each line in 'files': <crc32 hash> <size> <file name>
			file_info_str_list = tar.extractfile('files').readlines()
			for rawdata in imap(lambda value: bytes2str(value).split(), file_info_str_list):
				file_summary_map[rawdata[2]] = {
					'SE_OUTPUT_HASH_CRC32': rawdata[0],
					'SE_OUTPUT_SIZE': int(rawdata[1])
				}
			obj_dict['CMSSW_FILES'] = file_summary_map
		except Exception:
			raise DatasetError('Could not read CMSSW file infos for job %d!' % jobnum)
		# Collect infos about CMSSW processing steps
		config_summary_map = {}
		self._process_steps(jobnum, tar, config_summary_map, file_summary_map)
		for cfg in config_summary_map:
			job_hash_list = metadata_dict.setdefault('CMSSW_CONFIG_JOBHASH', [])
			job_hash_list.append(config_summary_map[cfg]['CMSSW_CONFIG_HASH'])
		obj_dict.update({'CMSSW_CONFIG': config_summary_map, 'CMSSW_FILES': file_summary_map})
		tar.close()
	yield (item, metadata_dict, entries, location_list, obj_dict)
def __init__(self, arg, **kwargs):
	"""Parse a job selector string like '1,5-9,12-' into (begin, end) ranges."""
	entry_iter = (token.split('-') for token in arg.split(','))

	def _parse_endpoint(endpoint_str):
		# '' marks an open range endpoint; everything else must be an integer
		if endpoint_str != '':
			return int(endpoint_str)
		return endpoint_str
	try:
		self.ranges = [(_parse_endpoint(entry[0]), _parse_endpoint(entry[-1]))
			for entry in entry_iter]
	except Exception:
		raise UserError('Job identifiers must be integers or ranges.')
def process(self, wait = utils.wait):
	# Main job cycle: check -> retrieve -> submit, repeated until abort or timeout
	wmsTiming = self.wms.getTimings()
	t_start = time.time()
	while True:
		didWait = False
		# Check whether wms can submit
		if not self.wms.canSubmit(self._submitTime, self._submitFlag):
			self._submitFlag = False
		# Check free disk space
		spaceLogger = logging.getLogger('workflow.space')
		# NOTE(review): this adds a new LogEveryNsec filter on every loop iteration
		# to the same logger instance - filters accumulate over time; verify intent
		spaceLogger.addFilter(LogEveryNsec(interval = 5 * 60))
		if (self._checkSpace > 0) and utils.freeSpace(self._workDir) < self._checkSpace:
			spaceLogger.warning('Not enough space left in working directory')
		else:
			# actions are selected by their first letter: (c)heck, (r)etrieve, (s)ubmit
			for action in imap(str.lower, self._actionList):
				if action.startswith('c') and not utils.abort():  # check for jobs
					if self.jobManager.check(self.wms):
						didWait = wait(wmsTiming.waitBetweenSteps)
				elif action.startswith('r') and not utils.abort():  # retrieve finished jobs
					if self.jobManager.retrieve(self.wms):
						didWait = wait(wmsTiming.waitBetweenSteps)
				elif action.startswith('s') and not utils.abort() and self._submitFlag:
					if self.jobManager.submit(self.wms):
						didWait = wait(wmsTiming.waitBetweenSteps)
		# quit if abort flag is set or not in continuous mode
		if utils.abort() or ((self.duration >= 0) and (time.time() - t_start > self.duration)):
			break
		# idle timeout
		if not didWait:
			wait(wmsTiming.waitOnIdle)
	self.monitor.onFinish()
def __call__(self, jobNum, jobObj):
	# A job is selected if its number falls into at least one configured range;
	# empty range endpoints ('') act as open bounds
	def _in_range(jobRange):
		(begin, end) = (jobRange[0], jobRange[1])
		if (begin != '') and (jobNum < begin):
			return False
		if (end != '') and (jobNum > end):
			return False
		return True
	return reduce(operator.or_, imap(_in_range, self.ranges))
def _build_blocks(self, map_key2fm_list, map_key2name, map_key2metadata_dict):
	# Return named dataset
	for key in sorted(map_key2fm_list):
		result = {
			DataProvider.Dataset: map_key2name[key[:1]],  # dataset name keyed by first key part
			DataProvider.BlockName: map_key2name[key[:2]],  # block name keyed by first two parts
		}
		fm_list = map_key2fm_list[key]
		# Determine location_list - None (no file with locations) means 'everywhere'
		location_list = None
		for file_location_list in ifilter(lambda s: s is not None, imap(itemgetter(3), fm_list)):
			location_list = location_list or []
			location_list.extend(file_location_list)
		if location_list is not None:
			result[DataProvider.Locations] = list(UniqueList(location_list))
		# use first file [0] to get the initial metadata_dict [1]
		metadata_name_list = list(fm_list[0][1].keys())
		result[DataProvider.Metadata] = metadata_name_list

		# translate file metadata into data provider file info entries
		def _translate_fm2fi(url, metadata_dict, entries, location_list, obj_dict):
			if entries is None:
				entries = -1  # unknown number of entries
			return {DataProvider.URL: url, DataProvider.NEntries: entries,
				DataProvider.Metadata: lmap(metadata_dict.get, metadata_name_list)}
		result[DataProvider.FileList] = lsmap(_translate_fm2fi, fm_list)
		yield result
def _getSectionKey(self, section):
	# Parse a section header like 'section name tag:value' into an ordering key
	# (class idx, section idx, name idx tuple, tag idx list) - returns None if
	# the section is not selected by the current configuration view
	tmp = section.split()
	assert(len(tmp) > 0)
	(curSection, curNames, curTags) = (tmp[0], [], {})
	for token in tmp[1:]:
		if ':' in token:  # tokens containing a colon are 'tag:value' entries
			tag_entry = token.split(':')
			assert(len(tag_entry) == 2)
			curTags[tag_entry[0]] = tag_entry[1]
		elif token:
			curNames.append(token)

	def myIndex(src, value):
		# like list.index, but returns None instead of raising an exception
		try:
			return src.index(value)
		except Exception:
			return None
	idxClass = myIndex(self._cfgClassSections, curSection)
	idxSection = myIndex(self._cfgSections, curSection)
	if (not self._cfgClassSections) and (not self._cfgSections):
		idxSection = 0  # no section filter configured - every section matches
	if (idxClass is not None) or (idxSection is not None):  # Section is selected by class or manually
		idxNames = tuple(imap(lambda n: myIndex(self._cfgNames, n), curNames))
		if None not in idxNames:  # All names in current section are selected
			curTagNames = lfilter(lambda tn: tn in curTags, self._cfgTagsOrder)
			# tags outside of the known tag order disqualify the section
			curTagNamesLeft = lfilter(lambda tn: tn not in self._cfgTagsOrder, curTags)
			idxTags = lmap(lambda tn: myIndex(self._cfgTags, (tn, curTags[tn])), curTagNames)
			if (None not in idxTags) and not curTagNamesLeft:
				return (idxClass, idxSection, idxNames, idxTags)
def partition_check(splitter):
	"""Compare the stored job '.var' files against the current dataset splitting.

	Logs a warning for every inconsistent job and a summary of all failed jobs.
	Uses the module level *opts* for the location of the job files.
	"""
	fail = utils.set()
	for jobNum in irange(splitter.getMaxJobs()):
		splitInfo = splitter.getSplitInfo(jobNum)
		try:
			(events, skip, files) = (0, 0, [])
			# FIX: close the job variable file again (was opened and leaked before)
			fp = open(os.path.join(opts.checkSplitting, 'jobs', 'job_%d.var' % jobNum))
			try:
				lines = fp.readlines()
			finally:
				fp.close()
			for line in lines:
				if 'MAX_EVENTS' in line:
					events = int(line.split('MAX_EVENTS', 1)[1].replace('=', ''))
				if 'SKIP_EVENTS' in line:
					skip = int(line.split('SKIP_EVENTS', 1)[1].replace('=', ''))
				if 'FILE_NAMES' in line:
					files = line.split('FILE_NAMES', 1)[1].replace('=', '').replace('\"', '').replace('\\', '')
					files = lmap(lambda x: x.strip().strip(','), files.split())

			def printError(curJ, curS, msg):
				# curJ = value from the job file, curS = value from the splitter
				if curJ != curS:
					logging.warning('%s in job %d (j:%s != s:%s)', msg, jobNum, curJ, curS)
					fail.add(jobNum)
			printError(events, splitInfo[DataSplitter.NEntries], 'Inconsistent number of events')
			printError(skip, splitInfo[DataSplitter.Skipped], 'Inconsistent number of skipped events')
			printError(files, splitInfo[DataSplitter.FileList], 'Inconsistent list of files')
		except Exception:
			logging.warning('Job %d was never initialized!', jobNum)
	if fail:
		logging.warning('Failed: ' + str.join('\n', imap(str, fail)))
def processBlock(self, block):
	# Apply the lumi filter to a datablock and strip run/lumi metadata as configured;
	# returns the (modified) block or None if all files were filtered out
	if self._lumi_filter.empty() and ((self._lumi_keep == LumiKeep.RunLumi) or (DataProvider.Metadata not in block)):
		return block  # nothing to filter and nothing to strip

	def getMetadataIdx(key):
		# index of a metadata column - None if the column does not exist
		if key in block.get(DataProvider.Metadata, []):
			return block[DataProvider.Metadata].index(key)
	idxRuns = getMetadataIdx('Runs')
	idxLumi = getMetadataIdx('Lumi')
	if not self._lumi_filter.empty():
		lumi_filter = self._lumi_filter.lookup(block[DataProvider.Nickname], is_selector = False)
		if lumi_filter and ((idxRuns is None) or (idxLumi is None)) and self._lumi_strict:
			fqName = block[DataProvider.Dataset]
			if block[DataProvider.BlockName] != '0':
				fqName += '#' + block[DataProvider.BlockName]
			raise DatasetError('Strict lumi filter active but dataset %s does not provide lumi information!' % fqName)

	block[DataProvider.FileList] = list(self._processFI(block, idxRuns, idxLumi))
	if not block[DataProvider.FileList]:
		return  # all files removed by the lumi filter - drop the whole block
	block[DataProvider.NEntries] = sum(imap(lambda fi: fi[DataProvider.NEntries], block[DataProvider.FileList]))
	if self._lumi_keep == LumiKeep.RunLumi:
		return block  # keep both run and lumi metadata
	elif self._lumi_keep == LumiKeep.Run:
		if idxLumi is not None:
			block[DataProvider.Metadata].pop(idxLumi)  # only drop the lumi column
		return block
	removeRunLumi(block[DataProvider.Metadata], idxRuns, idxLumi)
	return block
def retrieveJobs(self, gcID_jobNum_List):  # Process output sandboxes returned by getJobsOutput
	# Yields (jobNum, exit code, job data, output path) per job;
	# exit code -1 with empty data marks a failed retrieval

	# Function to force moving a directory
	def forceMove(source, target):
		try:
			if os.path.exists(target):
				shutil.rmtree(target)
		except IOError:
			self._log.exception('%r cannot be removed', target)
			return False
		try:
			shutil.move(source, target)
		except IOError:
			self._log.exception('Error moving job output directory from %r to %r', source, target)
			return False
		return True

	retrievedJobs = []
	for inJobNum, pathName in self._getJobsOutput(gcID_jobNum_List):
		# inJobNum != None, pathName == None => Job could not be retrieved
		if pathName is None:
			if inJobNum not in retrievedJobs:
				yield (inJobNum, -1, {}, None)
			continue
		# inJobNum == None, pathName != None => Found leftovers of job retrieval
		if inJobNum is None:
			continue
		# inJobNum != None, pathName != None => Job retrieval from WMS was ok
		jobFile = os.path.join(pathName, 'job.info')
		try:
			job_info = self._job_parser.process(pathName)
		except Exception:
			self._log.exception('Unable to parse job.info')
			job_info = None
		if job_info:
			jobNum = job_info[JobResult.JOBNUM]
			if jobNum != inJobNum:
				raise BackendError('Invalid job id in job file %s' % jobFile)
			if forceMove(pathName, os.path.join(self._outputPath, 'job_%d' % jobNum)):
				retrievedJobs.append(inJobNum)
				yield (jobNum, job_info[JobResult.EXITCODE], job_info[JobResult.RAW], pathName)
			else:
				yield (jobNum, -1, {}, None)
			continue
		# job.info was missing or unparseable - clean up the output directory:
		# Clean empty pathNames
		for subDir in imap(lambda x: x[0], os.walk(pathName, topdown=False)):
			try:
				os.rmdir(subDir)
			except Exception:
				clear_current_exception()
		if os.path.exists(pathName):
			# Preserve failed job
			utils.ensureDirExists(self._failPath, 'failed output directory')
			forceMove(pathName, os.path.join(self._failPath, os.path.basename(pathName)))
		yield (inJobNum, -1, {}, None)
def list_dataset_names(ds_name_list):
	"""Display all dataset names in a simple one-column console table."""
	header_list = [(DataProvider.Dataset, 'Dataset')]
	row_iter = ({DataProvider.Dataset: ds_name} for ds_name in ds_name_list)
	ConsoleTable.create(header_list, row_iter)
def _cleanup_dict( mapping ): # strip all key value entries and filter empty parameters tmp = tuple( imap(lambda item: lmap(str.strip, item), mapping.items())) return dict(lfilter(lambda k_v: k_v[0] != '', tmp))
def __call__(self, jobNum, jobObj):
	# Multi-selector: a job is accepted only if every contained selector accepts it
	def _apply_selector(selector):
		return selector(jobNum, jobObj)
	return reduce(operator.and_, imap(_apply_selector, self._selectors))
def wrapList(value, length, delimLines=',\n', delimEntries=', '):
	"""Join entries into delimited lines, wrapping before a line exceeds *length*."""
	def _would_overflow(item, buffer):
		# the '2 * len(buffer)' term accounts for the ', ' separators between entries
		return len(item) + sum(imap(len, buffer)) + 2 * len(buffer) > length
	wrapped = accumulate(value, [], _would_overflow, opAdd=lambda x, y: x + [y])
	return str.join(delimLines, (str.join(delimEntries, group) for group in wrapped))
def download_job(opts, work_dn, status_mon, job_db, job_obj, jobnum):
	"""Download and verify the output files of a single job.

	Registers the outcome with status_mon. Failed downloads increment the
	'download attempt' counter in the job database until opts.retry is
	exhausted; afterwards the job can optionally be marked as failed to
	trigger a resubmission.
	"""
	if job_obj.get('download') == 'True' and not opts.mark_ignore_dl:
		return status_mon.register_job_result(jobnum, 'All files already downloaded',
			JobDownloadStatus.JOB_ALREADY)

	# Read the file hash entries from job info file
	fi_list = FileInfoProcessor().process(os.path.join(work_dn, 'output', 'job_%d' % jobnum)) or []
	is_download_failed = False
	if not fi_list:
		if opts.mark_empty_fail:
			is_download_failed = True
		else:
			return status_mon.register_job_result(jobnum, 'Job has no output files',
				JobDownloadStatus.JOB_NO_OUTPUT)

	# Transfer each output file and collect the per-file download results
	download_result_list = []
	progress = ProgressActivity('Processing output files', len(fi_list))
	for (fi_idx, fi) in enumerate(fi_list):
		progress.update_progress(fi_idx, msg='Processing output file %r' % fi[FileInfo.NameDest])
		download_result_list.append(download_single_file(opts, jobnum, fi_idx, fi, status_mon))
	progress.finish()

	is_download_failed = is_download_failed or any(imap(download_result_list.__contains__, [
		FileDownloadStatus.FILE_TIMEOUT, FileDownloadStatus.FILE_HASH_FAILED,
		FileDownloadStatus.FILE_TRANSFER_FAILED, FileDownloadStatus.FILE_MKDIR_FAILED]))
	is_download_success = all(imap([FileDownloadStatus.FILE_OK,
		FileDownloadStatus.FILE_EXISTS].__contains__, download_result_list))

	# Ignore the first opts.retry number of failed jobs
	retry_count = int(job_obj.get('download attempt', 0))
	if fi_list and is_download_failed and opts.retry and (retry_count < int(opts.retry)):
		set_job_prop(job_db, jobnum, job_obj, 'download attempt', str(retry_count + 1))
		# BUGFIX: '%d' % retry_count + 1 formatted first and then added an
		# int to the resulting str (TypeError) - the increment belongs
		# inside the format arguments.
		return status_mon.register_job_result(jobnum,
			'Download attempt #%d failed' % (retry_count + 1), JobDownloadStatus.RETRY)

	delete_files(opts, jobnum, fi_list, is_download_failed)
	if is_download_failed:
		if opts.mark_fail:
			# Mark job as failed to trigger resubmission
			job_obj.state = Job.FAILED
			job_db.commit(jobnum, job_obj)
		status_mon.register_job_result(jobnum, 'Download failed', JobDownloadStatus.JOB_FAILED)
	elif is_download_success:
		if opts.mark_dl:
			# Mark as downloaded
			set_job_prop(job_db, jobnum, job_obj, 'download', 'True')
		status_mon.register_job_result(jobnum, 'Download successful', JobDownloadStatus.JOB_OK)
	else:
		# eg. because of SE blacklist
		status_mon.register_job_result(jobnum, 'Download incomplete', JobDownloadStatus.JOB_INCOMPLETE)
def fillParameterKeys(self, result):
	"""Append one ParameterMetadata instance per stored key to result."""
	result.extend(ParameterMetadata(key) for key in self._keys)
def _center_of_mass(data):
	"""Return the weighted centroid {'x': ..., 'y': ...} of the given points.

	Each point is a mapping with 'x', 'y' and 'weight' keys. The input is
	consumed in a single pass, so one-shot iterables (generators) work as
	well as lists - the previous version summed the sequence three times,
	which silently exhausted iterators after the first pass.

	Raises ZeroDivisionError when the total weight is zero (unchanged).
	"""
	wsum_x = 0
	wsum_y = 0
	sum_w = 0
	for pt in data:
		weight = pt['weight']
		wsum_x += pt['x'] * weight
		wsum_y += pt['y'] * weight
		sum_w += weight
	return {'x': wsum_x / sum_w, 'y': wsum_y / sum_w}
def _get_map_gc_id_jobnum(self, jobnum_list):
	"""Map each job's backend id (gc_id) to its job number."""
	gc_id_map = {}
	for jobnum in jobnum_list:
		gc_id_map[self.job_db.get_job(jobnum).gc_id] = jobnum
	return gc_id_map
def getScript(self):
	# Concatenate the script lists of all registered handlers into one list.
	return lchain(handler.getScript() for handler in self._handlers)
def getNeededKeys(self, splitter):
	# Collect the needed keys of every processor into a single flat list.
	return lchain(proc.getNeededKeys(splitter) for proc in self._processorList)
def getKeys(self):
	# Collect the keys of every processor into a single flat list.
	return lchain(proc.getKeys() for proc in self._processorList)
def formatRange(rlrange):
	"""Format a (start, end) run/lumi range as '<run>:<lumi>-<run>:<lumi>'.

	Each endpoint is a (run, lumi) pair; None components fall back to the
	widest possible bound ('1'/'MIN' for the start, '9999999'/'MAX' for the
	end), e.g. ((None, None), (None, None)) -> '1:MIN-9999999:MAX'.

	Replaces the obscure '(x, d)[x is None]' bool-indexing idiom with an
	explicit conditional and drops the compat-layer imap call.
	"""
	(start, end) = rlrange
	def _or_default(value, fallback):
		# Substitute only None - other falsy values (0, '') are kept as-is.
		return fallback if value is None else value
	start_str = '%s:%s' % (_or_default(start[0], '1'), _or_default(start[1], 'MIN'))
	end_str = '%s:%s' % (_or_default(end[0], '9999999'), _or_default(end[1], 'MAX'))
	return str.join('-', (start_str, end_str))
def getList(self, option, default=noDefault, parseItem=identity, **kwargs):
	"""Read a config option as a list, applying parseItem to each entry."""
	def obj2str(value):
		# Serialized form: one entry per line, starting on a fresh line.
		return '\n' + str.join('\n', imap(str, value))
	def str2obj(value):
		return lmap(parseItem, parseList(value, None))
	return self._getInternal('list', obj2str, str2obj, None, option, default, **kwargs)
def newBlock(self, old, filelist):
	"""Copy block metadata, replace the file list and recompute the entry count."""
	new = dict(old)
	new[DataProvider.FileList] = filelist
	new[DataProvider.NEntries] = sum(fi[DataProvider.NEntries] for fi in filelist)
	return new
def _sum(job_class):
	# Total the per-state counters for this job class
	# (js_dict comes from the enclosing scope).
	return sum(js_dict.get(state) for state in job_class.state_list)
def accepted_se(opts, fi):
	"""True when the file path mentions one of the selected SEs, or when no
	SE selection was configured at all."""
	file_path = fi[FileInfo.Path]
	return any(se in file_path for se in opts.select_se) or not opts.select_se
def get_local_username():
	"""Return the local user name from the environment.

	Checks the usual variables in order (LOGNAME, USER, LNAME, USERNAME)
	and returns the first non-empty value, or '' when none is set. The
	plain loop replaces the compat-layer iidfilter/imap chain; unset and
	empty values are skipped just like the previous identity filter did.
	"""
	for var_name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'):
		username = os.environ.get(var_name)
		if username:
			return username
	return ''
def __repr__(self):
	"""Return e.g. ``key('a', 'b')`` listing the lookup keys in order."""
	quoted_keys = str.join(', ', ("'%s'" % lookup_key for lookup_key in self._lookup_keys))
	return 'key(%s)' % quoted_keys
def cleanupDict(d):
	"""Strip whitespace from all keys and values of d and return the entries
	whose stripped key is non-empty, as a list of [key, value] lists.

	BUGFIX: the previous version wrapped each item in imap(), producing
	non-subscriptable iterators, so the subsequent k_v[0] access raised
	TypeError. Each pair is now materialized as a list (the same approach
	the sibling helper _cleanup_dict already uses).
	"""
	# strip all key value entries
	stripped_items = [[str.strip(key), str.strip(value)] for key, value in d.items()]
	# filter empty parameters
	return [key_value for key_value in stripped_items if key_value[0] != '']
def formatDict(d, fmt='%s=%r', joinStr=', '):
	"""Render d deterministically: entries sorted by key, each formatted via
	fmt (which receives the (key, value) tuple) and joined with joinStr.

	Uses a plain generator expression instead of the compat-layer
	imap + lambda; behavior is unchanged.
	"""
	return str.join(joinStr, (fmt % (key, d[key]) for key in sorted(d)))
def _formatFileList(self, fl):
	"""Quote and comma-join the file names; when a prefix is configured,
	rewrite each name to <prefix> + <part after the first '/store/'>."""
	if self._prefix:
		fl = (self._prefix + fn.split('/store/', 1)[-1] for fn in fl)
	return str.join(', ', ('"%s"' % fn for fn in fl))
def _accept_run(self, block, fi, idx_runs, lumi_filter):
	"""Accept the file iff one of its run numbers passes the lumi filter."""
	# Without run metadata no filtering is possible - accept the file.
	if idx_runs is None:
		return True
	run_list = fi[DataProvider.Metadata][idx_runs]
	return any(select_run(run, lumi_filter) for run in run_list)
def parse_list(value, delimeter, filter_fun=lambda x: x not in ['', '\n']):
	"""Split value on delimeter, strip each token and keep those accepted by
	filter_fun (default: drop empty strings and bare newlines).

	Returns [] for a false-y value (None or ''). Comprehensions replace the
	compat-layer lfilter/imap; behavior is unchanged. (The parameter name
	'delimeter' is a historic typo kept for caller compatibility.)
	"""
	if not value:
		return []
	stripped = (token.strip() for token in value.split(delimeter))
	return [token for token in stripped if filter_fun(token)]
def _counter(item, buffer):
	# Would adding 'item' overflow the target 'length' (from the enclosing
	# scope)? Accounts for each buffered entry plus 2 separator characters.
	used = sum(len(entry) for entry in buffer) + 2 * len(buffer)
	return len(item) + used > length
def fill_parameter_metadata(self, result):
	"""Append one ParameterMetadata entry per output variable name."""
	result.extend(ParameterMetadata(vn) for vn in self._output_vn_list)
def storageReq(self, sites):
	"""Build a JDL close-SE match expression; None when no sites are given."""
	if not sites:
		return None
	clause_iter = ('(target.GlueSEUniqueID == %s)' % jdlEscape(site) for site in sites)
	return 'anyMatch(other.storage.CloseSEs, ' + str.join(' || ', clause_iter) + ')'
def __repr__(self):
	"""Debug representation showing the configured exception report details."""
	level_names = tuple(imap(logging.getLevelName, self._force_details_range))
	return '%s(quiet = %r, code = %r, var = %r, file = %r, tree = %r, thread = %r)' % (
		self.__class__.__name__, level_names, self._ex_context, self._ex_vars,
		self._ex_fstack, self._ex_tree, self._ex_threads)
def get_dependency_list(self):
	"""Return ['glite'] when any storage path uses a non-local scheme
	(i.e. does not start with 'dir'), else [].

	'True in imap(...)' performed an equality scan over the mapped values;
	any() short-circuits with the identical result for boolean predicates.
	"""
	if any(not path.startswith('dir') for path in self._storage_paths):
		return ['glite']
	return []
def getTaskConfig(self):
	"""Return the monitoring environment mapping: GC_MONITORING holds the
	space-separated basenames of all monitoring scripts.

	Plain generator expression instead of the compat-layer imap call;
	behavior is unchanged.
	"""
	script_names = (os.path.basename(script) for script in self.getScript())
	return {'GC_MONITORING': str.join(' ', script_names)}