def get_partition_unchecked(self, partition_num):
    # partitions are grouped into nested tar files by integer chunk index
    nested_tar = self._get_nested_tar('%03dXX.tgz' % (partition_num // self._partition_chunk_size))
    partition_str_list = lmap(bytes2str, nested_tar.extractfile('%05d' % partition_num).readlines())
    # lines starting with '=' carry URLs, the remaining lines carry key/value partition data
    partition = self._fmt.parse(ifilter(lambda x: not x.startswith('='), partition_str_list),
        key_parser={None: DataSplitter.intstr2enum}, value_parser=self._map_enum2parser)
    url_list = imap(lambda x: x[1:], ifilter(lambda x: x.startswith('='), partition_str_list))
    return self._combine_partition_parts(partition, url_list)
def process(self, dn):
    try:
        jobInfo = JobInfoProcessor.process(self, dn)
    except Exception:
        logging.getLogger('wms').warning(sys.exc_info()[1])
        jobInfo = None
    if jobInfo:
        jobData = jobInfo[JobResult.RAW]
        result = {}
        # parse old job info data format for files
        oldFileFormat = [FileInfoProcessor.Hash, FileInfoProcessor.NameLocal,
            FileInfoProcessor.NameDest, FileInfoProcessor.Path]
        for (fileKey, fileData) in ifilter(lambda key_value: key_value[0].startswith('FILE'), jobData.items()):
            fileIdx = fileKey.replace('FILE', '').rjust(1, '0')
            result[int(fileIdx)] = dict(izip(oldFileFormat, fileData.strip('"').split(' ')))
        # parse new job info data format
        for (fileKey, fileData) in ifilter(lambda key_value: key_value[0].startswith('OUTPUT_FILE'), jobData.items()):
            (fileIdx, fileProperty) = fileKey.replace('OUTPUT_FILE_', '').split('_')
            if isinstance(fileData, str):
                fileData = fileData.strip('"')
            result.setdefault(int(fileIdx), {})[FileInfoProcessor.str2enum(fileProperty)] = fileData
        return list(result.values())
def list_parameters(psrc, opts):
    (psp_list, need_gc_param) = get_parameters(opts, psrc)
    enabled_vn_list = opts.output.split(',')
    meta_list = lfilter(lambda k: (k in enabled_vn_list) or not opts.output, psrc.get_job_metadata())
    tracked_vn_list = lmap(lambda k: k.value, ifilter(lambda k: not k.untracked, meta_list))
    untracked_vn_list = lmap(lambda k: k.value, ifilter(lambda k: k.untracked, meta_list))
    if opts.collapse > 0:
        (header_list, psp_list) = collapse_psp_list(psp_list, tracked_vn_list, opts)
    else:
        header_list = [('GC_JOB_ID', '#')]
        if need_gc_param:
            header_list.append(('GC_PARAM', 'GC_PARAM'))
        if opts.active:
            header_list.append((ParameterInfo.ACTIVE, 'ACTIVE'))
        if opts.visible:
            tracked_vn_list = opts.visible.split(',')
        header_list.extend(sorted(izip(tracked_vn_list, tracked_vn_list)))
        if opts.untracked:
            header_list.extend(sorted(imap(lambda n: (n, '(%s)' % n),
                ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked_vn_list))))
    ConsoleTable.create(header_list, psp_list)
def se_to_cms_name(self, se):
    site_names = []
    for site_resource in ifilter(lambda resources: resources['fqdn'] == se, self._query('site-resources')):
        site_names.extend(self._query('site-names', match=site_resource['site_name']))
    return lmap(lambda x: x['alias'], ifilter(lambda site: site['type'] == 'cms', site_names))
def cms_name_to_se(self, cms_name):
    cms_name_regex = re.compile(cms_name.replace('*', '.*').replace('%', '.*'))

    def _select_psn_site(site):
        return site['type'] == 'psn' and cms_name_regex.match(site['alias'])
    psn_site_names = ifilter(_select_psn_site, self._query('site-names'))
    site_aliases = set(imap(lambda x: x['alias'], psn_site_names))

    def _select_se(resource):
        return (resource['type'] == 'SE') and (resource['alias'] in site_aliases)
    return lmap(lambda x: x['fqdn'], ifilter(_select_se, self._query('site-resources')))
def strDict(d, order = None):
    if not order:
        order = sorted(d.keys())
    else:
        order = list(order)
        order.extend(ifilter(lambda x: x not in order, d.keys()))
    return str.join(', ', imap(lambda k: '%s = %s' % (k, repr(d[k])), order))
def _formatRequirements(self, reqs):
    result = ['other.GlueHostNetworkAdapterOutboundIP']
    for reqType, arg in reqs:
        if reqType == WMS.SOFTWARE:
            result.append('Member(%s, other.GlueHostApplicationSoftwareRunTimeEnvironment)' % jdlEscape(arg))
        elif reqType == WMS.WALLTIME:
            if arg > 0:
                result.append('(other.GlueCEPolicyMaxWallClockTime >= %d)' % int((arg + 59) / 60))
        elif reqType == WMS.CPUTIME:
            if arg > 0:
                result.append('(other.GlueCEPolicyMaxCPUTime >= %d)' % int((arg + 59) / 60))
        elif reqType == WMS.MEMORY:
            if arg > 0:
                result.append('(other.GlueHostMainMemoryRAMSize >= %d)' % arg)
        elif reqType == WMS.STORAGE:
            result.append(self.storageReq(arg))
        elif reqType == WMS.SITES:
            result.append(self.sitesReq(arg))
        elif reqType == WMS.CPUS:
            pass  # Handle number of cpus in makeJDL
        else:
            raise APIError('Unknown requirement type %s or argument %r' % (WMS.reqTypes[reqType], arg))
    return str.join(' && ', ifilter(lambda x: x is not None, result))
def cancelJobs(self, allIds):
    if len(allIds) == 0:
        return  # nothing to cancel ('raise StopIteration' inside a generator breaks under PEP 479)
    waitFlag = False
    for ids in imap(lambda x: allIds[x:x + 5], irange(0, len(allIds), 5)):
        # Delete jobs in groups of 5 - with 5 seconds between groups
        if waitFlag and not utils.wait(5):
            break
        waitFlag = True
        jobNumMap = dict(ids)
        jobs = self.writeWMSIds(ids)
        activity = utils.ActivityLog('cancelling jobs')
        proc = LocalProcess(self._cancelExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs)
        retCode = proc.status(timeout=60, terminate=True)
        del activity
        # select cancelled jobs
        for deletedWMSId in ifilter(lambda x: x.startswith('- '), proc.stdout.iter()):
            deletedWMSId = self._createId(deletedWMSId.strip('- \n'))
            yield (jobNumMap.get(deletedWMSId), deletedWMSId)
        if retCode != 0:
            if self.explainError(proc, retCode):
                pass
            else:
                self._log.log_process(proc, files={'jobs': utils.safeRead(jobs)})
        utils.removeFiles([jobs])
def _forwardCall(self, args, assignFun, callFun):
    argMap = self._getMapID2Backend(args, assignFun)

    def makeGenerator(wmsPrefix):
        return callFun(self._wmsMap[wmsPrefix], argMap[wmsPrefix])
    activeWMS = ifilter(lambda wmsPrefix: wmsPrefix in argMap, self._wmsMap)
    for result in tchain(imap(makeGenerator, activeWMS)):
        yield result
def _forward_call(self, args, assign_fun, call_fun):
    backend_name2args = self._get_map_backend_name2args(args, assign_fun)
    avail_backend_name_list = sorted(self._map_backend_name2backend)
    for backend_name in ifilter(backend_name2args.__contains__, avail_backend_name_list):
        wms = self._map_backend_name2backend[backend_name]
        for result in call_fun(wms, backend_name2args[backend_name]):
            yield result
def _update_map_error_code2msg(self, fn):
    # Read comments with error codes at the beginning of file: # <code> - description
    for line in ifilter(lambda x: x.startswith('#'), SafeFile(fn).iter_close()):
        tmp = lmap(str.strip, line.lstrip('#').split(' - ', 1))
        if tmp[0].isdigit() and (len(tmp) == 2):
            self.map_error_code2msg[int(tmp[0])] = tmp[1]
def write(cls, fn, pa):
    fp = ZipFile(fn, 'w')
    try:
        keys = sorted(ifilter(lambda p: not p.untracked, pa.getJobKeys()))
        fp.write('# %s\n' % json.dumps(keys))
        maxN = pa.getMaxJobs()
        if maxN:
            activity = utils.ActivityLog('Writing parameter dump')
            for jobNum in irange(maxN):
                activity.finish()
                activity = utils.ActivityLog('Writing parameter dump [%d/%d]' % (jobNum + 1, maxN))
                meta = pa.getJobInfo(jobNum)
                if meta.get(ParameterInfo.ACTIVE, True):
                    fp.write('%d\t%s\n' % (jobNum,
                        str.join('\t', imap(lambda k: json.dumps(meta.get(k, '')), keys))))
                else:
                    fp.write('%d!\t%s\n' % (jobNum,
                        str.join('\t', imap(lambda k: json.dumps(meta.get(k, '')), keys))))
            activity.finish()
    finally:
        fp.close()
def _getCategoryStateSummary(self):
    (catStateDict, catDescDict, catSubcatDict) = CategoryBaseReport._getCategoryStateSummary(self)
    # Used for quick calculations
    catLenDict = {}
    for catKey in catStateDict:
        catLenDict[catKey] = sum(catStateDict[catKey].values())
    # Merge successfully completed categories
    self._mergeCats(catStateDict, catDescDict, catSubcatDict, catLenDict, 'Completed subtasks',
        lfilter(lambda catKey: (len(catStateDict[catKey]) == 1) and (Job.SUCCESS in catStateDict[catKey]), catStateDict))
    # Next merge steps shouldn't see non-dict catKeys in catDescDict
    hiddenDesc = {}
    for catKey in ifilter(lambda catKey: not isinstance(catDescDict[catKey], dict), list(catDescDict)):
        hiddenDesc[catKey] = catDescDict.pop(catKey)
    # Merge categories till goal is reached
    self._mergeCatsWithGoal(catStateDict, catDescDict, catSubcatDict, catLenDict, hiddenDesc)
    # Remove redundant variables from description
    varKeyResult = self._getKeyMergeResults(catDescDict)
    self._clearCategoryDesc(varKeyResult, catDescDict)
    # Restore hidden descriptions
    catDescDict.update(hiddenDesc)
    # Enforce category maximum - merge categories with the least amount of jobs
    if len(catStateDict) != self._catMax:
        self._mergeCats(catStateDict, catDescDict, catSubcatDict, catLenDict, 'Remaining subtasks',
            sorted(catStateDict, key=lambda catKey: -catLenDict[catKey])[self._catMax - 1:])
    # Finalize descriptions:
    if len(catDescDict) == 1:
        catDescDict[list(catDescDict.keys())[0]] = 'All jobs'
    return (catStateDict, catDescDict, catSubcatDict)
def strDict(d, order=None):
    if not order:
        order = sorted(d.keys())
    else:
        order = list(order)
        order.extend(ifilter(lambda x: x not in order, d.keys()))
    return str.join(', ', imap(lambda k: '%s = %s' % (k, repr(d[k])), order))
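# Illustrative usage sketch for strDict (not part of the original source; it assumes that
# imap/ifilter are the lazy map/filter variants from the project's python_compat layer):
if __name__ == '__main__':
    print(strDict({'b': 2, 'a': 1}))                # a = 1, b = 2  (keys sorted when no order is given)
    print(strDict({'b': 2, 'a': 1}, order=['b']))   # b = 2, a = 1  (explicit order first, remaining keys appended)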
def _submitJob(self, jobNum, module):
    fd, jdl = tempfile.mkstemp('.jdl')
    try:
        jdlData = self.makeJDL(jobNum, module)
        utils.safeWrite(os.fdopen(fd, 'w'), jdlData)
    except Exception:
        utils.removeFiles([jdl])
        raise BackendError('Could not write jdl data to %s.' % jdl)
    try:
        submitArgs = []
        for key_value in utils.filterDict(self._submitParams, vF=lambda v: v).items():
            submitArgs.extend(key_value)
        submitArgs.append(jdl)
        activity = Activity('submitting job %d' % jobNum)
        proc = LocalProcess(self._submitExec, '--nomsg', '--noint', '--logfile', '/dev/stderr', *submitArgs)
        gcID = None
        for line in ifilter(lambda x: x.startswith('http'), imap(str.strip, proc.stdout.iter(timeout=60))):
            gcID = line
        retCode = proc.status(timeout=0, terminate=True)
        activity.finish()
        if (retCode != 0) or (gcID is None):
            if self.explainError(proc, retCode):
                pass
            else:
                self._log.log_process(proc, files={'jdl': SafeFile(jdl).read()})
    finally:
        utils.removeFiles([jdl])
    return (jobNum, utils.QM(gcID, self._createId(gcID), None), {'jdl': str.join('', jdlData)})
def _get_metadata_str(fi, idx_list):
    idx_list = ifilter(lambda idx: idx < len(fi[DataProvider.Metadata]), idx_list)
    return json.dumps(lmap(lambda idx: fi[DataProvider.Metadata][idx], idx_list))
def _buildBlocks(self, protoBlocks, hashNameDictDS, hashNameDictB):
    # Return named dataset
    for hashDS in sorted(protoBlocks):
        for hashB in sorted(protoBlocks[hashDS]):
            blockSEList = None
            for seList in ifilter(lambda s: s is not None, imap(lambda x: x[3], protoBlocks[hashDS][hashB])):
                blockSEList = blockSEList or []
                blockSEList.extend(seList)
            if blockSEList is not None:
                blockSEList = list(UniqueList(blockSEList))
            metaKeys = protoBlocks[hashDS][hashB][0][1].keys()

            def fnProps(path, metadata, events, seList, objStore):
                if events is None:
                    events = -1
                return {DataProvider.URL: path, DataProvider.NEntries: events,
                    DataProvider.Metadata: lmap(metadata.get, metaKeys)}
            yield {
                DataProvider.Dataset: hashNameDictDS[hashDS],
                DataProvider.BlockName: hashNameDictB[hashB][1],
                DataProvider.Locations: blockSEList,
                DataProvider.Metadata: list(metaKeys),
                DataProvider.FileList: lsmap(fnProps, protoBlocks[hashDS][hashB])
            }
def cancelJobs(self, allIds):
    if len(allIds) == 0:
        return  # nothing to cancel ('raise StopIteration' inside a generator breaks under PEP 479)
    waitFlag = False
    for ids in imap(lambda x: allIds[x:x + 5], irange(0, len(allIds), 5)):
        # Delete jobs in groups of 5 - with 5 seconds between groups
        if waitFlag and not utils.wait(5):
            break
        waitFlag = True
        jobNumMap = dict(ids)
        jobs = self.writeWMSIds(ids)
        activity = utils.ActivityLog('cancelling jobs')
        proc = LocalProcess(self._cancelExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs)
        retCode = proc.status(timeout=60, terminate=True)
        del activity
        # select cancelled jobs
        for deletedWMSId in ifilter(lambda x: x.startswith('- '), proc.stdout.iter()):
            deletedWMSId = self._createId(deletedWMSId.strip('- \n'))
            yield (jobNumMap.get(deletedWMSId), deletedWMSId)
        if retCode != 0:
            if self.explainError(proc, retCode):
                pass
            else:
                self._log.log_process(proc, files={'jobs': utils.safeRead(jobs)})
        utils.removeFiles([jobs])
def parse_bind_args(cls, value, **kwargs):
    config = kwargs.pop('config')
    datasource_name = kwargs.pop('datasource_name', 'dataset')
    provider_name_default = kwargs.pop('provider_name_default', 'ListProvider')
    for entry in ifilter(str.strip, value.splitlines()):
        (nickname, provider_name, dataset_expr) = ('', provider_name_default, None)
        tmp = lmap(str.strip, entry.split(':', 2))
        if len(tmp) == 3:  # use tmp[...] to avoid false positives for unpacking checker ...
            (nickname, provider_name, dataset_expr) = (tmp[0], tmp[1], tmp[2])
            if dataset_expr.startswith('/'):
                dataset_expr = '/' + dataset_expr.lstrip('/')
        elif len(tmp) == 2:
            (nickname, dataset_expr) = (tmp[0], tmp[1])
        elif len(tmp) == 1:
            dataset_expr = tmp[0]
        provider = cls.get_class(provider_name)
        bind_value = str.join(':', [nickname, provider.get_bind_class_name(provider_name), dataset_expr])
        yield (bind_value, provider, config, datasource_name, dataset_expr, nickname)
def _sort_from_lines(fn):  # sort 'from' order
    replacement_str_pair_list = []
    raw = SafeFile(fn).read_close()
    for import_line in ifilter(lambda line: line.startswith('from '), raw.splitlines()):
        try:
            _from, _source, _import, _what = import_line.split(None, 3)
            assert _from == 'from'
            assert _import == 'import'
            _comment = None
            if '#' in _what:
                _what, _comment = lmap(str.strip, _what.split('#', 1))
            import_list = sorted(imap(str.strip, _what.split(',')))
            new_import_line = 'from %s import %s' % (_source, str.join(', ', import_list))
            if _comment is not None:
                new_import_line += ' # ' + _comment
            replacement_str_pair_list.append((import_line, new_import_line))
        except:
            logging.warning('%s: %s', fn, import_line)
            raise
    for (old, new) in replacement_str_pair_list:
        raw = raw.replace(old, new)
    open(fn, 'w').write(raw)
def __init__(self, jobDB, task, jobs = None, configString = ''):
    Report.__init__(self, jobDB, task, jobs, configString)
    catJobs = {}
    catDescDict = {}
    # Assignment of jobs to categories (depending on variables and using datasetnick if available)
    jobConfig = {}
    varList = []
    for jobNum in self._jobs:
        if task:
            jobConfig = task.getJobConfig(jobNum)
            varList = sorted(ifilter(lambda var: '!' not in repr(var), jobConfig.keys()))
            if 'DATASETSPLIT' in varList:
                varList.remove('DATASETSPLIT')
                varList.append('DATASETNICK')
        catKey = str.join('|', imap(lambda var: '%s=%s' % (var, jobConfig[var]), varList))
        catJobs.setdefault(catKey, []).append(jobNum)
        if catKey not in catDescDict:
            catDescDict[catKey] = dict(imap(lambda var: (var, jobConfig[var]), varList))
    # Kill redundant keys from description
    commonVars = dict(imap(lambda var: (var, jobConfig[var]), varList))  # seed with last varList
    for catKey in catDescDict:
        for key in list(commonVars.keys()):
            if key not in catDescDict[catKey].keys():
                commonVars.pop(key)
            elif commonVars[key] != catDescDict[catKey][key]:
                commonVars.pop(key)
    for catKey in catDescDict:
        for commonKey in commonVars:
            catDescDict[catKey].pop(commonKey)
    # Generate job-category map with efficient int keys - catNum becomes the new catKey
    self._job2cat = {}
    self._catDescDict = {}
    for catNum, catKey in enumerate(sorted(catJobs)):
        self._catDescDict[catNum] = catDescDict[catKey]
        self._job2cat.update(dict.fromkeys(catJobs[catKey], catNum))
def _checkJobList(self, wms, jobList):
    if self._defect_tries:
        nDefect = len(self._defect_counter)  # Waiting list gets larger in case reported == []
        waitList = self._sample(self._defect_counter, nDefect - max(1, int(nDefect / 2 ** self._defect_raster)))
        jobList = lfilter(lambda x: x not in waitList, jobList)
    (change, timeoutList, reported) = JobManager._checkJobList(self, wms, jobList)
    for jobNum in reported:
        self._defect_counter.pop(jobNum, None)
    if self._defect_tries and (change is not None):
        self._defect_raster = utils.QM(reported, 1, self._defect_raster + 1)  # make 'raster' iteratively smaller
        for jobNum in ifilter(lambda x: x not in reported, jobList):
            self._defect_counter[jobNum] = self._defect_counter.get(jobNum, 0) + 1
        kickList = lfilter(lambda jobNum: self._defect_counter[jobNum] >= self._defect_tries, self._defect_counter)
        for jobNum in set(kickList + utils.QM((len(reported) == 0) and (len(jobList) == 1), jobList, [])):
            timeoutList.append(jobNum)
            self._defect_counter.pop(jobNum)
    return (change, timeoutList, reported)
def getEntries(self, path, metadata, events, seList, objStore):
    if 'JOBINFO' not in objStore:
        raise DatasetError('Job information is not filled! Ensure that "JobInfoFromOutputDir" is scheduled!')
    try:
        jobInfo = objStore['JOBINFO']
        files = ifilter(lambda x: x[0].startswith('file'), jobInfo.items())
        fileInfos = imap(lambda x_y: tuple(x_y[1].strip('"').split(' ')), files)
        for (hashMD5, name_local, name_dest, pathSE) in fileInfos:
            metadata.update({'SE_OUTPUT_HASH_MD5': hashMD5, 'SE_OUTPUT_FILE': name_local,
                'SE_OUTPUT_BASE': os.path.splitext(name_local)[0], 'SE_OUTPUT_PATH': pathSE})
            yield (os.path.join(pathSE, name_dest), metadata, events, seList, objStore)
    except KeyboardInterrupt:
        sys.exit(os.EX_TEMPFAIL)
    except Exception:
        raise DatasetError('Unable to read file stageout information!')
def _check_imported_use(fn, list_from, code_str):
    # remove import lines for usage check
    def _is_import_or_comment(line):
        line = line.lstrip()
        return line.startswith('#') or line.startswith('from ') or line.startswith('import ')
    code_str = str.join('\n', ifilter(lambda line: not _is_import_or_comment(line), code_str.splitlines()))
    for imported in list_from:
        if ' as ' in imported:
            imported = imported.split(' as ')[1]

        def _chk(fmt):
            code_piece = fmt % imported
            return code_piece in code_str
        if any(imap(_chk, ['%s(', '%s.', 'raise %s', '(%s)', '=%s', ' = %s', ' != %s', 'return %s',
                ', %s)', '(%s, ', 'except %s', ' %s,', '\t%s,', '%s, [', '%s]', 'or %s', '%s not in'])):
            continue
        if imported in ['backends', 'datasets']:
            continue
        logging.warning('%s superfluous %r', fn, imported)
def dump(self):
    stored_logged = self.logged
    self.logged = False
    for data in str.join('', ifilter(identity, self._log)).splitlines():
        self._console.eraseLine()
        self.write(data + '\n')
    self.logged = stored_logged
def filter_dict(mapping, key_filter=lambda k: True, value_filter=lambda v: True):
    def _filter_items(k_v):
        return key_filter(k_v[0]) and value_filter(k_v[1])
    return dict(ifilter(_filter_items, mapping.items()))
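# Illustrative usage sketch for filter_dict (not part of the original source; ifilter is assumed
# to be the lazy filter from the project's python_compat layer). Keep only entries whose key does
# not start with an underscore and whose value is not None:
if __name__ == '__main__':
    settings = {'a': 1, '_b': 2, 'c': None}
    print(filter_dict(settings,
        key_filter=lambda k: not k.startswith('_'),
        value_filter=lambda v: v is not None))  # {'a': 1}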
def _getUserSource(self, pExpr):
    # Wrap psource factory functions
    def createWrapper(clsName):
        def wrapper(*args):
            parameterClass = ParameterSource.getClass(clsName)
            try:
                return parameterClass.create(self._paramConfig, self._repository, *args)
            except Exception:
                raise ParameterError('Error while creating %r with arguments %r' % (parameterClass.__name__, args))
        return wrapper
    userFun = {}
    for clsInfo in ParameterSource.getClassList():
        for clsName in ifilter(lambda name: name != 'depth', clsInfo.keys()):
            userFun[clsName] = createWrapper(clsName)
    try:
        return eval(pExpr, dict(userFun))  # pylint:disable=eval-used
    except Exception:
        self._log.warning('Available functions: %s', sorted(userFun.keys()))
        raise
def _build_blocks(self, map_key2fm_list, map_key2name, map_key2metadata_dict):
    # Return named dataset
    for key in sorted(map_key2fm_list):
        result = {
            DataProvider.Dataset: map_key2name[key[:1]],
            DataProvider.BlockName: map_key2name[key[:2]],
        }
        fm_list = map_key2fm_list[key]
        # Determine location_list
        location_list = None
        for file_location_list in ifilter(lambda s: s is not None, imap(itemgetter(3), fm_list)):
            location_list = location_list or []
            location_list.extend(file_location_list)
        if location_list is not None:
            result[DataProvider.Locations] = list(UniqueList(location_list))
        # use first file [0] to get the initial metadata_dict [1]
        metadata_name_list = list(fm_list[0][1].keys())
        result[DataProvider.Metadata] = metadata_name_list

        # translate file metadata into data provider file info entries
        def _translate_fm2fi(url, metadata_dict, entries, location_list, obj_dict):
            if entries is None:
                entries = -1
            return {DataProvider.URL: url, DataProvider.NEntries: entries,
                DataProvider.Metadata: lmap(metadata_dict.get, metadata_name_list)}
        result[DataProvider.FileList] = lsmap(_translate_fm2fi, fm_list)
        yield result
def getEntries(self, path, metadata, events, seList, objStore):
    datacachePath = os.path.join(objStore.get('GC_WORKDIR', ''), 'datacache.dat')
    source = utils.QM((self._source == '') and os.path.exists(datacachePath), datacachePath, self._source)
    if source and (source not in self._lfnMap):
        pSource = DataProvider.createInstance('ListProvider', createConfig(), source)
        for (n, fl) in imap(lambda b: (b[DataProvider.Dataset], b[DataProvider.FileList]), pSource.getBlocks()):
            self._lfnMap.setdefault(source, {}).update(
                dict(imap(lambda fi: (self.lfnTrans(fi[DataProvider.URL]), n), fl)))
    pList = set()
    for key in ifilter(lambda k: k in metadata, self._parentKeys):
        pList.update(imap(lambda pPath: self._lfnMap.get(source, {}).get(self.lfnTrans(pPath)), metadata[key]))
    metadata['PARENT_PATH'] = lfilter(identity, pList)
    yield (path, metadata, events, seList, objStore)
def __init__(self, jobDB, task, jobs = None, configString = ''):
    Report.__init__(self, jobDB, task, jobs, configString)
    catJobs = {}
    catDescDict = {}
    # Assignment of jobs to categories (depending on variables and using datasetnick if available)
    jobConfig = {}
    for jobNum in self._jobs:
        if task:
            jobConfig = task.getJobConfig(jobNum)
        varList = sorted(ifilter(lambda var: '!' not in repr(var), jobConfig.keys()))
        if 'DATASETSPLIT' in varList:
            varList.remove('DATASETSPLIT')
            varList.append('DATASETNICK')
        catKey = str.join('|', imap(lambda var: '%s=%s' % (var, jobConfig[var]), varList))
        catJobs.setdefault(catKey, []).append(jobNum)
        if catKey not in catDescDict:
            catDescDict[catKey] = dict(imap(lambda var: (var, jobConfig[var]), varList))
    # Kill redundant keys from description
    commonVars = dict(imap(lambda var: (var, jobConfig[var]), varList))  # seed with last varList
    for catKey in catDescDict:
        for key in list(commonVars.keys()):
            if key not in catDescDict[catKey].keys():
                commonVars.pop(key)
            elif commonVars[key] != catDescDict[catKey][key]:
                commonVars.pop(key)
    for catKey in catDescDict:
        for commonKey in commonVars:
            catDescDict[catKey].pop(commonKey)
    # Generate job-category map with efficient int keys - catNum becomes the new catKey
    self._job2cat = {}
    self._catDescDict = {}
    for catNum, catKey in enumerate(sorted(catJobs)):
        self._catDescDict[catNum] = catDescDict[catKey]
        self._job2cat.update(dict.fromkeys(catJobs[catKey], catNum))
def getMetadata(fi, idxList):
    idxList = ifilter(lambda idx: idx < len(fi[DataProvider.Metadata]), idxList)
    return json.dumps(lmap(lambda idx: fi[DataProvider.Metadata][idx], idxList))
def _iter_datasource_items(self, item, metadata_dict, entries, location_list, obj_dict):
    if 'JOBINFO' not in obj_dict:
        raise DatasetError('Job infos not available! Ensure that "JobInfoFromOutputDir" is selected!')
    try:
        job_info_dict = obj_dict['JOBINFO']
        file_info_str_iter = ifilter(lambda x: x[0].startswith('file'), job_info_dict.items())
        file_info_tuple_list = imap(lambda x_y: tuple(x_y[1].strip('"').split(' ')), file_info_str_iter)
        for (file_hash, fn_local, fn_dest, se_path) in file_info_tuple_list:
            metadata_dict.update({'SE_OUTPUT_HASH_MD5': file_hash, 'SE_OUTPUT_FILE': fn_local,
                'SE_OUTPUT_BASE': os.path.splitext(fn_local)[0], 'SE_OUTPUT_PATH': se_path})
            yield (os.path.join(se_path, fn_dest), metadata_dict, entries, location_list, obj_dict)
    except Exception:
        raise DatasetError('Unable to read file stageout information!')
def splitBlackWhiteList(bwfilter):
    blacklist = lmap(lambda x: x[1:], ifilter(lambda x: x.startswith('-'), QM(bwfilter, bwfilter, [])))
    whitelist = lfilter(lambda x: not x.startswith('-'), QM(bwfilter, bwfilter, []))
    return (blacklist, whitelist)
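# Illustrative usage sketch for splitBlackWhiteList (not part of the original source; lmap,
# lfilter, ifilter and QM are assumed to come from the project's python_compat/utils layer,
# and the site names below are made-up example values). Entries prefixed with '-' end up in the
# blacklist with the prefix stripped, everything else in the whitelist:
if __name__ == '__main__':
    print(splitBlackWhiteList(['T2_DE_DESY', '-T1_US_FNAL', 'T2_CH_CERN']))
    # -> (['T1_US_FNAL'], ['T2_DE_DESY', 'T2_CH_CERN'])
    print(splitBlackWhiteList(None))  # -> ([], [])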
def _forwardCall(self, args, assignFun, callFun):
    argMap = self._getMapID2Backend(args, assignFun)
    for wmsPrefix in ifilter(lambda wmsPrefix: wmsPrefix in argMap, sorted(self._wmsMap)):
        wms = self._wmsMap[wmsPrefix]
        for result in callFun(wms, argMap[wmsPrefix]):
            yield result
def __init__(self, config):
    ParameterFactory.__init__(self, config)
    self._psrc_list = []
    # Random number variables
    jobs_config = config.change_view(add_sections=['jobs'])
    self._random_variables = jobs_config.get_list('random variables', ['JOB_RANDOM'], on_change=None)
    nseeds = jobs_config.get_int('nseeds', 10)
    seeds_new = lmap(lambda x: str(random.randint(0, 10000000)), irange(nseeds))
    self._random_seeds = jobs_config.get_list('seeds', seeds_new, persistent=True)
    # Get constants from [constants <tags...>]
    constants_config = config.change_view(view_class='TaggedConfigView',
        set_classes=None, set_sections=['constants'], set_names=None)
    constants_pconfig = ParameterConfig(constants_config)
    for vn_const in ifilter(lambda opt: ' ' not in opt, constants_config.get_option_list()):
        constants_config.set('%s type' % vn_const, 'verbatim', '?=')
        self._register_psrc(constants_pconfig, vn_const.upper())
    param_config = config.change_view(view_class='TaggedConfigView',
        set_classes=None, add_sections=['parameters'], inherit_sections=True)
    # Get constants from [<Module>] constants
    task_pconfig = ParameterConfig(param_config)
    for vn_const in param_config.get_list('constants', []):
        config.set('%s type' % vn_const, 'verbatim', '?=')
        self._register_psrc(task_pconfig, vn_const)
    # Get global repeat value from 'parameters' section
    self._repeat = param_config.get_int('repeat', -1, on_change=None)
    self._req = param_config.get_bool('translate requirements', True, on_change=None)
    self._pfactory = param_config.get_plugin('parameter factory', 'SimpleParameterFactory', cls=ParameterFactory)
def _check_get_jobnum_list(self, task, wms, jobnum_list):
    if self._defect_tries:
        num_defect = len(self._defect_counter)  # Waiting list gets larger in case reported == []
        num_wait = num_defect - max(1, int(num_defect / 2 ** self._defect_raster))
        jobnum_list_wait = self._sample(self._defect_counter, num_wait)
        jobnum_list = lfilter(lambda jobnum: jobnum not in jobnum_list_wait, jobnum_list)
    (change, jobnum_list_timeout, reported) = JobManager._check_get_jobnum_list(self, task, wms, jobnum_list)
    for jobnum in reported:
        self._defect_counter.pop(jobnum, None)
    if self._defect_tries and (change is not None):
        # make 'raster' iteratively smaller
        self._defect_raster += 1
        if reported:
            self._defect_raster = 1
        for jobnum in ifilter(lambda x: x not in reported, jobnum_list):
            self._defect_counter[jobnum] = self._defect_counter.get(jobnum, 0) + 1
        jobnum_list_kick = lfilter(lambda jobnum: self._defect_counter[jobnum] >= self._defect_tries,
            self._defect_counter)
        if (len(reported) == 0) and (len(jobnum_list) == 1):
            jobnum_list_kick.extend(jobnum_list)
        for jobnum in set(jobnum_list_kick):
            jobnum_list_timeout.append(jobnum)
            self._defect_counter.pop(jobnum)
    return (change, jobnum_list_timeout, reported)
def _getBlocksInternal(self):
    # Split files into blocks/datasets via key functions and determine metadata intersection
    (protoBlocks, commonDS, commonB) = ({}, {}, {})

    def getActiveKeys(kUser, kGuard, gIdx):
        return kUser + (kGuard or lchain(imap(lambda x: x.getGuards()[gIdx], self._scanner)))
    keysDS = getActiveKeys(self._ds_keys_user, self._ds_keys_guard, 0)
    keysB = getActiveKeys(self._b_keys_user, self._b_keys_guard, 1)
    for fileInfo in ifilter(itemgetter(0), self._collectFiles()):
        hashDS = self._generateKey(keysDS, md5_hex(repr(self._datasetExpr)) + md5_hex(repr(self._datasetNick)), *fileInfo)
        hashB = self._generateKey(keysB, hashDS + md5_hex(repr(fileInfo[3])), *fileInfo)  # [3] == SE list
        if not self._ds_select or (hashDS in self._ds_select):
            if not self._b_select or (hashB in self._b_select):
                fileInfo[1].update({'DS_KEY': hashDS, 'BLOCK_KEY': hashB})
                protoBlocks.setdefault(hashDS, {}).setdefault(hashB, []).append(fileInfo)
                utils.intersectDict(commonDS.setdefault(hashDS, dict(fileInfo[1])), fileInfo[1])
                utils.intersectDict(commonB.setdefault(hashDS, {}).setdefault(hashB, dict(fileInfo[1])), fileInfo[1])
    # Generate names for blocks/datasets using common metadata
    (hashNameDictDS, hashNameDictB) = ({}, {})
    for hashDS in protoBlocks:
        hashNameDictDS[hashDS] = self._generateDatasetName(hashDS, commonDS[hashDS])
        for hashB in protoBlocks[hashDS]:
            hashNameDictB[hashB] = (hashDS, self._generateBlockName(hashB, commonB[hashDS][hashB]))
    self._findCollision('dataset', hashNameDictDS, commonDS, keysDS, lambda name, key: [key])
    self._findCollision('block', hashNameDictB, commonB, keysDS + keysB,
        lambda name, key: [name[0], key], lambda name: name[1])
    for block in self._buildBlocks(protoBlocks, hashNameDictDS, hashNameDictB):
        yield block
def _check_get_jobnum_list(self, task, wms, jobnum_list):
    if self._defect_tries:
        num_defect = len(self._defect_counter)  # Waiting list gets larger in case reported == []
        num_wait = num_defect - max(1, int(num_defect / 2 ** self._defect_raster))
        jobnum_list_wait = self._sample(self._defect_counter, num_wait)
        jobnum_list = lfilter(lambda jobnum: jobnum not in jobnum_list_wait, jobnum_list)
    (change, jobnum_list_timeout, reported) = JobManager._check_get_jobnum_list(self, task, wms, jobnum_list)
    for jobnum in reported:
        self._defect_counter.pop(jobnum, None)
    if self._defect_tries and (change is not None):
        # make 'raster' iteratively smaller
        self._defect_raster += 1
        if reported:
            self._defect_raster = 1
        for jobnum in ifilter(lambda x: x not in reported, jobnum_list):
            self._defect_counter[jobnum] = self._defect_counter.get(jobnum, 0) + 1
        jobnum_list_kick = lfilter(lambda jobnum: self._defect_counter[jobnum] >= self._defect_tries,
            self._defect_counter)
        if (len(reported) == 0) and (len(jobnum_list) == 1):
            jobnum_list_kick.extend(jobnum_list)
        for jobnum in set(jobnum_list_kick):
            jobnum_list_timeout.append(jobnum)
            self._defect_counter.pop(jobnum)
    return (change, jobnum_list_timeout, reported)
def _format_reqs(self, req_list, result):
    req_string_list = ['other.GlueHostNetworkAdapterOutboundIP']
    for req_type, arg in req_list:
        if req_type == WMS.SOFTWARE:
            req_string_list.append('Member(%s, other.GlueHostApplicationSoftwareRunTimeEnvironment)' % self._escape(arg))
        elif req_type == WMS.WALLTIME:
            if arg > 0:
                req_string_list.append('(other.GlueCEPolicyMaxWallClockTime >= %d)' % int((arg + 59) / 60))
        elif req_type == WMS.CPUTIME:
            if arg > 0:
                req_string_list.append('(other.GlueCEPolicyMaxCPUTime >= %d)' % int((arg + 59) / 60))
        elif req_type == WMS.MEMORY:
            if arg > 0:
                req_string_list.append('(other.GlueHostMainMemoryRAMSize >= %d)' % arg)
        elif req_type == WMS.STORAGE:
            req_string_list.append(self._format_reqs_storage(arg))
        elif req_type == WMS.SITES:
            req_string_list.append(self._format_reqs_sites(arg))
        elif req_type == WMS.CPUS:
            pass  # Handle number of cpus in makeJDL
        else:
            raise APIError('Unknown requirement type %r or argument %r' % (WMS.enum2str(req_type), arg))
    result['Requirements'] = str.join(' && ', ifilter(identity, req_string_list))
def process(self, dn):
    jobInfo = JobInfoProcessor.process(self, dn)
    if jobInfo:
        jobData = jobInfo[2]
        result = {}
        # parse old job info data format for files
        oldFileFormat = [FileInfoProcessor.Hash, FileInfoProcessor.NameLocal,
            FileInfoProcessor.NameDest, FileInfoProcessor.Path]
        for (fileKey, fileData) in ifilter(lambda key_value: key_value[0].startswith('FILE'), jobData.items()):
            fileIdx = fileKey.replace('FILE', '').rjust(1, '0')
            result[int(fileIdx)] = dict(izip(oldFileFormat, fileData.strip('"').split(' ')))
        # parse new job info data format
        for (fileKey, fileData) in ifilter(lambda key_value: key_value[0].startswith('OUTPUT_FILE'), jobData.items()):
            (fileIdx, fileProperty) = fileKey.replace('OUTPUT_FILE_', '').split('_')
            if isinstance(fileData, str):
                fileData = fileData.strip('"')
            result.setdefault(int(fileIdx), {})[FileInfoProcessor.str2enum(fileProperty)] = fileData
        return list(result.values())
def _getFilteredVarDict(self, varDict, varDictKeyComponents, hashKeys):
    tmp = varDict
    for key_component in varDictKeyComponents:
        tmp = tmp[key_component]
    result = {}
    for key, value in ifilter(lambda k_v: k_v[0] in hashKeys, tmp.items()):
        result[key] = value
    return result
def get_dict(self, option, default=unspecified, parser=identity, strfun=str, **kwargs):
    # Returns a tuple with (<dictionary>, <keys>) - the keys are sorted by order of appearance
    # Default key is accessed via key == None (None is never in keys!)
    return self._get_internal('dictionary',
        obj2str=lambda value: str_dict_cfg(value, parser, strfun),
        str2obj=lambda value: parse_dict_cfg(value, parser),
        def2obj=lambda value: (value, sorted(ifilter(lambda key: key is not None, value.keys()))),
        option=option, default_obj=default, **kwargs)
def get_cmssw_info(tar_fn):
    import xml.dom.minidom
    # Read framework report files to get number of events
    cmssw_tar = tarfile.open(tar_fn, 'r:gz')
    fwk_report_list = ifilter(lambda x: os.path.basename(x.name) == 'report.xml', cmssw_tar.getmembers())
    for fwk_report_fn in imap(cmssw_tar.extractfile, fwk_report_list):
        yield xml.dom.minidom.parse(fwk_report_fn)
def translateEntry(meta):  # Translates parameter setting into hash
    tmp = md5()
    for key in ifilter(lambda k: k in meta, keys_store):
        if str(meta[key]):
            tmp.update(str2bytes(key))
            tmp.update(str2bytes(str(meta[key])))
    return {ParameterInfo.HASH: tmp.hexdigest(), 'GC_PARAM': meta['GC_PARAM'],
        ParameterInfo.ACTIVE: meta[ParameterInfo.ACTIVE]}
def _forward_call(self, args, assign_fun, call_fun):
    backend_name2args = self._get_map_backend_name2args(args, assign_fun)

    def _make_generator(backend_name):
        return call_fun(self._map_backend_name2backend[backend_name], backend_name2args[backend_name])
    backend_name_iter = ifilter(backend_name2args.__contains__, self._map_backend_name2backend)
    for result in tchain(imap(_make_generator, backend_name_iter)):
        yield result
def _translate_pa2pspi_list(padapter):
    # Reduces parameter adapter output to essential information for diff - faster than keying
    meta_iter = ifilter(lambda k: not k.untracked, padapter.get_job_metadata())
    meta_list = sorted(meta_iter, key=lambda k: k.value)
    for psp in padapter.iter_jobs():  # Translates parameter space point into hash
        psp_item_iter = imap(lambda meta: (meta.value, psp.get(meta.value)), meta_list)
        hash_str = md5_hex(repr(lfilter(itemgetter(1), psp_item_iter)))
        yield (psp[ParameterInfo.ACTIVE], hash_str, psp['GC_PARAM'])
def _parse(self, proc):
    status_iter = proc.stdout.iter(self._timeout)
    head = lmap(lambda x: x.strip('%').lower(), next(status_iter, '').split())
    for entry in imap(str.strip, status_iter):
        job_info = dict(izip(head, ifilter(lambda x: x != '', entry.split(None, len(head) - 1))))
        job_info[CheckInfo.WMSID] = job_info.pop('pid')
        job_info[CheckInfo.RAW_STATUS] = job_info.pop('stat')
        job_info.update({CheckInfo.QUEUE: 'localqueue', CheckInfo.WN: 'localhost'})
        yield job_info
def addFSet(self, group, short, option, help, flag_set):
    def is_default(opt):
        return (self._parser.get_option(opt).default and self._parser.get_option(opt).action == 'store_true') or \
            (not self._parser.get_option(opt).default and self._parser.get_option(opt).action == 'store_false')
    if '%s' in help:
        help = help % str.join(' ', ifilter(lambda x: not is_default(x), flag_set.split()))
    flag_set_id = len(self._flag_set)
    self._flag_set[flag_set_id] = flag_set
    return self._add(group, short, option, False, 'store_true', help, dest='_flag_set_%d' % flag_set_id)
def _writeJob2PID(self, fn):
    fp = ZipFile(fn, 'w')
    try:
        fp.write('%d\n' % (self._rawSource.getMaxParameters() or 0))
        data = ifilter(lambda jobNum_pNum: jobNum_pNum[0] != jobNum_pNum[1], self._mapJob2PID.items())
        datastr = lmap(lambda jobNum_pNum: '%d:%d' % jobNum_pNum, data)
        fp.write('%s\n' % str.join(',', datastr))
    finally:
        fp.close()
def _parse(self, proc):
    for line in ifilter(identity, proc.stdout.iter(self._timeout)):
        if 'error' in line.lower():
            raise BackendError('Unable to parse status line %s' % repr(line))
        tmp = line.split()
        try:
            wmsID = str(int(tmp[0]))
        except Exception:
            continue
        yield {CheckInfo.WMSID: wmsID, CheckInfo.RAW_STATUS: tmp[2], CheckInfo.QUEUE: tmp[1]}
def _write_jobnum2pnum(self, fn):
    fp = GZipTextFile(fn, 'w')
    try:
        fp.write('%d\n' % (self._psrc_raw.get_parameter_len() or 0))
        data = ifilter(lambda jobnum_pnum: jobnum_pnum[0] != jobnum_pnum[1], self._map_jobnum2pnum.items())
        datastr = lmap(lambda jobnum_pnum: '%d:%d' % jobnum_pnum, data)
        fp.write('%s\n' % str.join(',', datastr))
    finally:
        fp.close()
def __init__(self, path):
    activity = Activity('Reading dataset partition file')
    self._lock = GCLock()
    self._fmt = utils.DictFormat()
    self._tar = tarfile.open(path, 'r:')
    (self._cacheKey, self._cacheTar) = (None, None)

    metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(), keyParser={None: str})
    self.maxJobs = metadata.pop('MaxJobs')
    self.classname = metadata.pop('ClassName')
    self.metadata = {'dataset': dict(ifilter(lambda k_v: not k_v[0].startswith('['), metadata.items()))}
    for (k, v) in ifilter(lambda k_v: k_v[0].startswith('['), metadata.items()):
        self.metadata.setdefault('dataset %s' % k.split(']')[0].lstrip('['), {})[k.split(']')[1].strip()] = v
    activity.finish()

    self._parserMap = {None: str, DataSplitter.NEntries: int, DataSplitter.Skipped: int,
        DataSplitter.DatasetID: int, DataSplitter.Invalid: parseBool,
        DataSplitter.Locations: lambda x: parseList(x, ','),
        DataSplitter.MetadataHeader: parseJSON,
        DataSplitter.Metadata: lambda x: parseJSON(x.strip("'"))}
def _combineSources(self, clsName, args):
    repeat = reduce(lambda a, b: a * b, ifilter(lambda expr: isinstance(expr, int), args), 1)
    args = lfilter(lambda expr: not isinstance(expr, int), args)
    if args:
        result = ParameterSource.createInstance(clsName, *args)
        if repeat > 1:
            return ParameterSource.createInstance('RepeatParameterSource', result, repeat)
        return result
    elif repeat > 1:
        return repeat
    return NullParameterSource()
def getCMSSWInfo(tarPath):
    import xml.dom.minidom
    # Read framework report files to get number of events
    tarFile = tarfile.open(tarPath, 'r:gz')
    fwkReports = ifilter(lambda x: os.path.basename(x.name) == 'report.xml', tarFile.getmembers())
    for fwkReport in imap(tarFile.extractfile, fwkReports):
        try:
            yield xml.dom.minidom.parse(fwkReport)
        except Exception:
            logging.exception('Error while parsing %s', tarPath)
            raise
def _readJob2PID(self):
    fp = ZipFile(self._pathJob2PID, 'r')
    try:
        self.maxN = int(fp.readline())
        if not self.maxN:
            self.maxN = None
        mapInfo = ifilter(identity, imap(str.strip, fp.readline().split(',')))
        self._mapJob2PID = dict(imap(lambda x: tuple(imap(lambda y: int(y.lstrip('!')), x.split(':'))), mapInfo))
        self._activeMap = {}
    finally:
        fp.close()
def translatePSource(psource):
    keys_store = sorted(ifilter(lambda k: not k.untracked, psource.getJobKeys()))

    def translateEntry(meta):  # Translates parameter setting into hash
        tmp = md5()
        for key in ifilter(lambda k: k in meta, keys_store):
            if str(meta[key]):
                tmp.update(str2bytes(key))
                tmp.update(str2bytes(str(meta[key])))
        return {ParameterInfo.HASH: tmp.hexdigest(), 'GC_PARAM': meta['GC_PARAM'],
            ParameterInfo.ACTIVE: meta[ParameterInfo.ACTIVE]}
    for entry in psource.iterJobs():
        yield translateEntry(entry)
def _parse(self, proc):
    status_iter = proc.stdout.iter(self._timeout)
    next(status_iter)
    tmpHead = [CheckInfo.WMSID, 'user', CheckInfo.RAW_STATUS, CheckInfo.QUEUE, 'from', CheckInfo.WN, 'job_name']
    for line in ifilter(identity, status_iter):
        try:
            tmp = line.split()
            job_info = dict(izip(tmpHead, tmp[:7]))
            job_info['submit_time'] = str.join(' ', tmp[7:10])
            yield job_info
        except Exception:
            raise BackendError('Error reading job info:\n%s' % line)