def __init__(self, fn, format='sniffed'):
	"""Read a CSV file with a header line and expose its rows as parameters.

	fn     -- path to the CSV file
	format -- csv dialect name; 'sniffed' uses the dialect detected
	          from the first line of the file
	"""
	(self._fn, self._format) = (fn, format)
	# 'with' guarantees the handle is closed even if sniffing/parsing fails
	with open(fn) as fp:
		first_line = fp.readline()
		sniffed = csv.Sniffer().sniff(first_line)
		csv.register_dialect('sniffed', sniffed)
		# header fields come from the first line; the trailing None fieldname
		# collects surplus columns so malformed rows can be detected below
		tmp = list(csv.DictReader(fp, first_line.strip().split(sniffed.delimiter) + [None], dialect=format))
	for entry in tmp:
		entry.pop(None, None)  # drop surplus columns
		if None in entry.values():  # row had fewer columns than the header
			raise Exception('Malformed entry in csv file %r: %r' % (fn, entry))

	def cleanupDict(d):
		# strip all key value entries (renamed local - no longer shadows outer 'tmp')
		stripped = tuple(imap(lambda item: lmap(str.strip, item), d.items()))
		# filter empty parameters
		return lfilter(lambda k_v: k_v[0] != '', stripped)
	keys = []
	if tmp:
		keys = lmap(ParameterMetadata, tmp[0].keys())
	values = lmap(lambda d: dict(cleanupDict(d)), tmp)
	InternalParameterSource.__init__(self, values, keys)
def _cancel(self, task, wms, jobnum_list, interactive, show_jobs):
	"""Cancel the given jobs via the wms and mark them as cancelled in the job db.

	task        -- current task object (forwarded to update / event hooks)
	wms         -- workload management system used to cancel the jobs
	jobnum_list -- job numbers to cancel (reversed in place below)
	interactive -- ask the user before cancelling / marking failed cancels
	show_jobs   -- display a report of the affected jobs first
	"""
	if len(jobnum_list) == 0:
		return
	if show_jobs:
		self._abort_report.show_report(self.job_db, jobnum_list)
	if interactive and not self._uii.prompt_bool('Do you really want to cancel these jobs?', True):
		return

	def _mark_cancelled(jobnum):
		# update job db state and notify local event handlers
		job_obj = self.job_db.get_job(jobnum)
		if job_obj is not None:
			self._update(task, job_obj, jobnum, Job.CANCELLED)
			self._local_event_handler.on_job_update(task, wms, job_obj, jobnum, {'reason': 'cancelled'})

	jobnum_list.reverse()
	map_gc_id2jobnum = self._get_map_gc_id_jobnum(jobnum_list)
	# cancel jobs in descending job number order
	gc_id_list = sorted(map_gc_id2jobnum, key=lambda gc_id: -map_gc_id2jobnum[gc_id])
	for (gc_id,) in wms.cancel_jobs(gc_id_list):
		# Remove cancelled job from todo list and mark as cancelled
		_mark_cancelled(map_gc_id2jobnum.pop(gc_id))

	if map_gc_id2jobnum:  # jobs the wms did not confirm as cancelled
		jobnum_list = list(map_gc_id2jobnum.values())
		self._log.warning('There was a problem with cancelling the following jobs:')
		self._abort_report.show_report(self.job_db, jobnum_list)
		if (not interactive) or self._uii.prompt_bool('Do you want to mark them as cancelled?', True):
			lmap(_mark_cancelled, jobnum_list)
	if interactive:
		wait(2)
def deploy_task(self, task, transfer_se, transfer_sb):
	"""Prepare task deployment: register SE input files and pack the sandbox tarball.

	transfer_se -- transfer the common SE input files now
	transfer_sb -- force repacking of the sandbox tar file
	"""
	# HACK
	self._output_fn_list = lmap(lambda d_s_t: d_s_t[2], self._get_out_transfer_info_list(task))
	task.validate_variables()
	# add task SE files to SM
	self._sm_se_in.add_file_list(lmap(lambda d_s_t: d_s_t[2], task.get_se_in_fn_list()))
	# Transfer common SE files
	if transfer_se:
		self._sm_se_in.do_transfer(task.get_se_in_fn_list())

	def _convert(fn_list):
		# yield (file, name inside tarball) pairs; plain paths use their basename,
		# other entries (presumably file-like objects - TODO confirm) use .name
		for fn in fn_list:
			if isinstance(fn, str):
				yield (fn, os.path.basename(fn))
			else:
				yield (fn, os.path.basename(fn.name))

	# Package sandbox tar file
	self._log.log(logging.INFO1, 'Packing sandbox')
	sandbox = self._get_sandbox_name(task)
	ensure_dir_exists(os.path.dirname(sandbox), 'sandbox directory')
	if not os.path.exists(sandbox) or transfer_sb:
		sandbox_file_list = self._get_sandbox_file_list(task, [self._sm_se_in, self._sm_se_out])
		create_tarball(_convert(sandbox_file_list), name=sandbox)
def _get_sandbox_file_list(self, task, sm_list):
	"""Return the list of input sandbox files for the given task.

	Combines monitoring files, dependency environment scripts, the resolved
	task input files and two generated virtual files holding the exported
	task configuration and the variable alias map.
	"""
	# Prepare all input files
	dep_list = set(ichain(imap(lambda x: x.get_dependency_list(), [task] + sm_list)))
	# resolve each dependency to an 'env.<dep>.sh' script in one of the package share dirs
	dep_fn_list = lmap(lambda dep: resolve_path('env.%s.sh' % dep,
		lmap(lambda pkg: get_path_share('', pkg=pkg), os.listdir(get_path_pkg()))), dep_list)
	task_config_dict = dict_union(self._remote_event_handler.get_mon_env_dict(),
		*imap(lambda x: x.get_task_dict(), [task] + sm_list))
	task_config_dict.update({'GC_DEPFILES': str.join(' ', dep_list),
		'GC_USERNAME': self._token.get_user_name(), 'GC_WMS_NAME': self._name})
	# shell export statements written to '_config.sh'
	task_config_str_list = DictFormat(escape_strings=True).format(
		task_config_dict, format='export %s%s%s\n')
	# monitoring variables map onto themselves; task aliases override / extend
	vn_alias_dict = dict(izip(self._remote_event_handler.get_mon_env_dict().keys(),
		self._remote_event_handler.get_mon_env_dict().keys()))
	vn_alias_dict.update(task.get_var_alias_map())
	vn_alias_str_list = DictFormat(delimeter=' ').format(vn_alias_dict, format='%s%s%s\n')

	# Resolve wildcards in task input files
	def _get_task_fn_list():
		for fpi in task.get_sb_in_fpi_list():
			matched = glob.glob(fpi.path_abs)
			if matched != []:
				for match in matched:
					yield match
			else:
				# no wildcard match - keep the path as given
				yield fpi.path_abs
	return lchain([self._remote_event_handler.get_file_list(), dep_fn_list, _get_task_fn_list(), [
		VirtualFile('_config.sh', sorted(task_config_str_list)),
		VirtualFile('_varmap.dat', sorted(vn_alias_str_list))]])
def cancel(self, wms, jobs, interactive = False, showJobs = True):
	"""Cancel the given jobs via the wms and mark them as cancelled.

	NOTE: 'jobs' is modified in place - successfully cancelled job numbers
	are removed from it while iterating the wms results.
	"""
	if len(jobs) == 0:
		return
	if showJobs:
		self._reportClass(self.jobDB, self._task, jobs).display()
	if interactive and not utils.getUserBool('Do you really want to cancel these jobs?', True):
		return

	def mark_cancelled(jobNum):
		# update job db state and notify event handlers
		jobObj = self.jobDB.get(jobNum)
		if jobObj is None:
			return
		self._update(jobObj, jobNum, Job.CANCELLED)
		self._eventhandler.onJobUpdate(wms, jobObj, jobNum, {'reason': 'cancelled'})

	jobs.reverse()
	for (jobNum, wmsId) in wms.cancelJobs(self._wmsArgs(jobs)):
		# Remove deleted job from todo list and mark as cancelled
		assert(self.jobDB.get(jobNum).wmsId == wmsId)
		jobs.remove(jobNum)
		mark_cancelled(jobNum)

	if len(jobs) > 0:  # jobs the wms did not confirm as cancelled
		self._log_user.warning('There was a problem with cancelling the following jobs:')
		self._reportClass(self.jobDB, self._task, jobs).display()
		if (interactive and utils.getUserBool('Do you want to mark them as cancelled?', True)) or not interactive:
			lmap(mark_cancelled, jobs)
	if interactive:
		utils.wait(2)
def makeJDL(self, jobNum, module):
	"""Create the JDL contents (requirements + key/value dict) for one job."""
	cfgPath = os.path.join(self._jobPath, 'job_%d.var' % jobNum)
	sbIn = lmap(lambda d_s_t: d_s_t[1], self._getSandboxFilesIn(module))
	sbOut = lmap(lambda d_s_t: d_s_t[2], self._getSandboxFilesOut(module))
	wcList = lfilter(lambda x: '*' in x, sbOut)
	if len(wcList):
		# wildcard outputs are collected into GC_WC.tar.gz on the worker node
		self._writeJobConfig(cfgPath, jobNum, module, {'GC_WC': str.join(' ', wcList)})
		sandboxOutJDL = lfilter(lambda x: x not in wcList, sbOut) + ['GC_WC.tar.gz']
	else:
		self._writeJobConfig(cfgPath, jobNum, module, {})
		sandboxOutJDL = sbOut
	# Warn about too large sandboxes
	sbSizes = lmap(os.path.getsize, sbIn)
	if sbSizes and (self._warnSBSize > 0) and (sum(sbSizes) > self._warnSBSize * 1024 * 1024):
		if not utils.getUserBool('Sandbox is very large (%d bytes) and can cause issues with the WMS! Do you want to continue?' % sum(sbSizes), False):
			sys.exit(os.EX_OK)
		self._warnSBSize = 0  # only warn once

	reqs = self.brokerSite.brokerAdd(module.getRequirements(jobNum), WMS.SITES)
	# format a python list as a JDL string list: { "a", "b", ... }
	formatStrList = lambda strList: '{ %s }' % str.join(', ', imap(lambda x: '"%s"' % x, strList))
	contents = {
		'Executable': '"gc-run.sh"',
		'Arguments': '"%d"' % jobNum,
		'StdOutput': '"gc.stdout"',
		'StdError': '"gc.stderr"',
		'InputSandbox': formatStrList(sbIn + [cfgPath]),
		'OutputSandbox': formatStrList(sandboxOutJDL),
		'VirtualOrganisation': '"%s"' % self.vo,
		'Rank': '-other.GlueCEStateEstimatedResponseTime',
		'RetryCount': 2
	}
	return self._jdl_writer.format(reqs, contents)
def process(self, pnum, partition_info, result):
	"""Replace the urls in the partition file list by their looked-up values."""
	url_list = partition_info[DataSplitter.FileList]
	locations = partition_info.get(DataSplitter.Locations)
	if not locations:
		partition_info[DataSplitter.FileList] = lmap(lambda url: self._lookup(url, None), url_list)
	else:
		for location in locations:
			# NOTE(review): each iteration maps the ORIGINAL url_list and
			# overwrites FileList, so only the last location takes effect -
			# confirm this is intended
			partition_info[DataSplitter.FileList] = lmap(lambda url: self._lookup(url, location), url_list)
def join_config_locations(opt_first, *opt_list):
	"""Build config option names by joining the given parts with spaces.

	Each part may be a string or a list of strings; lists are expanded into
	all combinations. Every result is whitespace-stripped.
	"""
	if isinstance(opt_first, (list, tuple)):
		# first part has alternatives - expand each alternative recursively
		if opt_list:
			return lchain(imap(lambda alt: join_config_locations(alt.strip(), *opt_list), opt_first))
		return lmap(str.strip, opt_first)
	if opt_list:
		# prefix every combination of the remaining parts with the first part
		suffix_list = join_config_locations(*opt_list)
		return lmap(lambda suffix: (opt_first + ' ' + suffix).strip(), suffix_list)
	return [opt_first.strip()]
def process(self, pNum, splitInfo, result):
	"""Replace the urls in the partition file list by their looked-up values."""
	fl = splitInfo[DataSplitter.FileList]
	locations = splitInfo.get(DataSplitter.Locations)
	if not locations:
		splitInfo[DataSplitter.FileList] = lmap(lambda fn: self._lookup(fn, None), fl)
	else:
		for location in locations:
			# NOTE(review): each iteration maps the ORIGINAL file list and
			# overwrites FileList, so only the last location takes effect -
			# confirm this is intended
			splitInfo[DataSplitter.FileList] = lmap(lambda fn: self._lookup(fn, location), fl)
def wait(timeout):
	"""Wait for up to 'timeout' seconds - in 5s steps, then 1s steps at the end.

	Returns False immediately if abort() is signalled, True after the full wait.
	"""
	step_list = [(tick, 5) for tick in irange(0, timeout - 5, 5)]
	step_list += [(tick, 1) for tick in irange(max(timeout - 5, 0), timeout)]
	for (elapsed, duration) in step_list:
		if abort():
			return False
		log = ActivityLog('waiting for %d seconds' % (timeout - elapsed))
		time.sleep(duration)
		del log  # remove the activity message before the next iteration
	return True
def __init__(self, arg, **kwargs):
	"""Build a multi job selector; whitespace separates OR groups, '+' joins AND terms,
	a leading '~' negates a term."""
	def _term_to_selector(term):
		expect = (term[0] != '~')  # negated terms must NOT match
		term = term.lstrip('~')
		selectorType = utils.QM(term[0].isdigit(), 'id', 'state')
		if ':' in term:  # explicit 'type:expression' syntax takes precedence
			selectorType = term.split(':', 1)[0]
		selector = JobSelector.createInstance(selectorType, term.split(':', 1)[-1], **kwargs)
		return lambda jobNum, jobObj: selector.__call__(jobNum, jobObj) == expect
	orTerms = str.join('+', imap(str.strip, arg.split('+'))).split()
	self.js = lmap(lambda orTerm: lmap(_term_to_selector, orTerm.split('+')), orTerms)
def _getPartition(self, key):
	"""Read partition <key> from the nested tar file and return its data dict.

	Partitions are stored in groups of 100 inside '<NNN>XX.tgz' sub-tarballs;
	the decompressed sub-tarball of the current group is cached between calls.
	"""
	partition_group = key // 100  # floor division: keeps the cache key an int under python3
	if not self._cacheKey == partition_group:
		self._cacheKey = partition_group
		subTarFileObj = self._tar.extractfile('%03dXX.tgz' % partition_group)
		# decompress the sub-tarball into memory: 3-4x speedup for sequential access
		subTarFileObj = BytesBuffer(gzip.GzipFile(fileobj = subTarFileObj).read())
		self._cacheTar = tarfile.open(mode = 'r', fileobj = subTarFileObj)
	# parse partition properties - keys are ints, values use the configured parsers
	data = self._fmt.parse(self._cacheTar.extractfile('%05d/info' % key).readlines(),
		keyParser = {None: int}, valueParser = self._parserMap)
	fileList = lmap(bytes2str, self._cacheTar.extractfile('%05d/list' % key).readlines())
	if DataSplitter.CommonPrefix in data:
		# re-attach the common prefix stripped when the partition was written
		fileList = imap(lambda x: '%s/%s' % (data[DataSplitter.CommonPrefix], x), fileList)
	data[DataSplitter.FileList] = lmap(str.strip, fileList)
	return data
def finaliseJobSplitting(self, block, splitInfo, files = None):
	"""Fill splitInfo with block level infos (and optionally file level infos)."""
	# Copy infos from block
	for prop in ['Dataset', 'BlockName', 'Nickname', 'Locations']:
		block_key = getattr(DataProvider, prop)
		if block_key in block:
			splitInfo[getattr(DataSplitter, prop)] = block[block_key]
	if DataProvider.Metadata in block:
		splitInfo[DataSplitter.MetadataHeader] = block[DataProvider.Metadata]
	# Helper for very simple splitter
	if files:
		splitInfo[DataSplitter.FileList] = [fi[DataProvider.URL] for fi in files]
		splitInfo[DataSplitter.NEntries] = sum(fi[DataProvider.NEntries] for fi in files)
		if DataProvider.Metadata in block:
			splitInfo[DataSplitter.Metadata] = [fi[DataProvider.Metadata] for fi in files]
	return splitInfo
def _finish_partition(self, block, partition, fi_list=None):
	"""Copy block level information into the partition (and optionally file infos)."""
	# Copy infos from block
	for (dp_prop, ds_prop) in self._dp_ds_prop_list:
		if dp_prop in block:
			partition[ds_prop] = block[dp_prop]
	if DataProvider.Metadata in block:
		partition[DataSplitter.MetadataHeader] = block[DataProvider.Metadata]
	# Helper for very simple splitter
	if fi_list:
		partition[DataSplitter.FileList] = [fi[DataProvider.URL] for fi in fi_list]
		partition[DataSplitter.NEntries] = sum(fi[DataProvider.NEntries] for fi in fi_list)
		if DataProvider.Metadata in block:
			partition[DataSplitter.Metadata] = [fi[DataProvider.Metadata] for fi in fi_list]
	return partition
def __init__(self, fn, format = 'sniffed'):
	"""Read parameters from a CSV file.

	fn     -- path to the csv file
	format -- csv dialect name; 'sniffed' autodetects from the file head
	"""
	# use 'with' so the file handles are closed instead of being leaked
	with open(fn) as fp:
		sniffed = csv.Sniffer().sniff(fp.read(1024))
	csv.register_dialect('sniffed', sniffed)
	with open(fn) as fp:
		tmp = list(csv.DictReader(fp, dialect = format))

	def cleanupDict(d):
		# strip all key value entries - lmap (not imap): the pairs must be
		# subscriptable for the k_v[0] filter below
		stripped = tuple(imap(lambda item: lmap(str.strip, item), d.items()))
		# filter empty parameters
		return lfilter(lambda k_v: k_v[0] != '', stripped)
	keys = []
	if tmp:
		keys = lmap(ParameterMetadata, tmp[0].keys())
	values = lmap(lambda d: dict(cleanupDict(d)), tmp)
	InternalParameterSource.__init__(self, values, keys)
def __init__(self, arg, **kwargs):
	"""Build a multi job selector; whitespace separates OR groups, '+' joins AND terms,
	a leading '~' negates a term."""
	def _build_term_selector(term):
		invert = (term[0] == '~')
		term = term.lstrip('~')
		if term[0].isdigit():  # numeric terms select by job id
			term_type = 'id'
		elif ':' in term:  # explicit 'type:expression' syntax
			term_type = term.split(':', 1)[0]
		else:
			term_type = 'state'
		selector = JobSelector.create_instance(term_type, term.split(':', 1)[-1], **kwargs)
		if not invert:
			return selector
		return lambda jobnum, job_obj: not selector.__call__(jobnum, job_obj)
	self._arg = arg
	or_term_list = str.join('+', imap(str.strip, arg.split('+'))).split()
	self._js = lmap(lambda or_term: lmap(_build_term_selector, or_term.split('+')), or_term_list)
def list_parameters(opts, psource):
	"""Print a table of parameter values, optionally collapsed by identical content."""
	(result, needGCParam) = get_parameters(opts, psource)
	enabledOutput = opts.output.split(',')
	# restrict columns to the user selection (empty selection means 'all')
	output = lfilter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
	stored = lfilter(lambda k: k.untracked == False, output)
	untracked = lfilter(lambda k: k.untracked == True, output)

	if opts.collapse > 0:
		result_old = result
		result = {}
		result_nicks = {}
		head = [('COLLATE_JOBS', '# of jobs')]
		if 'DATASETSPLIT' in stored:
			stored.remove('DATASETSPLIT')
			if opts.collapse == 1:  # collapse by everything including dataset nick
				stored.append('DATASETNICK')
				head.append(('DATASETNICK', 'DATASETNICK'))
			elif opts.collapse == 2:  # collapse also over nicknames
				head.append(('COLLATE_NICK', '# of nicks'))
		for pset in result_old:
			if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
				pset.pop('DATASETSPLIT')
			nickname = None
			if ('DATASETNICK' in pset) and (opts.collapse == 2):
				nickname = pset.pop('DATASETNICK')
			# group parameter sets by the hash of their stored values
			h = md5_hex(repr(lmap(pset.get, stored)))
			result.setdefault(h, []).append(pset)
			result_nicks.setdefault(h, set()).add(nickname)

		def doCollate(h):
			# represent each group by its first member plus counters
			tmp = result[h][0]
			tmp['COLLATE_JOBS'] = len(result[h])
			tmp['COLLATE_NICK'] = len(result_nicks[h])
			return tmp
		result = lmap(doCollate, result)
	else:
		head = [('GC_JOB_ID', '#')]
		if needGCParam:
			head.append(('GC_PARAM', 'GC_PARAM'))
	if opts.active:
		head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
	if opts.visible:  # user supplied list of columns overrides the stored keys
		stored = opts.visible.split(',')
	head.extend(sorted(izip(stored, stored)))
	if opts.untracked:
		head.extend(sorted(imap(lambda n: (n, '(%s)' % n),
			ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
	utils.vprint('')
	utils.printTabular(head, result)
def _resyncPartition(self, modSI, jobNum, oldBlock, newBlock, filesMissing, filesMatched, doExpandOutside):
	"""Resync a single partition against the changed block contents.

	Returns (procMode, extended_partition_list); disabled partitions are
	marked invalid and return an empty extension list.
	"""
	if newBlock:  # copy new location information
		modSI[DataSplitter.Locations] = newBlock.get(DataProvider.Locations)

	# Determine old size infos and get started
	def search_url(url):
		return fast_search(oldBlock[DataProvider.FileList], itemgetter(DataProvider.URL), url)
	sizeInfo = lmap(lambda url: search_url(url)[DataProvider.NEntries], modSI[DataSplitter.FileList])
	metaIdxLookup = self._resyncGetMatchingMetadata(oldBlock, newBlock)

	# [] collects partitions created outside this one; None disables expansion
	extended = utils.QM(doExpandOutside, [], None)
	old_entries = modSI[DataSplitter.NEntries]
	(procMode, newMetadata) = self._resyncFiles(modSI, jobNum, sizeInfo,
		filesMissing, filesMatched, newBlock, metaIdxLookup, extended)
	# Disable invalid / invalidated partitions
	# (empty file list, or entry counter changed sign / dropped to zero)
	if (len(modSI[DataSplitter.FileList]) == 0) or (old_entries * modSI[DataSplitter.NEntries] <= 0):
		procMode = ResyncMode.disable
	if procMode == ResyncMode.disable:
		modSI[DataSplitter.Invalid] = True
		return (ResyncMode.disable, [])  # Discard extensions

	# Update metadata
	if DataSplitter.Metadata in modSI:
		modSI.pop(DataSplitter.MetadataHeader)
		modSI.pop(DataSplitter.Metadata)
	if newMetadata:
		modSI[DataSplitter.MetadataHeader] = newBlock.get(DataProvider.Metadata)
		modSI[DataSplitter.Metadata] = newMetadata
	return (procMode, extended or [])
def __init__(self, arg, **kwargs):
	"""Parse a comma separated list of job ids / id ranges (e.g. '1,5-9,12-')."""
	range_token_iter = imap(lambda token: token.split('-'), arg.split(','))
	try:
		def _convert(value):
			# empty bounds stay strings (open-ended range), others become ints
			return utils.QM(value != '', int, str)(value)
		self.ranges = lmap(lambda bounds: (_convert(bounds[0]), _convert(bounds[-1])), range_token_iter)
	except Exception:
		raise UserError('Job identifiers must be integers or ranges.')
def createLookupHelper(pconfig, var_list, lookup_list):
	"""Return list of (doElevate, PSourceClass, arguments) entries for the given variables."""
	if len(var_list) != 1:  # multi-lookup handling - recurse per variable
		result = []
		for var_name in var_list:
			result.extend(createLookupHelper(pconfig, [var_name], lookup_list))
		return result
	var_name = var_list[0]

	pvalue = pconfig.getParameter(var_name.lstrip('!'))
	if isinstance(pvalue, list):  # simple parameter source
		return [(False, SimpleParameterSource, [var_name, pvalue])]
	elif isinstance(pvalue, tuple) and pvalue[0] == 'format':
		return [(False, FormatterParameterSource, pvalue[1:])]

	lookup_key = None
	if lookup_list:  # default lookup key
		lookup_key = KeyParameterSource(*lookup_list)

	# Determine kind of lookup, [3] == lookupDictConfig, [0] == lookupContent
	tmp = lookupConfigParser(pconfig, KeyParameterSource(var_name), lookup_key)
	lookupContent = tmp[3][0]
	# NOTE(review): min()/max() below raise on an empty lookupContent - confirm
	# that lookupConfigParser guarantees at least one entry
	lookupLen = lmap(len, lookupContent.values())
	if (min(lookupLen) == 1) and (max(lookupLen) == 1):  # simple lookup sufficient for this setup
		return [(False, SimpleLookupParameterSource, list(tmp))]
	# switch needs elevation beyond local scope
	return [(True, SwitchingLookupParameterSource, list(tmp))]
def _parseLine(self, exceptionIntro, configContent, configFile, idx, line):
	"""Parse a single config file line, updating the current section/option state.

	Raises ConfigError with file/line context on any malformed input.
	"""
	exceptionIntroLineInfo = exceptionIntro + ':%d\n\t%r' % (idx, line)
	try:
		# strip trailing ';' comments
		line = rsplit(line, ';', 1)[0].rstrip()
	except Exception:
		raise ConfigError(exceptionIntroLineInfo + '\nUnable to strip comments!')
	exceptionIntroLineInfo = exceptionIntro + ':%d\n\t%r' % (idx, line)  # removed comment
	if line.lstrip().startswith(';') or line.lstrip().startswith('#') or not line.strip():
		return  # skip empty lines or comment lines
	elif line[0].isspace():  # indented lines continue the current option value
		try:
			self._currentValue += '\n' + line.strip()
			self._currentIndices += [idx]
		except Exception:
			raise ConfigError(exceptionIntroLineInfo + '\nInvalid indentation!')
	elif line.startswith('['):  # new section header
		if self._currentOption:  # flush pending option of the previous section
			self._storeOption(exceptionIntroLineInfo, configContent, configFile)
		try:
			self._currentSection = line[1:line.index(']')].strip()
			# anything after the ']' is parsed as if it were its own line
			self._parseLine(exceptionIntro, configContent, configFile, idx, line[line.index(']') + 1:].strip())
		except Exception:
			raise ConfigError(exceptionIntroLineInfo + '\nUnable to parse config section!')
	elif '=' in line:  # new 'key = value' option
		if self._currentOption:  # flush previously collected option
			self._storeOption(exceptionIntroLineInfo, configContent, configFile)
		try:
			(self._currentOption, self._currentValue) = lmap(str.strip, line.split('=', 1))
			self._currentIndices = [idx]
		except Exception:
			raise ConfigError(exceptionIntroLineInfo + '\nUnable to parse config option!')
	else:
		raise ConfigError(exceptionIntroLineInfo + '\nPlease use "key = value" syntax or indent values!')
def _getSectionKey(self, section):
	"""Compute the sort key of a config section header, or None if not selected.

	A header consists of a section name followed by optional plain names and
	'tag:value' tokens; the key is built from the match positions in the
	configured class/section/name/tag lists.
	"""
	tmp = section.split()
	assert(len(tmp) > 0)
	(curSection, curNames, curTags) = (tmp[0], [], {})
	for token in tmp[1:]:
		if ':' in token:  # 'tag:value' token
			tag_entry = token.split(':')
			assert(len(tag_entry) == 2)
			curTags[tag_entry[0]] = tag_entry[1]
		elif token:
			curNames.append(token)

	def myIndex(src, value):
		# index of value in src, or None if not contained
		try:
			return src.index(value)
		except Exception:
			return None
	idxClass = myIndex(self._cfgClassSections, curSection)
	idxSection = myIndex(self._cfgSections, curSection)
	if (not self._cfgClassSections) and (not self._cfgSections):
		# no section selectors configured -> every section matches
		idxSection = 0
	if (idxClass is not None) or (idxSection is not None):  # Section is selected by class or manually
		idxNames = tuple(imap(lambda n: myIndex(self._cfgNames, n), curNames))
		if None not in idxNames:  # All names in current section are selected
			curTagNames = lfilter(lambda tn: tn in curTags, self._cfgTagsOrder)
			curTagNamesLeft = lfilter(lambda tn: tn not in self._cfgTagsOrder, curTags)
			idxTags = lmap(lambda tn: myIndex(self._cfgTags, (tn, curTags[tn])), curTagNames)
			if (None not in idxTags) and not curTagNamesLeft:
				return (idxClass, idxSection, idxNames, idxTags)
def lumi_calc(opts, workDir, jobList, splitter):
	"""Aggregate, merge and print processed lumi section information per sample."""
	(lumiDict, readDict, writeDict) = process_jobs(opts, workDir, jobList, splitter)
	activity = utils.ActivityLog('Simplifying lumi sections')
	lumis = {}
	# convert each (run, lumi) pair into a trivial range, then merge adjacent ranges
	for sample in lumiDict:
		for run in lumiDict[sample]:
			for lumi in lumiDict[sample][run]:
				lumis.setdefault(sample, []).append(([run, lumi], [run, lumi]))
	for sample in lumiDict:
		lumis[sample] = mergeLumi(lumis[sample])
	activity.finish()
	for sample, lumi_list in lumis.items():
		print('Sample: %s' % sample)
		if opts.job_events:
			print('=========================================')
			print('Number of events processed: %12s' % readDict.get(sample))
			print(' Number of events written: %12d' % sum(writeDict.get(sample, {}).values()))
			if writeDict.get(sample, None):
				sys.stdout.write('\n')
				head = [(0, ' Output filename'), (1, 'Events')]
				utils.printTabular(head, lmap(lambda pfn: {0: pfn, 1: writeDict[sample][pfn]}, writeDict[sample]))
		if opts.job_json:
			json_fn = os.path.join(opts.output_dir or workDir, 'processed_%s.json' % sample)
			# NOTE(review): file handle is not closed explicitly - confirm
			# outputJSON closes it or rely on interpreter cleanup
			outputJSON(lumi_list, open(json_fn, 'w'))
			print('Saved processed lumi sections in ' + json_fn)
		if opts.job_gc:
			sys.stdout.write('\n')
			print('List of processed lumisections:')
			print('-----------------------------------------')
			outputGC(lumi_list)
		sys.stdout.write('\n')
def __init__(self, config, name): self._name = name # needed for changeView calls before the constructor head = [('DATASETNICK', 'Nickname')] # Mapping between nickname and config files: self._nmCfg = config.getLookup('nickname config', {}, defaultMatcher = 'regex', parser = lambda x: lmap(str.strip, x.split(',')), strfun = lambda x: str.join(',', x)) if not self._nmCfg.empty(): allConfigFiles = sorted(set(ichain(self._nmCfg.get_values()))) config.set('config file', str.join('\n', allConfigFiles)) head.append((1, 'Config file')) elif config.get('config file', ''): raise ConfigError("Please use 'nickname config' instead of 'config file'") # Mapping between nickname and constants - only display - work is handled by the 'normal' parameter factory nmCName = config.getList('nickname constants', [], onChange = None) param_config = config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, addSections = ['parameters']) param_config.set('constants', str.join(' ', nmCName), '+=') for cName in nmCName: param_config.set(cName + ' matcher', 'regex') param_config.set(cName + ' lookup', 'DATASETNICK') head.append((cName, cName)) # Mapping between nickname and lumi filter - only display - work is handled by the 'normal' lumi filter config.set('lumi filter matcher', 'regex') if 'nickname lumi filter' in config.getOptions(): config.set('lumi filter', strDictLong(config.getDict('nickname lumi filter', {}, onChange = None))) self._nmLumi = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = None) if not self._nmLumi.empty(): head.append((2, 'Lumi filter')) CMSSW.__init__(self, config, name) self._displaySetup(config.getWorkPath('datacache.dat'), head)
def __init__(self, config):
	"""Collect parameter sources from config: random seeds, constants, repeat settings."""
	ParameterFactory.__init__(self, config)
	self._psrc_list = []

	# Random number variables
	jobs_config = config.change_view(add_sections=['jobs'])
	self._random_variables = jobs_config.get_list('random variables', ['JOB_RANDOM'], on_change=None)
	nseeds = jobs_config.get_int('nseeds', 10)
	seeds_new = lmap(lambda x: str(random.randint(0, 10000000)), irange(nseeds))
	# persistent=True: generated seeds are stored and reused on rerun
	self._random_seeds = jobs_config.get_list('seeds', seeds_new, persistent=True)

	# Get constants from [constants <tags...>]
	constants_config = config.change_view(view_class='TaggedConfigView',
		set_classes=None, set_sections=['constants'], set_names=None)
	constants_pconfig = ParameterConfig(constants_config)
	for vn_const in ifilter(lambda opt: ' ' not in opt, constants_config.get_option_list()):
		constants_config.set('%s type' % vn_const, 'verbatim', '?=')
		self._register_psrc(constants_pconfig, vn_const.upper())

	param_config = config.change_view(view_class='TaggedConfigView',
		set_classes=None, add_sections=['parameters'], inherit_sections=True)
	# Get constants from [<Module>] constants
	task_pconfig = ParameterConfig(param_config)
	for vn_const in param_config.get_list('constants', []):
		# NOTE(review): sets on 'config' here, while the loop above sets on the
		# sectioned view - confirm this asymmetry is intended
		config.set('%s type' % vn_const, 'verbatim', '?=')
		self._register_psrc(task_pconfig, vn_const)

	# Get global repeat value from 'parameters' section
	self._repeat = param_config.get_int('repeat', -1, on_change=None)
	self._req = param_config.get_bool('translate requirements', True, on_change=None)
	self._pfactory = param_config.get_plugin('parameter factory', 'SimpleParameterFactory', cls=ParameterFactory)
def __init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc, scanner_list_default): DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc) # Configure scanners scanner_config = config.change_view(default_on_change=TriggerResync(['datasets', 'parameters'])) self._interactive_assignment = config.is_interactive('dataset name assignment', True) def _create_scanner(scanner_name): return InfoScanner.create_instance(scanner_name, scanner_config, datasource_name) scanner_list = scanner_config.get_list('scanner', scanner_list_default) + ['NullScanner'] self._scanner_list = lmap(_create_scanner, scanner_list) # Configure dataset / block naming and selection def _setup(prefix): selected_hash_list = scanner_config.get_list(join_config_locations(prefix, 'key select'), []) name = scanner_config.get(join_config_locations(prefix, 'name pattern'), '') return (selected_hash_list, name) (self._selected_hash_list_dataset, self._dataset_pattern) = _setup('dataset') (self._selected_hash_list_block, self._block_pattern) = _setup('block') # Configure hash input for separation of files into datasets / blocks def _get_active_hash_input(prefix, guard_entry_idx): hash_input_list_user = scanner_config.get_list(join_config_locations(prefix, 'hash keys'), []) hash_input_list_guard = scanner_config.get_list(join_config_locations(prefix, 'guard override'), lchain(imap(lambda scanner: scanner.get_guard_keysets()[guard_entry_idx], self._scanner_list))) return hash_input_list_user + hash_input_list_guard self._hash_input_set_dataset = _get_active_hash_input('dataset', 0) self._hash_input_set_block = _get_active_hash_input('block', 1)
def resync(self): oldMaxParameters = self._maxParameters # Perform resync of subsources psourceResyncList = lmap(lambda p: p.resync(), self._psourceList) # Update max for _translateNum self._psourceMaxList = lmap(lambda p: p.getMaxParameters(), self._psourceList) self._maxParameters = self.initMaxParameters() # translate affected pNums from subsources (result_redo, result_disable, dummy) = ParameterSource.resync(self) for (idx, psource_resync) in enumerate(psourceResyncList): (psource_redo, psource_disable, dummy) = psource_resync for pNum in psource_redo: result_redo.update(self._translateNum(idx, pNum)) for pNum in psource_disable: result_disable.update(self._translateNum(idx, pNum)) return (result_redo, result_disable, oldMaxParameters != self._maxParameters)
def _prepareSubmit(self, task, jobNumList, queryArguments):
	"""Write the JDL for the given jobs into the local sandbox.

	Remote submission is not implemented yet - this method always raises
	NotImplementedError after writing the local JDL file.
	"""
	localJdlFilePath = os.path.join(self.parentPool.getSandboxPath(),
		'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI()).hexdigest()))
	readyJobNumList = self._stageSubmitFiles(task, jobNumList)
	utils.safeWrite(open(localJdlFilePath, 'w'),
		lmap(lambda line: line + '\n', self._getJDLData(task, readyJobNumList, queryArguments)))
	# TODO: move the JDL to the remote schedd and return its remote path
	# (removed unreachable 'return jdlFilePath' - the name was never defined)
	raise NotImplementedError('JDL must get moved to remote')
def logging_create_handlers(config, logger_name):
	"""Configure level, propagation and output handlers of one logger from config."""
	LogLevelEnum = makeEnum(lmap(lambda level: logging.getLevelName(level).upper(), irange(51)))
	logger = logging.getLogger(logger_name.lower())
	# Set logging level
	logger.setLevel(config.getEnum(logger_name + ' level', LogLevelEnum, logger.level, onChange = None))
	# Set propagate status
	logger.propagate = config.getBool(logger_name + ' propagate', bool(logger.propagate), onChange = None)
	# Setup handlers - only when explicitly configured
	if logger_name + ' handler' not in config.getOptions():
		return
	# remove any standard handlers:
	for old_handler in list(logger.handlers):
		logger.removeHandler(old_handler)

	def _build_handler(handler_str):
		# translate a handler keyword into a handler instance
		if handler_str == 'stdout':
			return StdoutStreamHandler()
		if handler_str == 'stderr':
			return StderrStreamHandler()
		if handler_str == 'file':
			return logging.FileHandler(config.get(logger_name + ' file', onChange = None), 'w')
		if handler_str == 'debug_file':
			return GCLogHandler(config.get(logger_name + ' debug file', onChange = None), 'w')
		raise Exception('Unknown handler %s for logger %s' % (handler_str, logger_name))

	handler_list = config.getList(logger_name + ' handler', [], onChange = None)
	for handler_str in set(handler_list):  # add only unique output handlers
		new_handler = _build_handler(handler_str)
		logger.addHandler(logging_configure_handler(config, logger_name, handler_str, new_handler))
def partition_check(splitter):
	"""Compare job config files against the splitter information and report mismatches.

	Reads 'opts' from module scope; logs a warning for every inconsistent or
	uninitialized job and a summary of all failed job numbers at the end.
	"""
	fail = utils.set()
	for jobNum in irange(splitter.getMaxJobs()):
		splitInfo = splitter.getSplitInfo(jobNum)
		try:
			(events, skip, files) = (0, 0, [])
			# NOTE(review): file handle is not closed explicitly - relies on
			# interpreter cleanup
			for line in open(os.path.join(opts.checkSplitting, 'jobs', 'job_%d.var' % jobNum)).readlines():
				if 'MAX_EVENTS' in line:
					events = int(line.split('MAX_EVENTS', 1)[1].replace('=', ''))
				if 'SKIP_EVENTS' in line:
					skip = int(line.split('SKIP_EVENTS', 1)[1].replace('=', ''))
				if 'FILE_NAMES' in line:
					# strip '=', quotes and backslashes, then split into names
					files = line.split('FILE_NAMES', 1)[1].replace('=', '').replace('\"', '').replace('\\', '')
					files = lmap(lambda x: x.strip().strip(','), files.split())

			def printError(curJ, curS, msg):
				# compare job config value (curJ) with splitter value (curS)
				if curJ != curS:
					logging.warning('%s in job %d (j:%s != s:%s)', msg, jobNum, curJ, curS)
					fail.add(jobNum)
			printError(events, splitInfo[DataSplitter.NEntries], 'Inconsistent number of events')
			printError(skip, splitInfo[DataSplitter.Skipped], 'Inconsistent number of skipped events')
			printError(files, splitInfo[DataSplitter.FileList], 'Inconsistent list of files')
		except Exception:
			logging.warning('Job %d was never initialized!', jobNum)
	if fail:
		logging.warning('Failed: ' + str.join('\n', imap(str, fail)))
def parseDict(entries, parserValue = identity, parserKey = identity):
	"""Parse a multiline '<key> => <value>' mapping.

	Lines before the first '=>' (and lines under an empty key) belong to the
	None (default) entry. Returns (parsed_dict, parsed_key_order).
	"""
	(raw_values, parsed, key_order) = ({}, {}, [])
	current_key = None
	for raw_line in entries.splitlines():
		if '=>' in raw_line:
			(current_key, raw_line) = lmap(str.strip, raw_line.split('=>', 1))
			if current_key and (current_key not in key_order):
				key_order.append(current_key)
		if (current_key is not None) or (raw_line.strip() != ''):
			raw_values.setdefault(current_key, []).append(raw_line.strip())

	def _parse_key(raw_key):
		# empty / missing keys map to None (the default entry)
		if raw_key:
			return parserKey(raw_key)
	for (raw_key, line_list) in raw_values.items():
		parsed[_parse_key(raw_key)] = parserValue(str.join('\n', line_list).strip())
	return (parsed, lmap(_parse_key, key_order))
def __init__(self, arg, **kwargs):
	"""Parse 'a,b-c,...' into a list of (start, end) job id range tuples."""
	token_pair_iter = imap(lambda token: token.split('-'), arg.split(','))
	try:
		self.ranges = []
		for bounds in token_pair_iter:
			# empty bounds stay strings (open-ended), others become ints
			first_conv = utils.QM(bounds[0] != '', int, str)
			last_conv = utils.QM(bounds[-1] != '', int, str)
			self.ranges.append((first_conv(bounds[0]), last_conv(bounds[-1])))
	except Exception:
		raise UserError('Job identifiers must be integers or ranges.')
def se_to_cms_name(self, se):
	"""Translate a storage element fqdn into the list of matching CMS site aliases."""
	matching_site_names = []
	for resource in self._query('site-resources'):
		if resource['fqdn'] == se:  # resource belongs to the requested SE
			matching_site_names.extend(self._query('site-names', match=resource['site_name']))
	cms_entry_iter = ifilter(lambda entry: entry['type'] == 'cms', matching_site_names)
	return lmap(lambda entry: entry['alias'], cms_entry_iter)
def parseLumiFromJSON(data, select=''):
	"""Yield lumi ranges ([run, lumi_first], [run, lumi_last]) from JSON data.

	select -- optional 'first-last' run range; runs outside it are skipped
	"""
	run_dict = json.loads(data)
	# pad the selection so both bounds exist, then take the first two
	(run_first, run_last) = tuple(lmap(makeint, select.split('-') + ['']))[:2]
	for run in imap(int, run_dict.keys()):
		below_selection = run_first and (run < run_first)
		above_selection = run_last and (run > run_last)
		if below_selection or above_selection:
			continue
		for lumi_range in run_dict[str(run)]:
			yield ([run, lumi_range[0]], [run, lumi_range[1]])
def __init__(self, arg, **kwargs):
	"""Select jobs by comma separated '<var>=<regex>' pairs matched against job variables."""
	if 'task' not in kwargs:
		raise TaskNeededException()
	def _compile_pair(expr):
		# split once - missing '=' raises IndexError just like before
		parts = expr.split('=', 1)
		return (parts[0], re.compile(parts[1]))
	self._arg = arg
	self._regex_obj_list = lmap(_compile_pair, arg.split(','))
	self._job_config = lambda jobnum, var: str(kwargs['task'].get_job_dict(jobnum).get(var, ''))
def __init__(self, config, user=None):
	"""Grid Engine job status check - reads the configured XML job number keys."""
	CheckJobsWithProcess.__init__(self, config, GridEngineCheckJobsProcessCreator(config))
	key_default_list = ['JB_jobnum', 'JB_jobnumber', 'JB_job_number']
	configured_key_list = config.get_list('job status key', key_default_list, on_change=None)
	# keys are compared case-insensitively
	self._job_status_key = lmap(str.lower, configured_key_list)
def execute(self, wms_id_list, wms_name):  # yields list of (wms_id,)
	"""Cancel the given jobs, then purge the successfully cancelled ones."""
	cancel_result_iter = self._cancel_executor.execute(wms_id_list, wms_name)
	marked_wms_id_list = lmap(lambda cancel_result: cancel_result[0], cancel_result_iter)
	time.sleep(5)  # give the batch system time to process the cancellations
	activity = Activity('Purging jobs')
	for purge_result in self._purge_executor.execute(marked_wms_id_list, wms_name):
		yield purge_result
	activity.finish()
def sitesReq(self, sites):
	"""Translate a site black/white list into a JDL requirement expression.

	Returns None when no site requirements apply.
	"""
	def _site_regex(site):
		return 'RegExp(%s, other.GlueCEUniqueID)' % jdlEscape(site)
	(blacklist, whitelist) = utils.splitBlackWhiteList(sites)
	# blacklisted sites are negated individually; whitelisted sites are OR-ed
	req_list = [('!' + _site_regex(site)) for site in blacklist]
	if len(whitelist):
		req_list.append('(%s)' % str.join(' || ', imap(_site_regex, whitelist)))
	if req_list:
		return '( %s )' % str.join(' && ', req_list)
def _parse_line(line):
	"""Parse a data line '<pnum>[!]<TAB><json>[<TAB><json>...]' into
	(is_invalid, pnum, value_list); comment lines starting with '#' yield None."""
	if line.startswith('#'):
		return None
	(pnum_str, json_part) = line.split('\t', 1)
	invalid_flag = ('!' in pnum_str)
	# int() ignores the blanks left by replacing the '!' marker
	pnum_value = int(pnum_str.replace('!', ' '))
	json_str_list = json_part.strip().split('\t')
	return (invalid_flag, pnum_value, lmap(parse_json, json_str_list))
def fnProps(path, metadata, events, seList, objStore):
	"""Assemble the DataProvider file dictionary for a single file entry."""
	# Unknown event counts are stored as -1
	entry_count = -1 if events is None else events
	metadata_values = [metadata.get(key) for key in metaKeys]
	return {
		DataProvider.URL: path,
		DataProvider.NEntries: entry_count,
		DataProvider.Metadata: metadata_values,
	}
def _get_reduced_url_list(self, partition, url_list): # Determine the filenames to write (and conditionally set the common prefix in partition) commonprefix = os.path.commonprefix(url_list) commonprefix = str.join('/', commonprefix.split('/')[:-1]) if len(commonprefix) > 6: partition[DataSplitter.CommonPrefix] = commonprefix return lmap(lambda x: x.replace(commonprefix + '/', ''), url_list) return url_list
def strDictLong(value, parser = identity, strfun = str):
	"""Render a (dictionary, key order) pair as a multi-line 'key => value' listing.

	The entry stored under the key None (the default value) is printed first.
	"""
	(srcdict, srckeys) = value
	def _column_width(items):
		return max([len(str(item)) for item in items] + [0])
	default_str = ''
	if srcdict.get(None) is not None:
		default_str = strfun(srcdict.get(None, parser('')))
	# Right-align both columns to the widest key / value representation
	entry_fmt = '\n\t%%%ds => %%%ds' % (_column_width(srckeys), _column_width(srcdict.values()))
	return default_str + ''.join(entry_fmt % (key, strfun(srcdict[key])) for key in srckeys)
def _getSandboxFiles(self, task, monitor, smList):
	"""Collect all input sandbox files for a job submission.

	Combines the monitoring files, the environment dependency scripts, the
	task's (wildcard-expanded) input files and two generated virtual files:
	'_config.sh' (exported task environment) and '_varmap.dat' (variable
	name aliases).
	"""
	# Prepare all input files
	depList = set(
		ichain(imap(lambda x: x.getDependencies(), [task] + smList)))
	# Search directories for the dependency scripts 'env.<dep>.sh'
	depPaths = lmap(lambda pkg: utils.pathShare('', pkg=pkg), os.listdir(utils.pathPKG()))
	depFiles = lmap(
		lambda dep: utils.resolvePath('env.%s.sh' % dep, depPaths), depList)
	# Merge the task configuration of monitor, task and storage managers
	# and add submission specific variables
	taskEnv = utils.mergeDicts(
		imap(lambda x: x.getTaskConfig(), [monitor, task] + smList))
	taskEnv.update({
		'GC_DEPFILES': str.join(' ', depList),
		'GC_USERNAME': self._token.getUsername(),
		'GC_WMS_NAME': self.wmsName
	})
	# Sorted 'export KEY=VALUE' lines for the generated _config.sh
	taskConfig = sorted(
		utils.DictFormat(escapeString=True).format(
			taskEnv, format='export %s%s%s\n'))
	# Identity mapping for the monitoring variables, extended by task aliases
	varMappingDict = dict(
		izip(monitor.getTaskConfig().keys(),
			monitor.getTaskConfig().keys()))
	varMappingDict.update(task.getVarMapping())
	varMapping = sorted(
		utils.DictFormat(delimeter=' ').format(varMappingDict,
			format='%s%s%s\n'))

	# Resolve wildcards in task input files
	def getTaskFiles():
		for f in task.getSBInFiles():
			matched = glob.glob(f.pathAbs)
			if matched != []:
				for match in matched:
					yield match
			else:
				# no wildcard match - keep the path as given
				yield f.pathAbs
	return lchain([
		monitor.getFiles(), depFiles, getTaskFiles(),
		[
			VirtualFile('_config.sh', taskConfig),
			VirtualFile('_varmap.dat', varMapping)
		]
	])
def _prepareSubmit(self, task, jobnum_list, queryArguments):
	"""Write the JDL file for the given jobs and return its path."""
	jdl_fn = 'htc-%s.schedd-%s.jdl' % (self.parentPool.wms_name, md5_hex(self.getURI()))
	jdlFilePath = os.path.join(self.parentPool.getSandboxPath(), jdl_fn)
	jdl_line_list = self._getJDLData(task, jobnum_list, queryArguments)
	safe_write(open(jdlFilePath, 'w'), [line + '\n' for line in jdl_line_list])
	return jdlFilePath
def resync(self):
	"""Resynchronize all subsources and translate their parameter changes.

	Returns (redo_set, disable_set, size_changed) in the combined numbering.
	"""
	previous_max = self._maxParameters
	# Resync every subsource first, then refresh the cached maxima used by _translateNum
	subsource_results = [psource.resync() for psource in self._psourceList]
	self._psourceMaxList = [psource.getMaxParameters() for psource in self._psourceList]
	self._maxParameters = self._initMaxParameters()
	# Start from the base class result and merge the translated subsource pNums
	(result_redo, result_disable, dummy) = ParameterSource.resync(self)
	for (subsource_idx, subsource_result) in enumerate(subsource_results):
		(redo_set, disable_set, dummy) = subsource_result
		for pNum in redo_set:
			result_redo.update(self._translateNum(subsource_idx, pNum))
		for pNum in disable_set:
			result_disable.update(self._translateNum(subsource_idx, pNum))
	return (result_redo, result_disable, previous_max != self._maxParameters)
def parse_lumi_from_json(data, select=''):
	"""Yield ([run, lumi_first], [run, lumi_last]) pairs from a CMS lumi JSON string.

	'select' may restrict the run range via '<first>-<last>' (either side optional).
	"""
	run_dict = json.loads(data)
	# Parse the optional run selection; the padding entry covers a missing upper bound
	(run_first, run_last) = [_parse_lumi_int(part) for part in select.split('-') + ['']][:2]
	for run_str in run_dict.keys():
		run = int(run_str)
		# Falsy bounds impose no restriction on that side
		if run_first and run < run_first:
			continue
		if run_last and run > run_last:
			continue
		for lumi_range in run_dict[str(run)]:
			yield ([run, lumi_range[0]], [run, lumi_range[1]])
def _get_jdl_str_list(self, jobnum_list, task): (script_cmd, sb_in_fn_list) = self._get_script_and_fn_list(task) # header for all jobs jdl_str_list = [ 'Universe = ' + self._universe, 'Executable = ' + (sb_in_fn_list[0] if self._remote_type == PoolType.GRIDC else script_cmd), ] if self._remote_type == PoolType.GRIDC: jdl_str_list.extend([ 'use_x509userproxy = True', 'grid_resource = %s' % self._grid_resource, ]) else: jdl_str_list.append('transfer_executable = false') jdl_str_list.extend(self._jdl_writer.get_jdl()) jdl_str_list.extend([ 'Log = ' + os.path.join(self._get_remote_output_dn(), 'GC_Condor.%s.log') % self._task_id, 'should_transfer_files = YES', 'when_to_transfer_output = ON_EXIT', ]) # cancel held jobs - ignore spooling ones remove_cond = '(JobStatus == 5 && HoldReasonCode != 16)' if self._wall_time_mode == WallTimeMode.hard: # remove a job when it exceeds the requested wall time remove_cond += ' || ((JobStatus == 2) && (CurrentTime - EnteredCurrentStatus) > %s)' % task.wall_time jdl_str_list.append('periodic_remove = (%s)' % remove_cond) if self._wall_time_mode != WallTimeMode.ignore: jdl_str_list.append('max_job_retirement_time = %s' % task.wall_time) if self._remote_type == PoolType.SPOOL: jdl_str_list.extend([ # remote submissal requires job data to stay active until retrieved 'leave_in_queue = (JobStatus == 4) && ' + '((StageOutFinish =?= UNDEFINED) || (StageOutFinish == 0))', # Condor should not attempt to assign to local user '+Owner=UNDEFINED' ]) for auth_fn in self._token.get_auth_fn_list(): if self._remote_type not in (PoolType.SSH, PoolType.GSISSH): jdl_str_list.append('x509userproxy = %s' % auth_fn) else: jdl_str_list.append('x509userproxy = %s' % os.path.join( self._get_remote_output_dn(), os.path.basename(auth_fn))) # job specific data for jobnum in jobnum_list: jdl_str_list.extend( self._get_jdl_str_list_job(jobnum, task, sb_in_fn_list)) # combine JDL and add line breaks return lmap(lambda line: line + '\n', jdl_str_list)
def _join_config_locations(*opt_list): opt_first = opt_list[0] opt_list = opt_list[1:] if isinstance( opt_first, (list, tuple)): # first option is a list - expand the first parameter if not opt_list: # only first option -> clean and return return lmap(str.strip, opt_first) return lchain( imap(lambda opt: _join_config_locations(opt.strip(), *opt_list), opt_first)) if not opt_list: # only first option -> clean and return return [opt_first.strip()] def _do_join(opt): return (opt_first + ' ' + opt).strip() return lmap(_do_join, _join_config_locations(*opt_list))
def parse_time(usertime):
	"""Convert a '[[hh:]mm:]ss'-style time string into seconds (-1 if unset).

	Raises an Exception for invalid minute/second fields or too many fields.
	"""
	if usertime is None or usertime == '':
		return -1
	field_list = [int(field) for field in usertime.split(':')]
	# pad to (hours, minutes, seconds)
	while len(field_list) < 3:
		field_list.append(0)
	if field_list[2] > 59 or field_list[1] > 59 or len(field_list) > 3:
		raise Exception('Invalid time format: %s' % usertime)
	total = 0
	for field in field_list:
		total = total * 60 + field
	return total
def get_site_status(report):
	"""Return a {site: [failed, waiting, success, running]} job count mapping."""
	site_info = report.getWNInfos()
	state_list = ['FAILED', 'WAITING', 'SUCCESS', 'RUNNING']
	# site_info also contains per-state summary keys - skip those
	return dict((site, [site_info[site][state]['COUNT'] for state in state_list])
		for site in site_info if site not in state_list)
def resync_psrc(self):
	"""Resynchronize all subsources and translate their parameter number changes.

	Returns (redo_set, disable_set, size_changed) in the combined numbering.
	"""
	previous_max = self._psrc_max
	# Resync every subsource first, then refresh the cached maxima used by _translate_pnum
	subsource_results = [psrc.resync_psrc() for psrc in self._psrc_list]
	self._psrc_max_list = [psrc.get_parameter_len() for psrc in self._psrc_list]
	self._psrc_max = self._init_psrc_max()
	# Merge the translated pnums of all subsources into an empty base result
	(result_redo, result_disable, _) = ParameterSource.get_empty_resync_result()
	for (subsource_idx, subsource_result) in enumerate(subsource_results):
		(redo_set, disable_set, _) = subsource_result
		for pnum in redo_set:
			result_redo.update(self._translate_pnum(subsource_idx, pnum))
		for pnum in disable_set:
			result_disable.update(self._translate_pnum(subsource_idx, pnum))
	return (result_redo, result_disable, previous_max != self._psrc_max)
def _collectFiles(self):
	"""Chain all scanners and yield (path, metadata, nEvents, seList, objStore) tuples.

	Each scanner refines the entries produced by the scanners before it; the
	metadata dictionary is copied at every level to keep entries independent.
	"""
	def _apply_scanners(level, scanner_fun_list, seed):
		if not scanner_fun_list:
			yield seed
			return
		last_scanner = scanner_fun_list[-1]
		# feed the output of the preceding scanners into the last one
		for entry in _apply_scanners(level - 1, scanner_fun_list[:-1], seed):
			for (path, metadata, entries, se_list, obj_dict) in last_scanner(level, *entry):
				yield (path, dict(metadata), entries, se_list, obj_dict)
	scanner_fun_list = [scanner.getEntriesVerbose for scanner in self.scanner]
	return _apply_scanners(len(self.scanner), scanner_fun_list, (None, {}, None, None, {}))
def _writeJob2PID(self, fn):
	"""Write the job number to parameter number mapping to the given file.

	Format: first line holds the parameter source size, second line a
	comma-separated list of 'jobnum:pnum' pairs that actually differ.
	"""
	fp = ZipFile(fn, 'w')
	try:
		fp.write('%d\n' % (self._rawSource.getMaxParameters() or 0))
		# only store pairs where job number and parameter number disagree
		mapping_str_list = []
		for (job_num, pnum) in self._mapJob2PID.items():
			if job_num != pnum:
				mapping_str_list.append('%d:%d' % (job_num, pnum))
		fp.write('%s\n' % ','.join(mapping_str_list))
	finally:
		fp.close()
def _format_reqs_sites(self, sites):
	"""Build the JDL site requirement expression from a black/white list."""
	def _site_expr(site):
		return 'RegExp(%s, other.GlueCEUniqueID)' % self._escape(site)
	(blacklist, whitelist) = split_blackwhite_list(sites)
	# Blacklisted sites are excluded individually, whitelisted ones are OR-ed together
	req_list = ['!' + _site_expr(site) for site in blacklist]
	if whitelist:
		req_list.append('(%s)' % ' || '.join(_site_expr(site) for site in whitelist))
	if req_list:
		return '( %s )' % ' && '.join(req_list)
def _prepareSubmit(self, task, jobNumList, queryArguments):
	"""Write the JDL file for the given jobs and return its path."""
	jdl_fn = 'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI()).hexdigest())
	jdlFilePath = os.path.join(self.parentPool.getSandboxPath(), jdl_fn)
	jdl_line_list = self._getJDLData(task, jobNumList, queryArguments)
	utils.safeWrite(open(jdlFilePath, 'w'), [line + '\n' for line in jdl_line_list])
	return jdlFilePath
def __init__(self, arg, **kwargs):
	"""Parse a multi-job selector expression ('+' = AND, whitespace = OR, '~' = NOT)."""
	def _build_term(term):
		negate = term.startswith('~')
		term = term.lstrip('~')
		# explicit 'type:value' terms override the default; otherwise numeric
		# terms select by id, everything else by state
		if ':' in term:
			selectorType = term.split(':', 1)[0]
		else:
			selectorType = utils.QM(term[0].isdigit(), 'id', 'state')
		selector = JobSelector.createInstance(selectorType, term.split(':', 1)[-1], **kwargs)
		if negate:
			return lambda jobNum, jobObj: not selector(jobNum, jobObj)
		return selector.__call__
	# normalize whitespace around '+' and split into OR groups of AND-ed terms
	or_group_list = str.join('+', [part.strip() for part in arg.split('+')]).split()
	self._js = [[_build_term(term) for term in group.split('+')] for group in or_group_list]
def __init__(self, config, datasetExpr, datasetNick, sList):
	"""Initialize dataset/block naming rules and instantiate the info scanner chain."""
	DataProvider.__init__(self, config, datasetExpr, datasetNick)
	# Configure selection and naming for datasets and blocks
	(self._ds_select, self._ds_name, self._ds_keys_user, self._ds_keys_guard) = self._setup(config, 'dataset')
	(self._b_select, self._b_name, self._b_keys_user, self._b_keys_guard) = self._setup(config, 'block')
	# The NullScanner terminates the configured scanner chain
	scanner_name_list = config.getList('scanner', sList) + ['NullScanner']
	self._scanner = [InfoScanner.createInstance(name, config) for name in scanner_name_list]
def __init__(self, jobDB, task, jobs = None, configString = ''):
	"""Configure the hierarchy levels (wms/endpoint/site/queue) shown by this report."""
	Report.__init__(self, jobDB, task, jobs, configString)
	self._levelMap = {'wms': 2, 'endpoint': 3, 'site': 4, 'queue': 5}
	# 'history' is a flag keyword, not a hierarchy level
	self._useHistory = ('history' in configString)
	level_token_list = configString.replace('history', '').split()
	# store the requested level indices in reverse order
	self._idxList = [self._levelMap[token.lower()] for token in reversed(level_token_list)]
	self._stateMap = [(None, 'WAITING'), (Job.RUNNING, 'RUNNING'),
		(Job.FAILED, 'FAILED'), (Job.SUCCESS, 'SUCCESS')]
def cms_name_to_se(self, cms_name):
	"""Resolve a CMS site name pattern ('*'/'%' wildcards) to storage element FQDNs."""
	# Translate the CMS wildcard syntax into a regular expression
	name_regex = re.compile(cms_name.replace('*', '.*').replace('%', '.*'))
	# Find the aliases of all matching processing site names
	alias_set = set()
	for site in self._query('site-names'):
		if site['type'] == 'psn' and name_regex.match(site['alias']):
			alias_set.add(site['alias'])
	# Report the storage elements registered under those aliases
	return [resource['fqdn'] for resource in self._query('site-resources')
		if (resource['type'] == 'SE') and (resource['alias'] in alias_set)]
def _get_sandbox_file_list(self, task, sm_list):
	"""Collect all input sandbox files for a job submission.

	Combines the monitoring files, the environment dependency scripts, the
	task's (wildcard-expanded) input files and two generated virtual files:
	'_config.sh' (exported task environment) and '_varmap.dat' (variable
	name aliases).
	"""
	# Prepare all input files
	dep_list = set(
		ichain(imap(lambda x: x.get_dependency_list(), [task] + sm_list)))
	# Resolve each dependency to its 'env.<dep>.sh' script in the share directories
	dep_fn_list = lmap(
		lambda dep: resolve_path(
			'env.%s.sh' % dep,
			lmap(lambda pkg: get_path_share('', pkg=pkg),
				os.listdir(get_path_pkg()))), dep_list)
	# Merge the environment of monitoring, task and storage managers and
	# add submission specific variables
	task_config_dict = dict_union(
		self._remote_event_handler.get_mon_env_dict(),
		*imap(lambda x: x.get_task_dict(), [task] + sm_list))
	task_config_dict.update({
		'GC_DEPFILES': str.join(' ', dep_list),
		'GC_USERNAME': self._token.get_user_name(),
		'GC_WMS_NAME': self._name
	})
	# 'export KEY=VALUE' lines for the generated _config.sh
	task_config_str_list = DictFormat(escape_strings=True).format(
		task_config_dict, format='export %s%s%s\n')
	# Identity mapping for the monitoring variables, extended by task aliases
	vn_alias_dict = dict(
		izip(self._remote_event_handler.get_mon_env_dict().keys(),
			self._remote_event_handler.get_mon_env_dict().keys()))
	vn_alias_dict.update(task.get_var_alias_map())
	vn_alias_str_list = DictFormat(delimeter=' ').format(vn_alias_dict, format='%s%s%s\n')

	# Resolve wildcards in task input files
	def _get_task_fn_list():
		for fpi in task.get_sb_in_fpi_list():
			matched = glob.glob(fpi.path_abs)
			if matched != []:
				for match in matched:
					yield match
			else:
				# no wildcard match - keep the path as given
				yield fpi.path_abs
	return lchain([
		self._remote_event_handler.get_file_list(), dep_fn_list, _get_task_fn_list(),
		[
			VirtualFile('_config.sh', sorted(task_config_str_list)),
			VirtualFile('_varmap.dat', sorted(vn_alias_str_list))
		]
	])