def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
	"""Configure a CMS dataset provider from *config* and *datasetExpr*.

	Parses the lumi filter FIRST so that the matching data processor can be
	registered via 'dataset processor +=' before DataProvider.__init__ reads
	that option.
	"""
	self._lumi_filter = parseLumiFilter(config.get('lumi filter', ''))
	if self._lumi_filter:
		config.set('dataset processor', 'LumiDataProcessor', '+=')
	DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
	# PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1's dont!
	# Query lumi metadata by default only when a lumi filter is active
	self._lumi_query = config.getBool('lumi metadata', self._lumi_filter != [])
	self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC',
		defaultMatcher='blackwhite', defaultFilter='weak')
	self._phedexT1Filter = config.getFilter('phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
		defaultMatcher='blackwhite', defaultFilter='weak')
	self._phedexT1Mode = config.get('phedex t1 mode', 'disk').lower()
	self.onlyComplete = config.getBool('only complete sites', True)
	self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname)
	# Split the expression at '@' and '#' — presumably PATH[@DBS-INSTANCE][#BLOCK];
	# verify against utils.optSplit semantics
	(self._datasetPath, self._url, self._datasetBlock) = utils.optSplit(datasetExpr, '@#')
	self._url = self._url or config.get('dbs instance', '')
	self._datasetBlock = self._datasetBlock or 'all'
	self.onlyValid = config.getBool('only valid', True)
def __init__(self, config, name):
	"""Build the nickname -> (config file, constants, lumi filter) mappings
	and print a summary table before delegating to CMSSW.__init__.

	Raises ConfigError when the deprecated 'config file' / 'lumi filter'
	options are used together with their nickname-based replacements.
	"""
	head = [(0, "Nickname")]  # column spec for the summary table below

	# Mapping between nickname and config files:
	cfgList = config.get("nickname config", "")
	self.nmCfg = config.getDict("nickname config", {},
		parser=lambda x: map(str.strip, x.split(",")),
		str=lambda x: str.join(",", x))[0]
	if cfgList:
		if "config file" in config.getOptions():
			raise ConfigError("Please use 'nickname config' instead of 'config file'")
		# Flatten all per-nickname file lists into the global 'config file' option
		allConfigFiles = utils.flatten(self.nmCfg.values())
		config.set("config file", str.join("\n", allConfigFiles))
		head.append((1, "Config file"))

	# Mapping between nickname and constants:
	self.nmCName = map(str.strip, config.get("nickname constants", "").split())
	self.nmConst = {}
	for var in self.nmCName:
		tmp = config.getDict(var, {})[0]
		for (nick, value) in tmp.items():
			# Store '' for missing/falsy values so every nickname has an entry
			if value:
				self.nmConst.setdefault(nick, {})[var] = value
			else:
				self.nmConst.setdefault(nick, {})[var] = ""
		head.append((var, var))

	# Mapping between nickname and lumi filter:
	if "lumi filter" in config.getOptions():
		raise ConfigError("Please use 'nickname lumi filter' instead of 'lumi filter'")
	lumiParse = lambda x: formatLumi(parseLumiFilter(x))
	self.nmLumi = config.getDict("nickname lumi filter", {}, parser=lumiParse)[0]
	if self.nmLumi:
		# Push the per-nickname lumi filter into each dataset section
		for dataset in config.get("dataset", "").splitlines():
			(datasetNick, datasetProvider, datasetExpr) = DataProvider.parseDatasetExpr(config, dataset, None)
			config.set("dataset %s" % datasetNick, "lumi filter",
				str.join(",", utils.flatten(fromNM(self.nmLumi, datasetNick, []))))
		# Nickname None acts as the global default filter
		config.set("lumi filter", str.join(",", self.nmLumi.get(None, [])))
		head.append((2, "Lumi filter"))

	utils.vprint("Mapping between nickname and other settings:\n", -1)

	def report():
		# One row per nickname that appears in any of the three mappings
		for nick in sorted(set(self.nmCfg.keys() + self.nmConst.keys() + self.nmLumi.keys())):
			tmp = {
				0: nick,
				1: str.join(", ", map(os.path.basename, self.nmCfg.get(nick, ""))),
				2: self.displayLumi(self.nmLumi.get(nick, "")),
			}
			yield utils.mergeDicts([tmp, self.nmConst.get(nick, {})])

	utils.printTabular(head, report(), "cl")
	utils.vprint(level=-1)
	CMSSW.__init__(self, config, name)
def getVarsForNick(self, nick):
	"""Collect the job environment variables associated with *nick*.

	Returns a dict with CMSSW_CONFIG (space-separated config file basenames),
	one entry per declared constant (defaulting to ''), and LUMI_RANGE when a
	lumi filter is configured for the nickname.
	"""
	cfgFiles = utils.flatten(fromNM(self.nmCfg, nick, ""))
	result = {"CMSSW_CONFIG": str.join(" ", map(os.path.basename, cfgFiles))}
	# Global constants (nick None) are overridden by nickname-specific ones;
	# every declared constant name ends up present, defaulting to ''
	mergedConst = utils.mergeDicts(fromNM(self.nmConst, None, {}) + fromNM(self.nmConst, nick, {}))
	for var in self.nmCName:
		result[var] = mergedConst.get(var, "")
	lumiEntries = utils.flatten(fromNM(self.nmLumi, nick, ""))
	if lumiEntries:
		result["LUMI_RANGE"] = parseLumiFilter(str.join(",", lumiEntries))
	return result
def lumi_expr(opts, args):
	"""Parse a lumi filter expression from the command line and emit it.

	opts: option object with boolean attributes 'gc', 'json' and 'full'
	      selecting the output format(s)
	args: expression fragments, joined with spaces before parsing
	Raises Exception when no arguments are given or parsing fails.
	"""
	if not args:
		raise Exception('No arguments given!')
	expr = str.join(' ', args)
	try:
		lumis = parseLumiFilter(expr)
	except Exception:
		raise Exception('Could not parse: %s' % expr)
	if opts.gc:
		outputGC(lumis)
	if opts.json:
		outputJSON(lumis)
	if opts.full:
		# Expand each ([run, first_lumi], [run, last_lumi]) range into an
		# explicit {run: [lumi, ...]} mapping
		result = {}
		for (start, end) in lumis:
			# Was a bare assert - raise explicitly so the check survives -O
			if start[0] != end[0]:
				raise Exception('Lumi range spans multiple runs: %r' % ((start, end),))
			result.setdefault(start[0], []).extend(irange(start[1], end[1] + 1))
		print(result)
def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
	"""Set up the CMS dataset provider.

	The lumi filter is parsed before calling the base constructor so the
	'dataset processor' option can be extended with LumiDataProcessor first.
	"""
	self._lumi_filter = parseLumiFilter(config.get('lumi filter', ''))
	if self._lumi_filter:
		config.set('dataset processor', 'LumiDataProcessor', '+=')
	DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
	# PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1's dont!
	# Lumi metadata queries default to on only when a lumi filter is set
	self._lumi_query = config.getBool('lumi metadata', self._lumi_filter != [])
	self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC',
		defaultMatcher = 'blackwhite', defaultFilter = 'weak')
	self._phedexT1Filter = config.getFilter('phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
		defaultMatcher = 'blackwhite', defaultFilter = 'weak')
	self._phedexT1Mode = config.get('phedex t1 mode', 'disk').lower()
	self.onlyComplete = config.getBool('only complete sites', True)
	self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname)
	# datasetExpr split at '@' and '#' — presumably PATH[@DBS-INSTANCE][#BLOCK];
	# confirm against utils.optSplit
	(self._datasetPath, self._url, self._datasetBlock) = utils.optSplit(datasetExpr, '@#')
	self._url = self._url or config.get('dbs instance', '')
	self._datasetBlock = self._datasetBlock or 'all'
	self.onlyValid = config.getBool('only valid', True)
def lumi_expr(opts, args):
	"""Parse a command-line lumi filter expression and print the result.

	opts: option object with boolean attributes 'gc', 'json' and 'full'
	args: list of expression fragments joined by spaces before parsing
	Raises Exception when no arguments are supplied or parsing fails.
	"""
	if not args:
		raise Exception('No arguments given!')
	expr = str.join(' ', args)
	try:
		lumis = parseLumiFilter(expr)
	except Exception:
		raise Exception('Could not parse: %s' % expr)
	if opts.gc:
		outputGC(lumis)
	if opts.json:
		outputJSON(lumis)
	if opts.full:
		# Turn the ([run, first], [run, last]) pairs into {run: [lumi, ...]}
		result = {}
		for (start, end) in lumis:
			# Replaced assert with an explicit check - asserts vanish under -O
			if start[0] != end[0]:
				raise Exception('Lumi range spans multiple runs: %r' % ((start, end),))
			result.setdefault(start[0], []).extend(irange(start[1], end[1] + 1))
		print(result)
def __init__(self, config, datasetExpr, datasetNick, datasetID = 0): DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID) # PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1's dont! self.phedexBL = config.getList('phedex sites', ['-T3_US_FNALLPC']) self.phedexWL = config.getList('phedex t1 accept', ['T1_DE_KIT', 'T1_US_FNAL']) self.phedexT1 = config.get('phedex t1 mode', 'disk').lower() self.onlyComplete = config.getBool('only complete sites', True) self.locationFormat = config.get('location format', 'hostname').lower() # hostname or sitedb if self.locationFormat not in ['hostname', 'sitedb', 'both']: raise ConfigError('Invalid location format: %s' % self.locationFormat) (self.datasetPath, self.url, self.datasetBlock) = utils.optSplit(datasetExpr, '@#') self.url = utils.QM(self.url, self.url, config.get('dbs instance', '')) self.datasetBlock = utils.QM(self.datasetBlock, self.datasetBlock, 'all') self.includeLumi = config.getBool('keep lumi metadata', False) self.onlyValid = config.getBool('only valid', True) self.checkUnique = config.getBool('check unique', True) # This works in tandem with active task module (cmssy.py supports only [section] lumi filter!) self.selectedLumis = parseLumiFilter(config.get('lumi filter', '')) if self.selectedLumis: utils.vprint('Runs/lumi section filter enabled! (%d entries)' % len(self.selectedLumis), -1, once = True) utils.vprint('\tThe following runs and lumi sections are selected:', 1, once = True) utils.vprint('\t' + utils.wrapList(formatLumi(self.selectedLumis), 65, ',\n\t'), 1, once = True)
def main():
	"""Entry point: scan finished job logs for processed lumi sections and/or
	parse a lumi filter expression given on the command line (Python 2).

	Relies on module-level 'opts', 'args' and 'parameterized' set by the
	option parser elsewhere in this script.
	"""
	if opts.save_jobjson or opts.save_jobgc or opts.get_events:
		(workDir, nJobs, jobList) = getWorkJobs(args)
		(log, incomplete, splitter, splitInfo) = (None, False, None, {})
		# lumiDict: sample -> run -> set(lumi); readDict/writeDict: event counters
		(lumiDict, readDict, writeDict) = ({}, {}, {})
		try:
			splitter = DataSplitter.loadState(os.path.join(workDir, 'datamap.tar'))
		except Exception:
			pass  # best-effort: splitter info is optional
		jobList = sorted(jobList)

		for jobNum in jobList:
			del log  # drop previous activity display before starting a new one
			log = utils.ActivityLog('Reading job logs - [%d / %d]' % (jobNum, jobList[-1]))
			jobInfo = getJobInfo(workDir, jobNum, lambda retCode: retCode == 0)
			if not jobInfo:
				if not incomplete:
					print 'WARNING: Not all jobs have finished - results will be incomplete!'
					incomplete = True
				continue

			if not parameterized:
				if splitter:
					splitInfo = splitter.getSplitInfo(jobNum)
				# Group output by dataset nickname / ID when available
				outputName = splitInfo.get(DataSplitter.Nickname, splitInfo.get(DataSplitter.DatasetID, 0))
			else:
				# Derive a sample name from the output file name, stripping the job number
				outputName = jobInfo['file'].split()[2].replace("_%d_" % jobNum, '_').replace('/', '_').replace('__', '_')

			# Read framework report files to get number of events
			try:
				outputDir = os.path.join(workDir, 'output', 'job_' + str(jobNum))
				for fwkXML in getCMSSWInfo(os.path.join(outputDir, 'cmssw.dbs.tar.gz')):
					for run in fwkXML.getElementsByTagName('Run'):
						for lumi in run.getElementsByTagName('LumiSection'):
							run_id = int(run.getAttribute('ID'))
							lumi_id = int(lumi.getAttribute('ID'))
							lumiDict.setdefault(outputName, {}).setdefault(run_id, set()).add(lumi_id)
					for outFile in fwkXML.getElementsByTagName('File'):
						pfn = outFile.getElementsByTagName('PFN')[0].childNodes[0].data
						if pfn not in writeDict.setdefault(outputName, {}):
							writeDict[outputName][pfn] = 0
						writeDict[outputName][pfn] += int(outFile.getElementsByTagName('TotalEvents')[0].childNodes[0].data)
					for inFile in fwkXML.getElementsByTagName('InputFile'):
						if outputName not in readDict:
							readDict[outputName] = 0
						readDict[outputName] += int(inFile.getElementsByTagName('EventsRead')[0].childNodes[0].data)
			except KeyboardInterrupt:
				sys.exit(os.EX_OK)
			except Exception:
				# NOTE(review): this bare 'raise' makes the print/continue below
				# unreachable, so any broken report aborts the whole scan instead
				# of skipping the job - verify which behavior is intended
				raise
				print 'Error while parsing framework output of job %s!' % jobNum
				continue

		del log
		log = utils.ActivityLog('Simplifying lumi sections')
		# Convert every (run, lumi) pair into a degenerate range, then merge
		lumis = {}
		for sample in lumiDict:
			for run in lumiDict[sample]:
				for lumi in lumiDict[sample][run]:
					lumis.setdefault(sample, []).append(([run, lumi], [run, lumi]))
		for sample in lumiDict:
			lumis[sample] = mergeLumi(lumis[sample])
		del log

		# NOTE(review): the loop variable deliberately shadows the 'lumis' dict -
		# inside the loop 'lumis' is the per-sample merged range list
		for sample, lumis in lumis.items():
			print 'Sample:', sample
			print '========================================='
			print 'Number of events processed: %12d' % readDict[sample]
			print ' Number of events written: %12d' % sum(writeDict.get(sample, {}).values())
			if writeDict.get(sample, None):
				print
				head = [(0, ' Output filename'), (1, 'Events')]
				utils.printTabular(head, map(lambda pfn: {0: pfn, 1: writeDict[sample][pfn]}, writeDict[sample]))
			if opts.save_jobjson:
				outputJSON(lumis, open(os.path.join(workDir, 'processed_%s.json' % sample), 'w'))
				print 'Saved processed lumi sections in', os.path.join(workDir, 'processed_%s.json' % sample)
			if opts.save_jobgc:
				print
				print 'List of processed lumisections:'
				print '-----------------------------------------'
				outputGC(lumis)
			print

	###########################
	# Lumi filter manuipulation
	###########################
	if opts.save_exprgc or opts.save_exprjson or opts.save_exprfull:
		if len(args) == 0:
			raise Exception('No arguments given!')
		try:
			lumis = parseLumiFilter(str.join(' ', args))
		except Exception:
			raise Exception('Could not parse: %s' % str.join(' ', args))
		if opts.save_exprgc:
			outputGC(lumis)
		if opts.save_exprjson:
			outputJSON(lumis)
		if opts.save_exprfull:
			result = {}
			for rlrange in lumis:
				start, end = rlrange
				assert(start[0] == end[0])
				# NOTE(review): list.extend() returns None, so 'llist' is always
				# None and unused - the extend itself still fills 'result'
				llist = result.setdefault(start[0], []).extend(range(start[1], end[1] + 1))
			print result
def __init__(self, config, name):
	"""Configure the CMSSW task: SCRAM project area discovery, config file
	handling, instrumentation and project-area tarball generation.

	Raises ConfigError for inconsistent SCRAM/project-area settings, missing
	files or non-CMSSW project areas.
	"""
	# Defaults injected before the DataTask base class reads them
	config.set('se input timeout', '0:30')
	config.set('dataset provider', 'DBS3Provider')
	config.set('dataset splitter', 'EventBoundarySplitter')
	config.set('dataset processor', 'CMSDataSplitProcessor SECheckSplitProcessor')
	DataTask.__init__(self, config, name)
	self.errorDict.update(dict(self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))))

	# SCRAM info
	# Either an explicit 'scram project PROJECT VERSION' or a local project area
	scramProject = config.getList('scram project', [])
	if len(scramProject):
		self.projectArea = config.getPath('project area', '')
		if len(self.projectArea):
			raise ConfigError('Cannot specify both SCRAM project and project area')
		if len(scramProject) != 2:
			raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
	else:
		self.projectArea = config.getPath('project area')

	# This works in tandem with provider_dbsv2.py !
	self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))

	self.useReqs = config.getBool('software requirements', True, onChange = None)
	self._projectAreaTarballSE = config.getBool(['se project area', 'se runtime'], True)
	self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')

	if len(self.projectArea):
		# File patterns selecting what goes into the project-area tarball
		defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
		self.pattern = config.getList('area files', defaultPattern.split())

		if os.path.exists(self.projectArea):
			utils.vprint('Project area found in: %s' % self.projectArea, -1)
		else:
			raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

		# Read SCRAM environment from the project area metadata
		scramPath = os.path.join(self.projectArea, '.SCRAM')
		# try to open it
		# NOTE(review): 'fp' is never closed here (and below) - file handle leak
		try:
			fp = open(os.path.join(scramPath, 'Environment'), 'r')
			self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
		except Exception:
			raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

		for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
			if key not in self.scramEnv:
				raise ConfigError('Installed program in project area not recognized.')

		# Default scram arch: first non-hidden subdirectory of .SCRAM
		archs = filter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath))
		self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
		try:
			fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
			self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
		except Exception:
			raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
	else:
		# No project area: build the SCRAM environment from the config options
		self.scramEnv = {
			'SCRAM_PROJECTNAME': scramProject[0],
			'SCRAM_PROJECTVERSION': scramProject[1]
		}
		self.scramArch = config.get('scram arch')
	self.scramVersion = config.get('scram version', 'scramv1')

	if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
		raise ConfigError('Project area not a valid CMSSW project area.')

	# Information about search order for software environment
	self.searchLoc = []
	if config.getState('init', detail = 'sandbox'):
		userPath = config.get('cmssw dir', '')
		if userPath != '':
			self.searchLoc.append(('CMSSW_DIR_USER', userPath))
		if self.scramEnv.get('RELEASETOP', None):
			# Release install root is four levels above RELEASETOP
			projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
			self.searchLoc.append(('CMSSW_DIR_PRO', projPath))
	if len(self.searchLoc):
		utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
		for i, loc in enumerate(self.searchLoc):
			key, value = loc
			utils.vprint(' %i) %s' % (i + 1, value), -1)

	# Prolog / Epilog script support - warn about old syntax
	self.prolog = TaskExecutableWrapper(config, 'prolog', '')
	self.epilog = TaskExecutableWrapper(config, 'epilog', '')
	if config.getPaths('executable', []) != []:
		raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
	self.arguments = config.get('arguments', '')

	# Get cmssw config files and check their existance
	self.configFiles = []
	# Config files are mandatory unless a prolog/epilog executable is set
	cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
	for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
		newPath = config.getWorkPath(os.path.basename(cfgFile))
		if not os.path.exists(newPath):
			if not os.path.exists(cfgFile):
				raise ConfigError('Config file %r not found.' % cfgFile)
			# Work with a copy inside the work directory
			shutil.copyfile(cfgFile, newPath)
		self.configFiles.append(newPath)

	# Check that for dataset jobs the necessary placeholders are in the config file
	self.prepare = config.getBool('prepare config', False)
	fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
	if self.dataSplitter != None:
		if config.getState('init', detail = 'sandbox'):
			if len(self.configFiles) > 0:
				self.instrumentCfgQueue(self.configFiles, fragment, mustPrepare = True)
	else:
		self.eventsPerJob = config.get('events per job', '0')
		if config.getState('init', detail = 'sandbox') and self.prepare:
			self.instrumentCfgQueue(self.configFiles, fragment)

	# Force sandbox re-initialization when the tarball is missing
	if not os.path.exists(self._projectAreaTarball):
		config.setState(True, 'init', detail = 'sandbox')
	if config.getState('init', detail = 'sandbox'):
		if os.path.exists(self._projectAreaTarball):
			if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
				return
		# Generate CMSSW tarball
		if self.projectArea:
			utils.genTarball(self._projectAreaTarball, utils.matchFiles(self.projectArea, self.pattern))
		if self._projectAreaTarballSE:
			config.setState(True, 'init', detail = 'storage')