def __init__(self, config, datasetExpr, datasetNick, datasetID = 0):
    DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    (self._path, self._events, selist) = utils.optSplit(datasetExpr, '|@')
    self._selist = utils.parseList(selist, delimeter = ',', onEmpty = None)
    if not (self._path and self._events):
        raise ConfigError('Invalid dataset expression!\nCorrect: /local/path/to/file|events[@SE1,SE2]')
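# Hedged sketch: the snippets in this section all lean on utils.optSplit(value, delims)
# from grid-control to peel optional trailing parts off a dataset expression
# ('|@' -> path|events@SE1,SE2; '@#' -> path@instance#block; ':' -> class:instance;
# '%' -> config%selector). This is an illustrative reimplementation, not the library
# source; it assumes the delimiters occur in the listed order and that missing parts
# come back as empty strings.
def opt_split(value, delims):
    # Split off one optional part per delimiter, right to left, so that
    # 'a|b@c' with delims '|@' yields ('a', 'b', 'c').
    parts = []
    for delim in reversed(delims):
        value, sep, part = value.rpartition(delim)
        if not sep:  # delimiter absent: restore value, record empty part
            value, part = part, ''
        parts.insert(0, part.strip())
    return tuple([value.strip()] + parts)

assert opt_split('/local/path|5000@SE1,SE2', '|@') == ('/local/path', '5000', 'SE1,SE2')
assert opt_split('/local/path|5000', '|@') == ('/local/path', '5000', '')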
def __init__(self, config, datasetExpr, datasetNick=None):
    ds_config = config.changeView(viewClass='TaggedConfigView', addNames=[md5_hex(datasetExpr)])
    if os.path.isdir(datasetExpr):
        scan_pipeline = ['OutputDirsFromWork']
        ds_config.set('source directory', datasetExpr)
        datasetExpr = os.path.join(datasetExpr, 'work.conf')
    else:
        scan_pipeline = ['OutputDirsFromConfig', 'MetadataFromTask']
        datasetExpr, selector = utils.optSplit(datasetExpr, '%')
        ds_config.set('source config', datasetExpr)
        ds_config.set('source job selector', selector)
    ext_config = create_config(datasetExpr)
    ext_task_name = ext_config.changeView(setSections=['global']).get(['module', 'task'])
    if 'ParaMod' in ext_task_name:  # handle old config files
        ext_task_name = ext_config.changeView(setSections=['ParaMod']).get('module')
    ext_task_cls = Plugin.getClass(ext_task_name)
    for ext_task_cls in Plugin.getClass(ext_task_name).iterClassBases():
        try:
            scan_holder = GCProviderSetup.getClass('GCProviderSetup_' + ext_task_cls.__name__)
        except PluginError:
            continue
        scan_pipeline += scan_holder.scan_pipeline
        break
    ScanProviderBase.__init__(self, ds_config, datasetExpr, datasetNick, scan_pipeline)
def __init__(self, config, datasetExpr, datasetNick = None):
    DataProvider.__init__(self, config, datasetExpr, datasetNick)
    (self._path, self._events, selist) = utils.optSplit(datasetExpr, '|@')
    self._selist = parseList(selist, ',') or None
    if not (self._path and self._events):
        raise ConfigError('Invalid dataset expression!\nCorrect: /local/path/to/file|events[@SE1,SE2]')
def __init__(self, config, datasetExpr, datasetNick = None):
    self._changeTrigger = triggerResync(['datasets', 'parameters'])
    self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter,
        strfun = strLumi, onChange = self._changeTrigger)
    if not self._lumi_filter.empty():
        config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasetExpr, datasetNick)
    # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
    self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange = self._changeTrigger)
    config.set('phedex sites matcher mode', 'shell', '?=')
    # PhEDex blacklist: only T1_*_Disk nodes allow user jobs - other T1s don't!
    self._phedexFilter = config.getFilter('phedex sites', '-* T1_*_Disk T2_* T3_*',
        defaultMatcher = 'blackwhite', defaultFilter = 'strict', onChange = self._changeTrigger)
    self._onlyComplete = config.getBool('only complete sites', True, onChange = self._changeTrigger)
    self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname,
        onChange = self._changeTrigger)
    self._pjrc = JSONRestClient(url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
    self._sitedb = SiteDB()
    (self._datasetPath, self._datasetInstance, self._datasetBlock) = optSplit(datasetExpr, '@#')
    instance_default = config.get('dbs instance', '', onChange = self._changeTrigger)
    self._datasetInstance = self._datasetInstance or instance_default
    if not self._datasetInstance:
        self._datasetInstance = 'prod/global'
    elif '/' not in self._datasetInstance:
        self._datasetInstance = 'prod/%s' % self._datasetInstance
    self._datasetBlock = self._datasetBlock or 'all'
    self.onlyValid = config.getBool('only valid', True, onChange = self._changeTrigger)
def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
    self._lumi_filter = parseLumiFilter(config.get('lumi filter', ''))
    if self._lumi_filter:
        config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    # PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1s don't!
    self._lumi_query = config.getBool('lumi metadata', self._lumi_filter != [])
    self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC',
        defaultMatcher='blackwhite', defaultFilter='weak')
    self._phedexT1Filter = config.getFilter('phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
        defaultMatcher='blackwhite', defaultFilter='weak')
    self._phedexT1Mode = config.get('phedex t1 mode', 'disk').lower()
    self.onlyComplete = config.getBool('only complete sites', True)
    self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname)
    (self._datasetPath, self._url, self._datasetBlock) = utils.optSplit(datasetExpr, '@#')
    self._url = self._url or config.get('dbs instance', '')
    self._datasetBlock = self._datasetBlock or 'all'
    self.onlyValid = config.getBool('only valid', True)
def __init__(self, baseClass, value, config, tags, inherit, defaultName, pluginPaths):
    (self._baseClass, self._config, self._tags, self._inherit, self._pluginPaths) = \
        (baseClass, config, tags, inherit, pluginPaths)
    (self._instClassName, self._instName) = utils.optSplit(value, ':')
    if self._instName == '':
        if not defaultName:
            self._instName = self._instClassName.split('.')[-1]  # Default: (non fully qualified) class name as instance name
        else:
            self._instName = defaultName
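# Hedged examples for the instance naming rule above (assuming optSplit(value, ':')
# splits off an optional ':instance' suffix; names below are illustrative):
#   value 'mypkg.MyPlugin'        -> instance name 'MyPlugin' (unqualified class tail)
#   value 'mypkg.MyPlugin:custom' -> instance name 'custom'
#   value 'mypkg.MyPlugin' with defaultName = 'fallback' -> instance name 'fallback'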
def _setup(self, setup_key, setup_mod):
    if setup_key:
        (delim, ds, de) = utils.optSplit(setup_key, '::')
        modifier = identity
        if setup_mod and (setup_mod.strip() != 'value'):
            try:
                modifier = eval('lambda value: ' + setup_mod)  # pylint:disable=eval-used
            except Exception:
                raise ConfigError('Unable to parse delimiter modifier %r' % setup_mod)
        return (delim, parseStr(ds, int), parseStr(de, int), modifier)
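# Hedged usage sketch for the eval-based modifier in _setup above: a config value
# such as 'value.split("_")[0]' is compiled into a one-argument lambda and applied
# to each split fragment. The sample expression is illustrative, not taken from a
# real config.
modifier = eval('lambda value: ' + 'value.split("_")[0]')  # pylint:disable=eval-used
assert modifier('RUN_2016_B') == 'RUN'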
def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
    changeTrigger = triggerResync(['datasets', 'parameters'])
    self._lumi_filter = config.getLookup('lumi filter', {}, parser=parseLumiFilter,
        strfun=strLumi, onChange=changeTrigger)
    if not self._lumi_filter.empty():
        config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
    self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange=changeTrigger)
    # PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1s don't!
    self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC',
        defaultMatcher='blackwhite', defaultFilter='weak', onChange=changeTrigger)
    self._phedexT1Filter = config.getFilter('phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
        defaultMatcher='blackwhite', defaultFilter='weak', onChange=changeTrigger)
    self._phedexT1Mode = config.getEnum('phedex t1 mode', PhedexT1Mode, PhedexT1Mode.disk, onChange=changeTrigger)
    self.onlyComplete = config.getBool('only complete sites', True, onChange=changeTrigger)
    self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname,
        onChange=changeTrigger)
    self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
    (self._datasetPath, self._url, self._datasetBlock) = optSplit(datasetExpr, '@#')
    self._url = self._url or config.get('dbs instance', '')
    self._datasetBlock = self._datasetBlock or 'all'
    self.onlyValid = config.getBool('only valid', True, onChange=changeTrigger)
def __init__(self, config, datasetExpr, datasetNick=None):
    self._changeTrigger = triggerResync(['datasets', 'parameters'])
    self._lumi_filter = config.getLookup('lumi filter', {}, parser=parseLumiFilter,
        strfun=strLumi, onChange=self._changeTrigger)
    if not self._lumi_filter.empty():
        config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasetExpr, datasetNick)
    # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
    self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange=self._changeTrigger)
    config.set('phedex sites matcher mode', 'shell', '?=')
    # PhEDex blacklist: only T1_*_Disk nodes allow user jobs - other T1s don't!
    self._phedexFilter = config.getFilter('phedex sites', '-* T1_*_Disk T2_* T3_*',
        defaultMatcher='blackwhite', defaultFilter='strict', onChange=self._changeTrigger)
    self._onlyComplete = config.getBool('only complete sites', True, onChange=self._changeTrigger)
    self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname,
        onChange=self._changeTrigger)
    self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
    self._sitedb = SiteDB()
    (self._datasetPath, self._datasetInstance, self._datasetBlock) = optSplit(datasetExpr, '@#')
    instance_default = config.get('dbs instance', '', onChange=self._changeTrigger)
    self._datasetInstance = self._datasetInstance or instance_default
    if not self._datasetInstance:
        self._datasetInstance = 'prod/global'
    elif '/' not in self._datasetInstance:
        self._datasetInstance = 'prod/%s' % self._datasetInstance
    self._datasetBlock = self._datasetBlock or 'all'
    self.onlyValid = config.getBool('only valid', True, onChange=self._changeTrigger)
def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
    DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    self._CommonPrefix = max(self.enumValues) + 1
    self._CommonMetadata = max(self.enumValues) + 2
    self._handleEntry = {
        'events': (DataProvider.NEntries, int, 'block entry counter'),
        'id': (DataProvider.DatasetID, int, 'dataset ID'),
        'metadata': (DataProvider.Metadata, parseJSON, 'metadata description'),
        'metadata common': (self._CommonMetadata, parseJSON, 'common metadata'),
        'nickname': (DataProvider.Nickname, str, 'dataset nickname'),
        'prefix': (self._CommonPrefix, str, 'common prefix'),
        'se list': (DataProvider.Locations, lambda value: parseList(value, ','), 'block location'),
    }
    (path, self._forcePrefix, self._filter) = utils.optSplit(datasetExpr, '@%')
    self._filename = config.resolvePath(path, True, 'Error resolving dataset file: %s' % path)
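# Hedged sketch of the dataset listing format implied by _handleEntry above:
# INI-style blocks whose keys match the handler table ('events', 'nickname',
# 'prefix', 'se list', 'metadata', ...), with any remaining key presumably read
# as 'file name = number of entries'. The layout below is inferred from the
# parser keys, not copied from grid-control documentation.
example_dataset_file = """
[/PRIVATE/MyDataset#block1]
nickname = my_nick
events = 5000
se list = SE1,SE2
prefix = /store/user/test
file_001.root = 2500
file_002.root = 2500
"""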
def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
    if os.path.isdir(datasetExpr):
        GCProvider.stageDir[None] = ['OutputDirsFromWork']
        config.set('source directory', datasetExpr)
        datasetExpr = os.path.join(datasetExpr, 'work.conf')
    else:
        GCProvider.stageDir[None] = ['OutputDirsFromConfig', 'MetadataFromTask']
        datasetExpr, selector = utils.optSplit(datasetExpr, '%')
        config.set('source config', datasetExpr)
        config.set('source job selector', selector)
    extConfig = createConfig(datasetExpr)
    extModule = extConfig.changeView(setSections = ['global']).get(['task', 'module'])
    if 'ParaMod' in extModule:  # handle old config files
        extModule = extConfig.changeView(setSections = ['ParaMod']).get('module')
    sGet = lambda scannerDict: scannerDict.get(None) + scannerDict.get(extModule, [])
    sList = sGet(GCProvider.stageDir) + ['JobInfoFromOutputDir', 'FilesFromJobInfo'] + \
        sGet(GCProvider.stageFile) + ['DetermineEvents', 'AddFilePrefix']
    ScanProviderBase.__init__(self, config, sList, datasetNick, datasetID)
def __init__(self, config, datasetExpr, datasetNick = None):
    DataProvider.__init__(self, config, datasetExpr, datasetNick)
    self._CommonPrefix = max(self.enumValues) + 1
    self._CommonMetadata = max(self.enumValues) + 2
    self._handleEntry = {
        'events': (DataProvider.NEntries, int, 'block entry counter'),
        'id': (None, None, 'dataset ID'),  # legacy key - skip
        'metadata': (DataProvider.Metadata, parseJSON, 'metadata description'),
        'metadata common': (self._CommonMetadata, parseJSON, 'common metadata'),
        'nickname': (DataProvider.Nickname, str, 'dataset nickname'),
        'prefix': (self._CommonPrefix, str, 'common prefix'),
        'se list': (DataProvider.Locations, lambda value: parseList(value, ','), 'block location'),
    }
    (path, self._forcePrefix, self._filter) = utils.optSplit(datasetExpr, '@%')
    self._filename = config.resolvePath(path, True, 'Error resolving dataset file: %s' % path)
def __init__(self, config, datasetExpr, datasetNick, datasetID = 0):
    if os.path.isdir(datasetExpr):
        GCProvider.stageDir[None] = ['OutputDirsFromWork']
        config.set('source directory', datasetExpr)
        datasetExpr = os.path.join(datasetExpr, 'work.conf')
    else:
        GCProvider.stageDir[None] = ['OutputDirsFromConfig', 'MetadataFromTask']
        datasetExpr, selector = utils.optSplit(datasetExpr, '%')
        config.set('source config', datasetExpr)
        config.set('source job selector', selector)
    extConfig = Config([DefaultFilesConfigFiller(), FileConfigFiller([datasetExpr])], datasetExpr)
    extModule = extConfig.get('global', ['task', 'module'])
    if 'ParaMod' in extModule:
        extModule = extConfig.get('ParaMod', 'module')
    sGet = lambda scannerDict: scannerDict.get(None) + scannerDict.get(extModule, [])
    sList = sGet(GCProvider.stageDir) + ['FilesFromJobInfo'] + sGet(GCProvider.stageFile) + \
        ['DetermineEvents', 'AddFilePrefix']
    ScanProviderBase.__init__(self, config, sList, datasetNick, datasetID)
def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
    self._lumi_filter = parseLumiFilter(config.get('lumi filter', ''))
    if self._lumi_filter:
        config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    # PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1s don't!
    self._lumi_query = config.getBool('lumi metadata', self._lumi_filter != [])
    self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC',
        defaultMatcher = 'blackwhite', defaultFilter = 'weak')
    self._phedexT1Filter = config.getFilter('phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
        defaultMatcher = 'blackwhite', defaultFilter = 'weak')
    self._phedexT1Mode = config.get('phedex t1 mode', 'disk').lower()
    self.onlyComplete = config.getBool('only complete sites', True)
    self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname)
    (self._datasetPath, self._url, self._datasetBlock) = utils.optSplit(datasetExpr, '@#')
    self._url = self._url or config.get('dbs instance', '')
    self._datasetBlock = self._datasetBlock or 'all'
    self.onlyValid = config.getBool('only valid', True)
def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
    changeTrigger = triggerResync(['datasets', 'parameters'])
    self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter,
        strfun = strLumi, onChange = changeTrigger)
    if not self._lumi_filter.empty():
        config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
    self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange = changeTrigger)
    # PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1s don't!
    self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC',
        defaultMatcher = 'blackwhite', defaultFilter = 'weak', onChange = changeTrigger)
    self._phedexT1Filter = config.getFilter('phedex t1 accept', 'T1_DE_KIT T1_US_FNAL',
        defaultMatcher = 'blackwhite', defaultFilter = 'weak', onChange = changeTrigger)
    self._phedexT1Mode = config.getEnum('phedex t1 mode', PhedexT1Mode, PhedexT1Mode.disk, onChange = changeTrigger)
    self.onlyComplete = config.getBool('only complete sites', True, onChange = changeTrigger)
    self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname,
        onChange = changeTrigger)
    self._pjrc = JSONRestClient(url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
    (self._datasetPath, self._url, self._datasetBlock) = optSplit(datasetExpr, '@#')
    self._url = self._url or config.get('dbs instance', '')
    self._datasetBlock = self._datasetBlock or 'all'
    self.onlyValid = config.getBool('only valid', True, onChange = changeTrigger)
def __init__(self, config, datasetExpr, datasetNick = None):
    ds_config = config.changeView(viewClass = 'TaggedConfigView', addNames = [md5_hex(datasetExpr)])
    if os.path.isdir(datasetExpr):
        scan_pipeline = ['OutputDirsFromWork']
        ds_config.set('source directory', datasetExpr)
        datasetExpr = os.path.join(datasetExpr, 'work.conf')
    else:
        scan_pipeline = ['OutputDirsFromConfig', 'MetadataFromTask']
        datasetExpr, selector = utils.optSplit(datasetExpr, '%')
        ds_config.set('source config', datasetExpr)
        ds_config.set('source job selector', selector)
    ext_config = create_config(datasetExpr)
    ext_task_name = ext_config.changeView(setSections = ['global']).get(['module', 'task'])
    if 'ParaMod' in ext_task_name:  # handle old config files
        ext_task_name = ext_config.changeView(setSections = ['ParaMod']).get('module')
    ext_task_cls = Plugin.getClass(ext_task_name)
    for ext_task_cls in Plugin.getClass(ext_task_name).iterClassBases():
        try:
            scan_holder = GCProviderSetup.getClass('GCProviderSetup_' + ext_task_cls.__name__)
        except PluginError:
            continue
        scan_pipeline += scan_holder.scan_pipeline
        break
    ScanProviderBase.__init__(self, ds_config, datasetExpr, datasetNick, scan_pipeline)
def __init__(self, config, datasetExpr, datasetNick, datasetID = 0):
    DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    # PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1s don't!
    self.phedexBL = config.getList('phedex sites', ['-T3_US_FNALLPC'])
    self.phedexWL = config.getList('phedex t1 accept', ['T1_DE_KIT', 'T1_US_FNAL'])
    self.phedexT1 = config.get('phedex t1 mode', 'disk').lower()
    self.onlyComplete = config.getBool('only complete sites', True)
    self.locationFormat = config.get('location format', 'hostname').lower()  # hostname or sitedb
    if self.locationFormat not in ['hostname', 'sitedb', 'both']:
        raise ConfigError('Invalid location format: %s' % self.locationFormat)
    (self.datasetPath, self.url, self.datasetBlock) = utils.optSplit(datasetExpr, '@#')
    self.url = QM(self.url, self.url, config.get('dbs instance', ''))
    self.datasetBlock = QM(self.datasetBlock, self.datasetBlock, 'all')
    self.includeLumi = config.getBool('keep lumi metadata', False)
    self.onlyValid = config.getBool('only valid', True)
    self.checkUnique = config.getBool('check unique', True)
    # This works in tandem with the active task module (cmssw.py supports only [section] lumi filter!)
    self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))
    if self.selectedLumis:
        utils.vprint('Runs/lumi section filter enabled! (%d entries)' % len(self.selectedLumis), -1, once = True)
        utils.vprint('\tThe following runs and lumi sections are selected:', 1, once = True)
        utils.vprint('\t' + utils.wrapList(formatLumi(self.selectedLumis), 65, ',\n\t'), 1, once = True)
def splitParse(opt):
    (delim, ds, de) = utils.optSplit(opt, '::')
    return (delim, parseStr(ds, int), parseStr(de, int))
def splitParse(opt):
    (delim, ds, de) = utils.optSplit(opt, '::')
    return (delim, utils.parseInt(ds), utils.parseInt(de))
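# Hedged examples for the splitParse variants above, assuming optSplit(opt, '::')
# yields (delimiter, start, end) and that parseStr/parseInt return None for
# missing values (results stated, not executed against grid-control):
#   splitParse('_:1:3')  ->  ('_', 1, 3)
#   splitParse('_:1')    ->  ('_', 1, None)
#   splitParse('_')      ->  ('_', None, None)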
def __init__(self, config, datasetExpr, datasetNick, datasetID = 0):
    DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
    (path, self._forcePrefix, self._filter) = utils.optSplit(datasetExpr, '@%')
    self._filename = config.resolvePath(path, True, 'Error resolving dataset file: %s' % path)