def __init__(self, config, datasetExpr, datasetNick=None):
    ds_config = config.changeView(viewClass='TaggedConfigView', addNames=[md5_hex(datasetExpr)])
    if os.path.isdir(datasetExpr):
        scan_pipeline = ['OutputDirsFromWork']
        ds_config.set('source directory', datasetExpr)
        datasetExpr = os.path.join(datasetExpr, 'work.conf')
    else:
        scan_pipeline = ['OutputDirsFromConfig', 'MetadataFromTask']
        datasetExpr, selector = utils.optSplit(datasetExpr, '%')
        ds_config.set('source config', datasetExpr)
        ds_config.set('source job selector', selector)
    ext_config = create_config(datasetExpr)
    ext_task_name = ext_config.changeView(setSections=['global']).get(['module', 'task'])
    if 'ParaMod' in ext_task_name:  # handle old config files
        ext_task_name = ext_config.changeView(setSections=['ParaMod']).get('module')
    ext_task_cls = Plugin.getClass(ext_task_name)
    for ext_task_cls in Plugin.getClass(ext_task_name).iterClassBases():
        try:
            scan_holder = GCProviderSetup.getClass('GCProviderSetup_' + ext_task_cls.__name__)
        except PluginError:
            continue
        scan_pipeline += scan_holder.scan_pipeline
        break
    ScanProviderBase.__init__(self, ds_config, datasetExpr, datasetNick, scan_pipeline)
def gc_create_config(cmd_line_args=None, **kwargs):  # create config instance
    if cmd_line_args is not None:
        (_, args) = _parse_cmd_line(cmd_line_args)
        kwargs.setdefault('config_file', args[0])
        kwargs.setdefault('additional', []).append(OptsConfigFiller(cmd_line_args))
    return create_config(register=True, **kwargs)
def __init__(self, config):
    InfoScanner.__init__(self, config)
    ext_config_fn = config.getPath('source config', onChange=triggerDataResync)
    ext_config = create_config(ext_config_fn, useDefaultFiles=True).changeView(setSections=['global'])
    self._extWorkDir = ext_config.getWorkPath()
    logging.getLogger().disabled = True
    self._extWorkflow = ext_config.getPlugin('workflow', 'Workflow:global',
        cls='Workflow', pargs=('task',))
    logging.getLogger().disabled = False
    self._extTask = self._extWorkflow.task
    selector = config.get('source job selector', '', onChange=triggerDataResync)
    ext_job_db = ext_config.getPlugin('job database', 'TextFileJobDB', cls='JobDB',
        pkwargs={'jobSelector': lambda jobNum, jobObj: jobObj.state == Job.SUCCESS}, onChange=None)
    self._selected = sorted(ext_job_db.getJobs(JobSelector.create(selector, task=self._extTask)))
def gc_create_config(cmd_line_args=None, **kwargs):
    if cmd_line_args is not None:
        (_, args) = parse_cmd_line(cmd_line_args)
        kwargs.setdefault('configFile', args[0])
        kwargs.setdefault('additional', []).append(OptsConfigFiller(cmd_line_args))
    return create_config(register=True, **kwargs)
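# Usage sketch (added for illustration, not part of the original source): both
# gc_create_config variants above take the first positional command-line
# argument as the config file and layer an OptsConfigFiller on top of it.
# 'example.conf' is a hypothetical filename.
example_config = gc_create_config(cmd_line_args=['example.conf'])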
def loadFromFile(path):
    return DataProvider.createInstance('ListProvider',
        create_config(configDict={'dataset': {'dataset processor': 'NullDataProcessor'}}), path)
def __init__(self, url):
    self._reader_url = '%s/%s' % (url, 'DBSReader')
    self._writer_url = '%s/%s' % (url, 'DBSWriter')
    self._migrate_url = '%s/%s' % (url, 'DBSMigrate')
    self._gjrc = GridJSONRestClient(get_cms_cert(create_config()),
        cert_error_msg='VOMS proxy needed to query DBS3!', cert_error_cls=UserError)
def __init__(self, config):
    config.set('jobs', 'monitor', 'dashboard', override=False)
    config.set('grid', 'sites', '-samtest -cmsprodhi', append=True)
    site_db = SiteDB()
    token = AccessToken.create_instance('VomsProxy', create_config(), 'token')
    self._hn_name = site_db.dn_to_username(token.get_fq_user_name())
    if not self._hn_name:
        raise ConfigError('Unable to map grid certificate to hn name!')
def getConfig(configFile=None, configDict=None, section=None, additional=None):
    if configDict and section:
        configDict = {section: configDict}
    config = create_config(configFile, configDict, useDefaultFiles=True, additional=additional)
    if section:
        return config.changeView(addSections=[section])
    return config
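# Usage sketch (added for illustration, not part of the original source): wrap
# an in-memory option dict in a 'dataset' section; getConfig nests configDict
# under the section and returns a view with that section added.
example_config = getConfig(configDict={'dataset processor': 'NullDataProcessor'}, section='dataset')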
def __init__(self, config):
    config.set('jobs', 'monitor', 'dashboard', override=False)
    config.set('grid', 'sites', '-samtest -cmsprodhi', append=True)
    site_db = CRIC()
    token = AccessToken.create_instance('VomsProxy', create_config(), 'token')
    self._hn_name = site_db.dn_to_username(token.get_fq_user_name())
    if not self._hn_name:
        raise ConfigError('Unable to map grid certificate to hn name!')
def load_from_file(path):  # Load dataset information using ListProvider
    return DataProvider.create_instance('ListProvider',
        create_config(load_old_config=False,
            config_dict={'dataset': {'dataset processor': 'NullDataProcessor'}}),
        'dataset', path)
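# Usage sketch (added for illustration, not part of the original source):
# 'datasets.dbs' is a hypothetical dataset listing file; load_from_file wires
# it into a ListProvider with dataset processing disabled via NullDataProcessor.
example_provider = load_from_file('datasets.dbs')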
def __init__(self, config, datasource_name):
    InfoScanner.__init__(self, config, datasource_name)
    self._parent_source = config.get('parent source', '')
    self._parent_keys = config.get_list('parent keys', [])
    self._parent_match_level = config.get_int('parent match level', 1)
    self._parent_merge = config.get_bool('merge parents', False)
    # cached "parent lfn parts" (plfnp) to "parent dataset name" (pdn) maps
    self._plfnp2pdn_cache = {}  # the maps are stored for different parent_dataset_expr
    self._empty_config = create_config(use_default_files=False, load_old_config=False)
    self._read_plfnp_map(config, self._parent_source)  # read from configured parent source
def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
    dataset_config = config.change_view(default_on_change=TriggerResync(['datasets', 'parameters']))
    self._lumi_filter = dataset_config.get_lookup(['lumi filter', '%s lumi filter' % datasource_name],
        default={}, parser=parse_lumi_filter, strfun=str_lumi)
    if not self._lumi_filter.empty():
        config.set('%s processor' % datasource_name, 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
    # LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
    self._lumi_query = dataset_config.get_bool(['lumi metadata', '%s lumi metadata' % datasource_name],
        default=not self._lumi_filter.empty())
    config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
    # PhEDEx blacklist: T1_*_Disk nodes allow user jobs - other T1 sites don't!
    self._phedex_filter = dataset_config.get_filter('phedex sites', '-* T1_*_Disk T2_* T3_*',
        default_matcher='BlackWhiteMatcher', default_filter='StrictListFilter')
    self._only_complete = dataset_config.get_bool('only complete sites', True)
    self._only_valid = dataset_config.get_bool('only valid', True)
    self._allow_phedex = dataset_config.get_bool('allow phedex', True)
    self._location_format = dataset_config.get_enum('location format',
        CMSLocationFormat, CMSLocationFormat.hostname)
    self._sitedb = CRIC()
    token = AccessToken.create_instance('VomsProxy', create_config(), 'token')
    self._rucio = Client(account=self._sitedb.dn_to_username(token.get_fq_user_name()))
    dataset_expr_parts = split_opt(dataset_expr, '@#')
    (self._dataset_path, self._dataset_instance, self._dataset_block_selector) = dataset_expr_parts
    instance_default = dataset_config.get('dbs instance', '')
    self._dataset_instance = self._dataset_instance or instance_default
    if not self._dataset_instance:
        self._dataset_instance = 'prod/global'
    elif '/' not in self._dataset_instance:
        self._dataset_instance = 'prod/%s' % self._dataset_instance
    self._dataset_block_selector = self._dataset_block_selector or 'all'
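# Illustrative sketch (added, not part of the original source): the helper
# below is hypothetical and only restates the '@instance' defaulting logic of
# the constructor above, where dataset_expr is split on '@' and '#' into
# path, DBS instance and block selector.
def _normalize_dbs_instance(instance, instance_default=''):
    instance = instance or instance_default
    if not instance:
        return 'prod/global'
    if '/' not in instance:
        return 'prod/%s' % instance
    return instance

assert _normalize_dbs_instance('') == 'prod/global'
assert _normalize_dbs_instance('phys03') == 'prod/phys03'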
def __init__(self, config, datasource_name):
    InfoScanner.__init__(self, config, datasource_name)
    ext_config_fn = config.get_fn('source config')
    ext_config_raw = create_config(ext_config_fn, load_only_old_config=True)
    ext_config = ext_config_raw.change_view(set_sections=['global'])
    self._ext_work_dn = ext_config.get_work_path()
    logging.getLogger().disabled = True
    ext_workflow = ext_config.get_plugin('workflow', 'Workflow:global',
        cls='Workflow', pkwargs={'backend': 'NullWMS'})
    logging.getLogger().disabled = False
    self._ext_task = ext_workflow.task
    job_selector = JobSelector.create(config.get('source job selector', ''), task=self._ext_task)
    self._selected = sorted(ext_workflow.job_manager.job_db.get_job_list(
        AndJobSelector(ClassSelector(JobClass.SUCCESS), job_selector)))
def __init__(self, config):
    InfoScanner.__init__(self, config)
    self._source = config.get('parent source', '', onChange=triggerDataResync)
    self._parentKeys = config.getList('parent keys', [], onChange=triggerDataResync)
    self._looseMatch = config.getInt('parent match level', 1, onChange=triggerDataResync)
    self._merge = config.getBool('merge parents', False, onChange=triggerDataResync)
    self._lfnMapCache = {}
    self._empty_config = create_config()
    self._readParents(config, self._source)
def loadPartitionsForScript(path, cfg=None):
    src = DataSplitterIO.createInstance('DataSplitterIOAuto').loadSplitting(path)
    # Transfer config protocol (in case no split function is called)
    protocol = {}
    for (section, options) in src.metadata.items():
        section = section.replace('dataset', '').strip()
        for (option, value) in options.items():
            if section:
                option = '[%s] %s' % (section, option)
            protocol[option.strip()] = value
            if cfg is not None:
                cfg.set(option, str(value))
    # Create and setup splitter
    if cfg is None:
        cfg = create_config(configDict=src.metadata)
    splitter = DataSplitter.createInstance(src.classname, cfg)
    splitter.setState(src, protocol)
    return splitter
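# Usage sketch (added for illustration, not part of the original source):
# 'datamap.tar' is a hypothetical partition file written by a previous run;
# with cfg omitted, the splitter config is rebuilt from the stored metadata.
example_splitter = loadPartitionsForScript('datamap.tar')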
def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
    ds_config = config.change_view(view_class='TaggedConfigView', add_names=[md5_hex(dataset_expr)])
    if os.path.isdir(dataset_expr):
        scanner_list = ['OutputDirsFromWork']
        ds_config.set('source directory', dataset_expr)
        dataset_expr = os.path.join(dataset_expr, 'work.conf')
    else:
        scanner_list = ['OutputDirsFromConfig', 'MetadataFromTask']
        dataset_expr, selector = split_opt(dataset_expr, '%')
        ds_config.set('source config', dataset_expr)
        ds_config.set('source job selector', selector)
    ext_config = create_config(dataset_expr)
    ext_task_name = ext_config.change_view(set_sections=['global']).get(['module', 'task'])
    ext_task_cls = Plugin.get_class(ext_task_name)
    for ext_task_cls in Plugin.get_class(ext_task_name).iter_class_bases():
        scan_setup_name = 'GCProviderSetup_' + ext_task_cls.__name__
        scan_setup_cls = GCProviderSetup.get_class(scan_setup_name, ignore_missing=True)
        if scan_setup_cls:
            scanner_list += scan_setup_cls.scanner_list
            break
    ScanProviderBase.__init__(self, ds_config, datasource_name, dataset_expr,
        dataset_nick, dataset_proc, scanner_list)
def get_cms_cert(config=None, ignore_errors=False):
    logging.getLogger('access.cms-proxy').setLevel(logging.ERROR)
    if not ignore_errors:
        return _get_cms_cert(config or create_config())
    return ignore_exception(Exception, None, _get_cms_cert, config or create_config())
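# Usage sketch (added for illustration, not part of the original source): with
# ignore_errors=True a missing VOMS proxy yields None instead of an exception.
example_cert = get_cms_cert(ignore_errors=True)
if example_cert is None:
    logging.getLogger().warning('no usable CMS grid certificate found')  # illustrative message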