def __init__(self, config, datasource_name, repository, keep_old=True):
	BaseDataParameterSource.__init__(self, config, datasource_name, repository)

	# hide provider property set by __new__
	self._provider = self.provider
	del self.provider

	if self._provider.need_init_query():
		self._provider.get_block_list_cached(show_stats=False)

	data_src_text = 'Dataset source %r' % datasource_name
	# Select dataset refresh rate
	data_refresh = config.get_time('%s refresh' % datasource_name, -1, on_change=None)
	if data_refresh >= 0:
		data_refresh = max(data_refresh, self._provider.get_query_interval())
		self._log.info('%s will be queried every %s', data_src_text, str_time_long(data_refresh))
	self.setup_resync(interval=data_refresh, force=config.get_state('resync', detail='datasets'))

	splitter_name = config.get('%s splitter' % datasource_name, 'FileBoundarySplitter')
	splitter_cls = self._provider.check_splitter(DataSplitter.get_class(splitter_name))
	self._splitter = splitter_cls(config, datasource_name)

	# Settings:
	(self._dn, self._keep_old) = (config.get_work_path(), keep_old)
	ensure_dir_exists(self._dn, 'partition map directory', DatasetError)
	self._set_reader(self._init_reader())

	if not self.get_parameter_len():
		if data_refresh < 0:
			raise UserError('%s does not provide jobs to process' % data_src_text)
		self._log.warning('%s does not provide jobs to process', data_src_text)
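# Illustrative note (not part of the original source): with a datasource named
# 'dataset', the options read above are 'dataset refresh' and 'dataset splitter'.
# A negative refresh value disables periodic re-queries of the dataset source;
# otherwise the interval is raised to at least the provider's own query interval.
# The splitter option selects the partition scheme and defaults to
# 'FileBoundarySplitter'; a hypothetical config excerpt might read:
#   dataset refresh  = <interval in a format accepted by config.get_time>
#   dataset splitter = FileBoundarySplitter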
# | Licensed under the Apache License, Version 2.0 (the "License");
# | you may not use this file except in compliance with the License.
# | You may obtain a copy of the License at
# |
# |     http://www.apache.org/licenses/LICENSE-2.0
# |
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

from grid_control.datasets import DataProvider, DataSplitter
from python_compat import imap


FileClassSplitter = DataSplitter.get_class('FileClassSplitter')  # pylint:disable=invalid-name


class RunSplitter(FileClassSplitter):
	alias_list = ['runs']

	def __init__(self, config, datasource_name):
		FileClassSplitter.__init__(self, config, datasource_name)
		self._run_range = config.get_lookup(self._get_part_opt('run range'), {None: 1},
			parser=int, strfun=int.__str__)

	def _get_fi_class(self, fi, block):
		run_range = self._run_range.lookup(DataProvider.get_block_id(block))
		metadata_idx = block[DataProvider.Metadata].index('Runs')
		return tuple(imap(lambda r: int(r / run_range), fi[DataProvider.Metadata][metadata_idx]))
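# Worked example (hypothetical numbers, not from the original source): with the
# partition option 'run range' (read via _get_part_opt above) set to 10, the
# lookup yields run_range = 10 for the block. A file whose 'Runs' metadata is
# [100, 101, 109] then gets the class tuple (10, 10, 10), while runs [110, 115]
# map to (11, 11) -- i.e. files are grouped into partitions by 10-run-wide
# buckets via int(run / run_range).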