def _get_default_func_or_section_config(
    self, section_path_str: str = None, create: bool = False
) -> Union[ActiveFunctionConfig, ConfigSection]:
    """
    Produce the default config for a function or a section.

    Without a section path a blank ``ActiveFunctionConfig`` is returned.
    Otherwise the kind of object currently stored at the section path
    decides whether a ``ConfigSection`` or an ``ActiveFunctionConfig`` is
    built from the file path derived from ``self.basepath``.

    Args:
        section_path_str: dot-separated section path, or None for the
            local config
        create: when True, a missing entry is treated as a function config
            rather than re-raising the KeyError

    Returns:
        the default function config or config section

    Raises:
        KeyError: nothing is stored at the section path and ``create``
            is False
        ValueError: the stored object is neither a section nor a
            function config
    """
    if section_path_str is None:
        # Local config: the default is simply a blank config
        return ActiveFunctionConfig()

    # Otherwise the default comes from the file matching the section path
    target_path = SectionPath(section_path_str)
    file_location = target_path.to_filepath(self.basepath)

    try:
        existing = _get_from_nested_obj_by_section_path(self, target_path)
    except KeyError as e:
        # Config object has not been created yet
        if not create:
            raise e
        existing = ActiveFunctionConfig()

    if isinstance(existing, ConfigSection):
        return ConfigSection.from_files(file_location)
    if isinstance(existing, (ActiveFunctionConfig, ActiveFunctionConfigFile)):
        return ActiveFunctionConfig.from_file(file_location + '.py')
    raise ValueError(
        f'expected section path to return ConfigSection or FunctionConfig, '
        f'got {existing} of type {type(existing)}'
    )
def _get_collection_obj_and_relative_section_path_from_structure(self, section_path_str: str):
    """
    Resolve a full section path to the collection object that holds it.

    The first section names the manager. If the second section is one of
    that manager's ``specific_class_names`` the matching collection is
    used, otherwise the ``'_general'`` collection is used.

    Args:
        section_path_str: dot-separated path beginning with the manager name

    Returns:
        tuple of (object, relative section path). The relative path is
        None when the path names only the manager (the manager itself is
        returned) or only a specific class root (its collection is returned).
    """
    full_path = SectionPath(section_path_str)
    manager_name = full_path[0]

    if len(full_path) == 1:
        # Only the manager was given, e.g. project
        return self._managers[manager_name], None

    second_section = full_path[1]
    if second_section in self._managers[manager_name].specific_class_names:
        if len(full_path) == 2:
            # Only the root of a specific class was given, e.g. project.sources
            return self._structure[manager_name][second_section], None
        collection_name, start_index = second_section, 2
    else:
        collection_name, start_index = '_general', 1

    relative_section_path = SectionPath('.'.join(full_path[start_index:]))
    return self._structure[manager_name][collection_name], relative_section_path
def test_update_batch_with_plugins(self):
    """
    Batch-update two functions with a plugin registered and check that
    running each returns the plugin-overridden result and that the pre and
    post batch-update counters each equal 1.
    """
    self.add_plugin()
    self.write_a_function_to_pipeline_dict_file()
    pipeline_manager = self.create_pm()
    pipeline_manager.load()
    pipeline_manager.create('stuff2', a_function)
    sel = Selector()
    item_views = [
        sel.test_pipeline_manager.stuff.a_function,
        sel.test_pipeline_manager.stuff2.a_function,
    ]
    expected_b_result = ["a", "b"]

    # Item view paths start with the manager name, which is dropped here
    updates = [
        {
            'b': expected_b_result,
            'section_path_str': SectionPath.from_section_str_list(
                SectionPath(view.section_path_str)[1:]
            ).path_str,
        }
        for view in item_views
    ]
    pipeline_manager.update_batch(updates)

    expected_result = [
        (None, OVERRIDDEN_B_RESULT),
        (None, OVERRIDDEN_B_RESULT),
        "abc",
    ]
    for view in item_views:
        assert pipeline_manager.run(view) == expected_result
    assert PRE_UPDATE_BATCH_COUNTER == 1
    assert POST_UPDATE_BATCH_COUNTER == 1
def test_create_then_update_entry_for_specific_class_dict(self):
    """
    Create a specific-class entry, update its ``a`` attribute, then check
    the written config file, the run result and the resulting attributes.
    """
    self.write_example_class_dict_to_file()
    pipeline_manager = self.create_pm(
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST
    )
    pipeline_manager.load()
    sel = Selector()
    pipeline_manager.create('example_class.thing.data')
    iv = sel.test_pipeline_manager.example_class.thing.data
    expected_a_result = (1, 2)

    # Drop the leading manager name from the item view's section path
    relative_parts = SectionPath(iv.section_path_str)[1:]
    relative_path = SectionPath.from_section_str_list(relative_parts)
    pipeline_manager.update(
        a=expected_a_result, section_path_str=relative_path.path_str
    )

    class_path = os.path.join(
        self.defaults_path, 'example_class', 'thing', 'data.py'
    )
    with open(class_path, 'r') as f:
        contents = f.read()
    self.assert_example_class_dict_config_file_contents(contents)

    ec = sel.test_pipeline_manager.example_class.thing.data
    assert pipeline_manager.run(ec) == 'woo'

    expect_ec = ExampleClass(name='data', a=expected_a_result)
    got_ec = ec.item
    assert ec.name == expect_ec.name == got_ec.name
    assert ec.a == expect_ec.a == got_ec.a
def from_file_path(cls, file_path: str, action: PyfileconfActions):
    """
    Alternate constructor: build an instance from a config file path.

    The manager owning ``file_path`` is looked up, the file path is turned
    into a section path relative to that manager's default config path,
    and the manager name is prepended to form the full section path.

    Args:
        file_path: path of the config file
        action: action to associate with the section path

    Returns:
        a new ``cls`` instance carrying the full section path, the action
        and the originating file path
    """
    # Imported here rather than at module level (presumably to avoid a
    # circular import -- TODO confirm)
    from pyfileconf import PipelineManager
    from pyfileconf.sectionpath.sectionpath import SectionPath

    owning_manager = PipelineManager.get_manager_by_filepath(file_path)
    relative_sp = SectionPath.from_filepath(
        owning_manager.default_config_path, file_path
    )
    return cls(
        SectionPath.join(owning_manager.name, relative_sp),
        action,
        file_path=file_path,
    )
def _should_skip_module(name: str) -> bool:
    """
    Check if module section path ends with a section path in skip_modules
    """
    module_section_path = SectionPath(name)
    return any(
        module_section_path.endswith(SectionPath(skip_name))
        for skip_name in skip_modules
    )
def reset(self, section_path_strs: Iterable[str]):
    """
    Reset each given item on the pipeline manager that owns it.

    Args:
        section_path_strs: section paths to reset; each is resolved with
            this object's ``base_section_path_str`` and
            ``strip_manager_from_iv`` settings before the manager lookup
    """
    from pyfileconf.main import PipelineManager

    for raw_path in section_path_strs:
        full_sp = SectionPath.from_ambiguous(
            raw_path,
            base_section_path_str=self.base_section_path_str,
            strip_manager_from_iv=self.strip_manager_from_iv,
        )
        owning_pm = PipelineManager.get_manager_by_section_path_str(full_sp.path_str)
        # The manager expects a path without its own name as first section
        within_manager = SectionPath(".".join(full_sp[1:]))
        owning_pm.reset(within_manager.path_str)
def _get_real_item(self, item):
    """
    Look up the object stored at a full section path.

    If a config file is currently being loaded, the file being loaded is
    also registered as depending on ``item`` (with ``force_update=True``).

    Args:
        item: full section path string, starting with the manager name

    Returns:
        the object found in the owning manager at the manager-relative path
    """
    from pyfileconf import context
    from pyfileconf.main import PipelineManager

    owning_manager = PipelineManager.get_manager_by_section_path_str(item)
    # Drop the manager name to get the path within the manager
    within_manager = SectionPath('.'.join(SectionPath(item)[1:]))

    if context.file_is_currently_being_loaded:
        loading_sp = context.stack.currently_loading_file_section_path
        context.add_config_dependency(loading_sp, item, force_update=True)

    return _get_from_nested_obj_by_section_path(owning_manager, within_manager)
def test_update_no_plugins(self):
    """
    Update a function's ``b`` argument without plugins and check that the
    run result reflects it while both update counters stay at 0.
    """
    self.write_a_function_to_pipeline_dict_file()
    pipeline_manager = self.create_pm()
    pipeline_manager.load()
    sel = Selector()
    iv = sel.test_pipeline_manager.stuff.a_function
    expected_b_result = ["a", "b"]

    # Drop the leading manager name from the item view's section path
    relative_parts = SectionPath(iv.section_path_str)[1:]
    relative_path = SectionPath.from_section_str_list(relative_parts)
    pipeline_manager.update(
        b=expected_b_result, section_path_str=relative_path.path_str
    )

    assert pipeline_manager.run(iv) == (None, expected_b_result)
    assert PRE_UPDATE_COUNTER == 0
    assert POST_UPDATE_COUNTER == 0
def _get_section_path_str_from_section_path_str_or_view(
    self, section_path_str_or_view: 'StrOrView'
) -> str:
    """
    Normalize a section path string or an item view to a section path string.

    Args:
        section_path_str_or_view: either a section path string, returned
            unchanged, or an item view, whose section path is returned
            with its first section removed

    Returns:
        section path string

    Raises:
        ValueError: the argument is neither an item view nor a str
    """
    from pyfileconf.selector.models.itemview import is_item_view

    if is_item_view(section_path_str_or_view):
        # ItemView will have PipelineManager.name as first section, must strip
        view_path = SectionPath(
            section_path_str_or_view.section_path_str)  # type: ignore
        return SectionPath.from_section_str_list(view_path[1:]).path_str

    if isinstance(section_path_str_or_view, str):
        return section_path_str_or_view

    raise ValueError(
        f'expected str or ItemView. Got {section_path_str_or_view} of '
        f'type {type(section_path_str_or_view)}'
    )
def _refresh_configs(section_paths: Iterable[SectionPath]):
    """
    Refresh each given section path on the pipeline manager that owns it.

    Args:
        section_paths: full section paths, each starting with a manager name
    """
    from pyfileconf import PipelineManager

    for full_sp in section_paths:
        owning_manager = PipelineManager.get_manager_by_section_path_str(full_sp.path_str)
        # The manager is addressed without its own name as first section
        within_manager = SectionPath('.'.join(full_sp[1:]))
        owning_manager.refresh(within_manager.path_str)
def _get_func_or_section_configs(
    self, section_path_str: str
) -> Optional[ActiveFunctionConfig]:
    """
    Get only the config stored at the section path itself.

    No inheritance across config levels and no overriding is applied
    here; use the regular get method for the active config of a function.

    Args:
        section_path_str: dot-separated section path; when None, the
            name of the loaded section is used

    Returns:
        the config extracted from the object found at the section path

    Raises:
        ConfigManagerNotLoadedException: ``self.section`` is None
    """
    if self.section is None:
        raise ConfigManagerNotLoadedException(
            'call .load() on ConfigManager before .get()')

    path_str = self.section.name if section_path_str is None else section_path_str
    section_path = SectionPath(path_str)

    # Walk the nested sections down to the final config or section
    stored: ConfigSectionOrConfig = _get_from_nested_obj_by_section_path(
        self, section_path)
    conf = _get_config_from_config_or_section(stored)

    # A section must not be overwritten by its config
    if isinstance(stored, ConfigSection):
        return conf

    # Loading may have happened while extracting the config above, so
    # store the result to keep the active config once it is loaded
    _set_in_nested_obj_by_section_path(self, section_path, conf)
    return conf
def __init__(
    self,
    section_path_str_or_list: RunnerArgs,
    config_updates: Sequence[Dict[str, Any]],
    base_section_path_str: Optional[str] = None,
    strip_manager_from_iv: bool = False,
):
    """
    Resolve the items to run, store the config updates, then compute
    the cases and the defaults.

    :param section_path_str_or_list: . separated path of a function or
        section, written like an import path, e.g.
        'main.data.summarize.summary_func1'; or a list of such paths to
        run several functions/sections, e.g.
        ['main.data', 'main.analysis.reg.1']
    :param config_updates: list of kwarg dictionaries which would
        normally be provided to .update
    :param base_section_path_str: section path str to put at the
        beginning of all passed section paths
    :param strip_manager_from_iv: whether to remove the manager name
        from any incoming item views
    """
    self.base_section_path_str = base_section_path_str
    self.strip_manager_from_iv = strip_manager_from_iv
    self.config_updates = config_updates
    self.run_items = SectionPath.list_from_ambiguous(
        section_path_str_or_list,
        base_section_path_str=base_section_path_str,
        strip_manager_from_iv=strip_manager_from_iv,
    )
    # get_defaults requires self.cases to exist, so cases are set first
    self.cases = self.get_cases()
    self.defaults = self.get_defaults()
def set(self, section_path_str: str, value: Any):
    """
    Append ``value`` to the collection found at the section path,
    creating any collection along the path that does not exist yet.

    Args:
        section_path_str: dot-separated path of the collection that
            should hold ``value``
        value: object to append to that collection
    """
    section_path = SectionPath(section_path_str)

    # Walk into nested sections until the final collection is reached
    obj = self
    section_basepath = self.basepath
    for section in section_path:
        section_basepath = os.path.join(section_basepath, section)
        try:
            obj = getattr(obj, section)
        except AttributeError:
            # Section does not exist yet: create an empty collection
            # configured like this one, attach it, then descend into it
            new_collection = self.collection_class(
                section_basepath,
                [],
                name=section,
                imports=self.imports,
                always_assign_strs=self.always_assign_strs,
                always_import_strs=self.always_import_strs,
                klass=self.klass,
                key_attr=self.key_attr,
                execute_attr=self.execute_attr,
            )
            obj.append(new_collection)
            obj = getattr(obj, section)

    # Now have collection object which should hold this final object
    obj.append(value)
def test_log_stdout_file(self):
    """
    Check that enabling the ``log_stdout`` and ``log_folder`` options writes
    run output to the log file, both for a successful run and for a run that
    raises while ``force_continue`` is set.
    """
    pm, iv, _ = self.create_pm_with_function_and_run()
    assert len(self.mock_logs.messages['info']
               ) == 2  # one log to run function, one log for result
    # No log file exists before the logging options are set
    assert not os.path.exists(self.logs_path)
    pyfileconf.options.set_options([('log_stdout', True),
                                    ('log_folder', self.logs_folder)])
    pm.run(iv)
    assert len(
        self.mock_logs.messages['info']
    ) == 5  # one log from function print in addition to two normal logs
    assert os.path.exists(self.logs_path)
    with open(self.logs_path, 'r') as f:
        contents = f.read()
        assert contents == '[pyfileconf INFO]: Running function stuff.a_function(\n\ta = None,\n\tb = None\n)\n[pyfileconf INFO]: print\n[pyfileconf INFO]: Result:\n(None, None)\n\n'
    # Update b so the next run raises; the path passed to update has the
    # first section of the item view's path removed
    pm.update(section_path_str=SectionPath('.'.join(
        iv.section_path_str.split('.')[1:])).path_str,
              b='raise_error')
    pm.force_continue = True
    assert len(self.mock_logs.messages['error']) == 0
    pm.run(iv)
    assert len(
        self.mock_logs.messages['error']
    ) == 2  # one for error at original time, one for error summary
    with open(self.logs_path, 'r') as f:
        contents = f.read()
        # Log file holds the first run, the update, and the start of the error
        assert '[pyfileconf INFO]: Running function stuff.a_function(\n\ta = None,\n\tb = None\n)\n[pyfileconf INFO]: print\n[pyfileconf INFO]: Result:\n(None, None)\n\n[pyfileconf INFO]: Updating stuff.a_function with config: {\'b\': \'raise_error\'}\n[pyfileconf INFO]: Running function stuff.a_function(\n\ta = None,\n\tb = r\'raise_error\'\n)\n[pyfileconf INFO]: print\n[pyfileconf ERROR]: Error while running stuff.a_function:\n\nTraceback (most recent call last):' \
            in contents
        # Log file also holds the raised error followed by the exception summary
        assert 'raise ValueError(\'error was supposed to be raised\')\nValueError: error was supposed to be raised\n\n[pyfileconf ERROR]: Exception summary for running stuff.a_function (exceptions were also shown when raised):\nError while running stuff.a_function:\n\nTraceback (most recent call last):' \
            in contents
def _get_func_or_collection(self, section_path_str: str) -> FunctionOrCollection:
    """
    Look up a function or collection by section path.

    If the first section matches the name of one of ``self._registrars``,
    the rest of the path is looked up in that registrar (or the
    registrar's collection is returned when nothing remains). Otherwise
    the whole path is looked up in the general registrar.

    Args:
        section_path_str: dot-separated section path

    Returns:
        the object found at the path
    """
    section_path = SectionPath(section_path_str)
    registrar_name = section_path[0]

    # Check for specific class dict matching name; first match wins
    matching = next(
        (registrar for registrar in self._registrars if registrar.name == registrar_name),
        None,
    )
    if matching is None:
        # Try to return from general registrar
        return self._general_registrar.get(section_path_str)

    path_in_registrar = SectionPath.from_section_str_list(section_path[1:]).path_str
    if not path_in_registrar:
        # Was looking up registrar collection itself
        return matching.collection
    # Looking up within registrar
    return matching.get(path_in_registrar)
def _run_section(self, section_path_str: str) -> Results:
    """
    Run every item contained in a section, recursing into nested sections.

    Args:
        section_path_str: dot-separated path of the section to run

    Returns:
        list with one entry per item of the section, in iteration order;
        a nested section contributes its own list of results

    Raises:
        ValueError: an item is not a PipelineCollection, a class or a
            callable
    """
    section = cast(PipelineCollection, self._get_func_or_collection(section_path_str))
    results = []
    for section_or_object_view in section:
        # Get section path by which to call this item
        subsection_name = _get_public_name_or_special_name(
            section_or_object_view, accept_output_names=False
        )
        subsection_path_str = SectionPath.join(section_path_str, subsection_name).path_str

        # Get from object view if necessary
        if isinstance(section_or_object_view, ObjectView):
            section_or_callable = section_or_object_view.item
        else:
            section_or_callable = section_or_object_view

        # Every branch runs the item by its own path. Previously the two
        # class branches were given the path of the containing section.
        # NOTE(review): confirm subsection_name is the correct key for
        # specific class objects.
        if isinstance(section_or_callable, PipelineCollection):
            # got another section within this section. recursively call run section
            results.append(self._run_section(subsection_path_str))
        elif self._is_specific_class(section_or_callable):
            results.append(self._run_one_specific_class(subsection_path_str))
        elif inspect.isclass(section_or_callable):
            results.append(self._run_one_class(subsection_path_str))
        elif callable(section_or_callable):
            # run function
            results.append(self._run_one_func(subsection_path_str))
        else:
            raise ValueError(f'could not run section {subsection_path_str}. expected PipelineCollection or '
                             f'function or class,'
                             f'got {section_or_callable} of type {type(section_or_callable)}')

    return results
def add_config_dependency(self, dependent: SectionPathLike,
                          depends_on: SectionPathLike,
                          force_update: bool = False):
    """
    Record that the config at ``dependent`` depends on ``depends_on``.

    Args:
        dependent: section path of the item that depends on another
        depends_on: section path of the item being depended on
        force_update: when True, the dependency is also recorded in
            ``self.force_update_dependencies``
    """
    from pyfileconf.sectionpath.sectionpath import SectionPath

    dependent_sp = SectionPath.from_ambiguous(dependent)
    depends_on_key = SectionPath.from_ambiguous(depends_on).path_str

    if dependent_sp.path_str == depends_on_key:
        # Will hit here while running an item, as it gets itself while running
        # No need to make config dependent on itself
        return

    self.config_dependencies[depends_on_key].add(dependent_sp)
    if force_update:
        self.force_update_dependencies[depends_on_key].add(dependent_sp)
def _get_section(self, section_path_str: str):
    """
    Get every item of a section, recursing into nested sections.

    The shape of the result follows the shape of the section: when the
    section contains at least one nested collection the result is a dict
    keyed by collection name, otherwise it is a list of the items.

    Args:
        section_path_str: dot-separated path of the section

    Returns:
        dict of nested section results, or list of item results

    Raises:
        ValueError: the section mixes collections with non-collection
            items, or an item is not a collection, specific class or
            callable
    """
    section = cast(Collection, self._get_func_or_collection(section_path_str))

    # The definition structure can be lists inside dicts or dicts inside
    # dicts. A nested section corresponds to a dict key in the definition,
    # so results go in a dict; with no nested sections a list was used to
    # store the items, so results go in a list.
    results: Union[Dict[str, Union[Collection, Any]], List[Any]]
    has_subsections = any(isinstance(item, Collection) for item in section)
    if has_subsections:
        results = {}
    else:
        results = []

    for entry in section:
        # Get from object view if necessary
        item = entry.item if isinstance(entry, ObjectView) else entry

        # Get section path by which to call this item
        if self._is_specific_class(item):
            # If specific class, need to look up which key holds the name
            subsection_name = section.name_for_obj(entry)
        else:
            # If in the main dict, or is a collection, the name attribute
            # or function/class name holds the name
            subsection_name = _get_public_name_or_special_name(entry, accept_output_names=False)
        subsection_path_str = SectionPath.join(section_path_str, subsection_name).path_str

        if isinstance(item, Collection):
            # not expected to hit these, for mypy
            assert isinstance(results, dict)
            assert item.name is not None
            # got another section within this section. recursively call get section
            results[item.name] = self._get_section(subsection_path_str)
        elif isinstance(results, dict):
            # got a non-collection, but results were defined as a dict.
            # Should only have collections in dict
            raise ValueError(
                f'section {entry.name} has both collections and items, must only '
                f'have collections if there is at least one collection. '
                f'Got {item} as non-collection.'
            )
        elif self._is_specific_class(item):
            results.append(self._get_one_obj_with_config(subsection_path_str))
        elif callable(item):
            # get function
            results.append(self._get_one_func_with_config(subsection_path_str))
        else:
            raise ValueError(f'could not get section {subsection_path_str}. expected Collection or '
                             f'function or specific class,'
                             f'got {item} of type {type(item)}')

    return results
def pyfileconf_post_config_changed(
    manager: 'ConfigManager',
    new_config: 'ConfigBase',
    updates: Dict[str, Any],
    section_path_str: str,
):
    """
    After a config change, reset the roots for the changed item.

    Args:
        manager: config manager whose ``pipeline_manager_name`` is
            prepended to form the full section path
        new_config: not used here
        updates: not used here
        section_path_str: section path of the changed config, relative
            to the manager
    """
    changed_path_str = SectionPath.join(
        manager.pipeline_manager_name, section_path_str
    ).path_str
    reset_roots([changed_path_str])
def _set_attr_for_item(self, item: str, attr: str, value: Any):
    """
    Set one config attribute for an item through its owning manager.

    Args:
        item: full section path string, starting with the manager name
        attr: name of the attribute to update
        value: new value for the attribute
    """
    item_path = SectionPath(item)
    owning_manager = self._managers[item_path[0]]
    # The manager is addressed without its own name as first section
    owning_manager.update(
        {attr: value},
        section_path_str='.'.join(item_path[1:]),
    )
def handle_pipeline_manager_not_loaded_or_typo(
        full_section_path_str: str, managers: List['PipelineManager']):
    """
    Raise the appropriate error for an item that could not be found.

    Args:
        full_section_path_str: full section path whose first section is
            the manager name
        managers: loaded managers to check the manager name against

    Raises:
        PipelineManagerNotLoadedException: the manager name is not in
            ``managers``
        ItemNotFoundException: the manager is loaded, so the path is
            likely a typo
    """
    manager_name = SectionPath(full_section_path_str)[0]

    # NOTE(review): this tests a name string for membership in ``managers``,
    # which is annotated as a list of PipelineManager -- confirm the
    # container compares by name.
    if manager_name not in managers:
        raise PipelineManagerNotLoadedException(
            'create pipeline manager instance before using selectors')

    # Even though manager is loaded, cannot find item. it is likely a typo.
    raise ItemNotFoundException(
        f'could not find item {full_section_path_str}')
def test_create_then_update_entry_for_function(self):
    """
    Create a function entry, update its ``b`` argument, then check the
    written config file contents and the run result.
    """
    self.write_a_function_to_pipeline_dict_file()
    pipeline_manager = self.create_pm()
    pipeline_manager.load()
    sel = Selector()
    pipeline_manager.create('thing', a_function)
    iv = sel.test_pipeline_manager.thing.a_function
    expected_b_result = ['a', 'b']

    # Drop the leading manager name from the item view's section path
    relative_parts = SectionPath(iv.section_path_str)[1:]
    relative_path = SectionPath.from_section_str_list(relative_parts)
    pipeline_manager.update(
        b=expected_b_result, section_path_str=relative_path.path_str
    )

    function_path = os.path.join(self.defaults_path, 'thing', 'a_function.py')
    with open(function_path, 'r') as f:
        contents = f.read()
    self.assert_a_function_config_file_contents(contents)

    assert pipeline_manager.run(iv) == (None, expected_b_result)
def __init__(
    self,
    section_path: SectionPathLike,
    action: PyfileconfActions,
    file_path: Optional[str] = None,
):
    """
    Store a section path together with an action and an optional file path.

    Args:
        section_path: section path in any form accepted by
            ``SectionPath.from_ambiguous``; stored as a ``SectionPath``
        action: action associated with the section path
        file_path: optional file path associated with the section path
    """
    from pyfileconf.sectionpath.sectionpath import SectionPath

    resolved_path = SectionPath.from_ambiguous(section_path)
    self.section_path = resolved_path
    self.action = action
    self.file_path = file_path
def get(self, section_path_str: str) -> Any:
    """
    Follow a section path through nested collections by attribute access.

    Args:
        section_path_str: dot-separated section path

    Returns:
        the object reached after following every section of the path

    Raises:
        AttributeError: a further section is requested after an object
            that is not an instance of this class was reached, or an
            attribute lookup along the path fails
    """
    current = self
    collection_type = self.__class__
    for section in SectionPath(section_path_str):
        if not isinstance(current, collection_type):
            # An item was already pulled from the collection and a further
            # section is being accessed, so the section is not in the collection
            raise AttributeError(section)
        current = getattr(current, section)
    return current
def get_defaults(self) -> Dict[str, Dict[str, Any]]:
    """
    Collect the current config of every item referenced by the first case.

    Returns:
        mapping from full section path string to a shallow copy of the
        config found for it; items whose config is None are left out

    Raises:
        ValueError: ``self.cases`` has not been set yet
    """
    logger.debug('Determining defaults for IterativeRunner')
    from pyfileconf import PipelineManager

    if not hasattr(self, 'cases'):
        raise ValueError('must set cases before calling get_defaults')

    first_case = self.cases[0]
    full_path_strs = [
        self._get_full_section_path_str(conf['section_path_str'])
        for conf in first_case
    ]

    defaults: Dict[str, Dict[str, Any]] = {}
    for full_path_str in full_path_strs:
        owning_pm = PipelineManager.get_manager_by_section_path_str(full_path_str)
        # The manager's config is addressed without the manager name
        full_sp = SectionPath(full_path_str)
        within_manager_str = SectionPath(".".join(full_sp[1:])).path_str
        config = owning_pm.config.get(within_manager_str)
        if config is None:
            continue
        defaults[full_path_str] = {**config}

    logger.debug(f'Got {defaults} for IterativeRunner.defaults')
    return defaults
def get(self, section_path_str: str) -> Optional[ActiveFunctionConfig]:
    """
    Handle config inheritance to get the active config for a section or
    function.

    The config stored at the path is the base. It is updated in order by
    the project-level config, by the config of each enclosing section
    from highest to lowest, by the config at the path itself when the
    path names a section, and finally by the local config.

    Args:
        section_path_str: dot-separated section path

    Returns:
        the base config after all overrides were applied; when the base
        config is falsy it is returned as-is without overrides

    Raises:
        ConfigManagerNotLoadedException: ``self.section`` is None
    """
    config = self._get_func_or_section_configs(section_path_str)

    if self.section is None:
        raise ConfigManagerNotLoadedException(
            'call .load() on ConfigManager before .get()')

    # First override for function defaults is global project config
    section_configs = [self.section.config]

    # Get configs in order of highest level to lowest level: from project
    # to highest section, down to lowest section. The last section or
    # function is skipped here for special handling below.
    section_path = SectionPath(section_path_str)
    ancestors = []
    for section in section_path[:-1]:
        ancestors.append(section)
        section_configs.append(
            self._get_func_or_section_configs('.'.join(ancestors)))

    # Last item of section_path may be another section, or the
    # function/Pipeline itself. A section's config must be added as an
    # override; a function's config is already the base config.
    full_section = '.'.join([*ancestors, section_path[-1]])
    if not self._is_function_or_pipeline_path(full_section):
        section_configs.append(
            self._get_func_or_section_configs(full_section))

    if config:
        # Override configs. Default config is base config, then gets
        # updated by project, then high level sections to low level sections
        for section_config in section_configs:
            config.update(section_config)

        # Last, override with local config
        config.update(self.local_config)

    return config
def _run(self) -> Any:
    """
    Run every item in ``self.run_items`` on the manager that owns it.

    Returns:
        the single result when exactly one item was run, otherwise the
        list of results in the order of ``self.run_items``
    """
    from pyfileconf.main import PipelineManager

    # For each item: look up the appropriate manager, then run the item
    # by its path without the manager name
    results = [
        PipelineManager.get_manager_by_section_path_str(sp.path_str).run(
            SectionPath(".".join(sp[1:])).path_str
        )
        for sp in self.run_items
    ]
    return results[0] if len(results) == 1 else results
def update(self, updates: Iterable[Dict[str, Any]]) -> None:
    """
    Apply a batch of config updates, each on the manager owning its item.

    The pre-update-batch hook is called first and the updates it returns
    are the ones applied. The post-update-batch hook is called afterwards
    with the original ``updates``.

    :param updates: list of kwarg dictionaries which would normally be
        provided to .update_batch
    :return:
    """
    from pyfileconf.main import PipelineManager

    hook = manager.plm.hook
    updates_lol = hook.pyfileconf_pre_update_batch(pm=self, updates=updates)

    # The hook result is a list of lists of updates; flatten it
    for single_update in itertools.chain(*updates_lol):
        full_sp = SectionPath.from_ambiguous(
            single_update['section_path_str'],
            base_section_path_str=self.base_section_path_str,
            strip_manager_from_iv=self.strip_manager_from_iv,
        )
        owning_pm = PipelineManager.get_manager_by_section_path_str(full_sp.path_str)
        # The manager is addressed without its own name as first section
        within_manager_str = SectionPath(".".join(full_sp[1:])).path_str
        relative_update = {
            **single_update,
            'section_path_str': within_manager_str,
        }
        owning_pm._update(**relative_update)  # type: ignore

    manager.plm.hook.pyfileconf_post_update_batch(pm=self, updates=updates)
def _is_function_or_pipeline_path(self, section_path_str: str) -> bool:
    """
    Tell whether a section path points at an individual function config
    rather than at a section.

    Args:
        section_path_str: dot-separated section path

    Returns:
        False when a ``ConfigSection`` is stored at the path, True when
        an ``ActiveFunctionConfig`` or ``ActiveFunctionConfigFile`` is

    Raises:
        ValueError: the stored object is none of the above
    """
    section_path = SectionPath(section_path_str)

    # Goes into nested sections, until it pulls the final config or section
    config_or_section: ConfigSectionOrConfig = _get_from_nested_obj_by_section_path(
        self, section_path)

    if isinstance(config_or_section, ConfigSection):
        # must be section, not individual pipeline or function
        return False
    if isinstance(config_or_section, (ActiveFunctionConfig, ActiveFunctionConfigFile)):
        # must be individual function as Config is returned
        return True
    # Report the actual type; the message previously repeated the object
    raise ValueError(
        f'expected Config or ConfigSection, got {config_or_section} '
        f'of type {type(config_or_section)}'
    )