def create_variables(self, pm: PipelineManager):
    """
    Create the sections ``vars.some.a`` through ``vars.some.f`` and set their dtype.

    The ``c`` section is updated with dtype ``'str'``; every other letter
    is updated with dtype ``'int'``.

    :param pm: pipeline manager on which the sections are created and updated
    """
    for name in 'abcdef':
        section = f'vars.some.{name}'
        pm.create(section)
        pm.update(
            section_path_str=section,
            dtype='str' if name == 'c' else 'int',
        )
def create_combiners(self, pm: PipelineManager):
    """
    Create ``combinedata.some.thing`` combining sources ``one`` and ``three``.

    The section is updated with a single ``dc.CombineOptions(rows=False)``
    and the name ``'Combine One and Three'``.

    :param pm: pipeline manager on which the section is created and updated
    """
    combine_path = 'combinedata.some.thing'
    combine_options = dc.CombineOptions(rows=False)
    pm.create(combine_path)

    # The selector is built only after the section has been created
    selector = Selector()
    sources = [
        selector.dcpm.sources.some.one,
        selector.dcpm.sources.some.three,
    ]
    pm.update(
        section_path_str=combine_path,
        data_sources=sources,
        options_list=[combine_options],
        name='Combine One and Three',
    )
def create_merges(self, pm: PipelineManager):
    """
    Create ``merges.some.thing`` merging sources ``one`` and ``two``.

    The section is updated with a single ``MergeOptions('C')`` and the
    name ``'Merge One Two'``.

    :param pm: pipeline manager on which the section is created and updated
    """
    selector = Selector()
    merge_options = MergeOptions('C')
    merge_path = 'merges.some.thing'
    pm.create(merge_path)

    sources = [
        selector.dcpm.sources.some.one,
        selector.dcpm.sources.some.two,
    ]
    pm.update(
        section_path_str=merge_path,
        data_sources=sources,
        merge_options_list=[merge_options],
        name='Merge One Two',
    )
def create_columns(self, pm: PipelineManager):
    """
    Create the sections ``cols.some.a`` through ``cols.some.f``.

    Each column is linked to the variable with the same letter under
    ``vars.some``. Every column except ``c`` is also given a shared
    ``dc.ColumnIndex`` keyed on ``'c'`` and built from the ``c`` variable.

    :param pm: pipeline manager on which the sections are created and updated
    """
    shared_index = dc.ColumnIndex(
        dc.Index(key='c'),
        [Selector().dcpm.vars.some.c],
    )
    for name in 'abcdef':
        column_path = f'cols.some.{name}'
        pm.create(column_path)

        # A new Selector is constructed on every iteration, after the create
        variable = getattr(Selector().dcpm.vars.some, name)
        column_kwargs = {
            'section_path_str': column_path,
            'variable': variable,
        }
        if name != 'c':
            column_kwargs['indices'] = [shared_index]
        pm.update(**column_kwargs)
def _refresh_configs(section_paths: Iterable[SectionPath]):
    """
    Refresh each given section path on the pipeline manager that owns it.

    The manager is looked up from the full path string; the path handed to
    ``refresh`` is the same path with its first element dropped
    (presumably the manager name -- confirm against ``SectionPath``).

    :param section_paths: full section paths to refresh
    """
    from pyfileconf import PipelineManager

    for full_path in section_paths:
        owner = PipelineManager.get_manager_by_section_path_str(
            full_path.path_str
        )
        path_within_manager = SectionPath('.'.join(full_path[1:])).path_str
        owner.refresh(path_within_manager)
def create_analysis(self, pm: PipelineManager, section_path_str: str,
                    opts: Optional[dc.AnalysisOptions] = None,
                    data_source: Optional[AnyDataSource] = None,
                    name: str = 'Analysis'):
    """
    Create an analysis section and configure its source, options and name.

    :param pm: pipeline manager on which the section is created and updated
    :param section_path_str: section path of the analysis to create
    :param opts: analysis options; when ``None``,
        ``dc.AnalysisOptions(sum_all_numeric)`` is used
    :param data_source: input data source; when ``None``, the selector item
        ``transdata.some.thing`` is used
    :param name: name passed through to the update
    """
    selector = Selector()
    analysis_options = (
        dc.AnalysisOptions(sum_all_numeric) if opts is None else opts
    )
    source = (
        selector.dcpm.transdata.some.thing
        if data_source is None
        else data_source
    )

    pm.create(section_path_str)
    pm.update(
        section_path_str=section_path_str,
        data_source=source,
        options=analysis_options,
        name=name,
    )
def create_graph(self, pm: PipelineManager,
                 include_attrs: Optional[Sequence[str]] = None,
                 func_dict: Optional[Dict[str, GraphFunction]] = None):
    """
    Build a ``dc.DataExplorer`` from the manager's data and render its graph.

    One item is fetched from ``pm`` for every name in ``DATA_ATTRS``; the
    resulting mapping is turned into an explorer whose graph is rendered
    to ``GRAPH_PATH``.

    :param pm: pipeline manager the data items are fetched from
    :param include_attrs: attribute names passed to ``graph``; defaults to
        difficulty, section path, operation index and last-modified
    :param func_dict: label -> callable mapping passed to ``graph``;
        defaults to df presence, column load keys and link counts
    """
    if include_attrs is None:
        include_attrs = [
            'difficulty',
            '_section_path_str',
            '_operation_index',
            'last_modified',
        ]

    if func_dict is None:
        func_dict = {
            'Has df': has_df,
            # Sources without a ``columns`` attribute report None
            'cols': lambda source: (
                [col.load_key for col in source.columns]
                if hasattr(source, 'columns')
                else None
            ),
            'F Links': lambda source: len(source.forward_links),
            'B Links': lambda source: len(source.back_links),
        }

    items_by_attr = {attr: pm.get(attr) for attr in DATA_ATTRS}
    data_explorer = dc.DataExplorer.from_dict(items_by_attr)
    graph = data_explorer.graph(
        include_attrs=include_attrs,
        func_dict=func_dict,
    )
    graph.render(GRAPH_PATH)
def create_example_configs(self, pm: PipelineManager):
    """
    Create ``ConfigExample`` under ``confs`` and ``confs2`` and update it.

    After each create, ``confs.ConfigExample`` is updated: first with
    ``a=1000, b=2000``, then with ``a=3000, b=4000``.

    :param pm: pipeline manager on which the sections are created and updated
    """
    # NOTE(review): both updates target 'confs.ConfigExample'; the second
    # one never touches 'confs2.ConfigExample' and overwrites the first
    # update's values. This looks like a copy-paste slip -- confirm against
    # the tests before changing, since they may rely on a=3000/b=4000.
    updated_path = 'confs.ConfigExample'
    for section, a_value, b_value in (
        ('confs', 1000, 2000),
        ('confs2', 3000, 4000),
    ):
        pm.create(section, ConfigExample)
        pm.update(section_path_str=updated_path, a=a_value, b=b_value)
def create_pm(self, **kwargs):
    """
    Construct a ``PipelineManager`` from this object's default settings.

    :param kwargs: extra keyword arguments for ``PipelineManager``; these
        override the defaults when the same key is given
    :return: the newly constructed pipeline manager
    """
    defaults = {
        'folder': self.pm_folder,
        'name': self.test_name,
        'log_folder': self.logs_path,
        'default_config_folder_name': self.defaults_folder_name,
        'specific_class_config_dicts': SPECIFIC_CLASS_CONFIG_DICTS,
    }
    # Caller-supplied values win over the defaults
    return PipelineManager(**{**defaults, **kwargs})
def reset(self, section_path_strs: Iterable[str]):
    """
    Reset each given section on the pipeline manager that owns it.

    Every string is resolved with ``SectionPath.from_ambiguous`` using this
    object's ``base_section_path_str`` and ``strip_manager_from_iv``. The
    manager is looked up from the resolved path, and ``reset`` is called
    with that path minus its first element.

    :param section_path_strs: section path strings to reset
    """
    from pyfileconf.main import PipelineManager

    for raw_path in section_path_strs:
        full_path = SectionPath.from_ambiguous(
            raw_path,
            base_section_path_str=self.base_section_path_str,
            strip_manager_from_iv=self.strip_manager_from_iv,
        )
        owner = PipelineManager.get_manager_by_section_path_str(
            full_path.path_str
        )
        path_within_manager = SectionPath(".".join(full_path[1:])).path_str
        owner.reset(path_within_manager)
def create_transform(self, pm: PipelineManager,
                     section_path_str: str = 'transdata.some.thing',
                     opts: Optional[dc.TransformOptions] = None,
                     data_source: Optional[AnyDataSource] = None,
                     name: str = 'Transform'):
    """
    Create a transform section and configure its source, options and name.

    :param pm: pipeline manager on which the section is created and updated
    :param section_path_str: section path of the transform to create
    :param opts: transform options; when ``None``, options built from
        ``source_transform_func`` with key ``'add_one'`` and
        ``self.transform_out_path`` as the out path are used
    :param data_source: input data source; when ``None``, the selector item
        ``merges.some.thing`` is used
    :param name: name passed through to the update
    """
    selector = Selector()
    transform_options = opts
    if transform_options is None:
        transform_options = dc.TransformOptions(
            source_transform_func,
            transform_key='add_one',
            out_path=self.transform_out_path,
        )
    source = data_source
    if source is None:
        source = selector.dcpm.merges.some.thing

    pm.create(section_path_str)
    pm.update(
        section_path_str=section_path_str,
        data_source=source,
        options=transform_options,
        name=name,
    )
def _get_real_item(self, item):
    """
    Look up the actual object stored at a full section path string.

    If a config file is being loaded at the time of the call, ``item`` is
    also registered (with ``force_update=True``) as a config dependency of
    the file that is currently loading.

    :param item: full section path string, including the leading element
        used to find the manager
    :return: the object found on the manager at the path minus its first
        element
    """
    from pyfileconf import context
    from pyfileconf.main import PipelineManager

    owner = PipelineManager.get_manager_by_section_path_str(item)
    path_within_manager = SectionPath('.'.join(SectionPath(item)[1:]))

    if context.file_is_currently_being_loaded:
        loading_path = context.stack.currently_loading_file_section_path
        context.add_config_dependency(loading_path, item, force_update=True)

    return _get_from_nested_obj_by_section_path(owner, path_within_manager)
def create_generators(self, pm: PipelineManager):
    """
    Create ``gendata.some.thing`` and set it as the pipeline of source three.

    The generator is configured with ``dc.GenerationOptions`` built from
    ``ds_generator_func`` and columns ``d`` and ``c``, and is named
    ``'Generate Three'``. ``sources.some.three`` is then updated so that
    its ``pipeline`` is the new generator.

    :param pm: pipeline manager on which the sections are created and updated
    """
    selector = Selector()
    generator_columns = [
        selector.dcpm.cols.some.d,
        selector.dcpm.cols.some.c,
    ]
    generation_options = dc.GenerationOptions(
        ds_generator_func,
        columns=generator_columns,
    )

    generator_path = 'gendata.some.thing'
    pm.create(generator_path)
    pm.update(
        section_path_str=generator_path,
        options=generation_options,
        name='Generate Three',
    )

    # A second Selector is constructed after the generator section exists
    # (presumably so it can see the new section -- confirm)
    fresh_selector = Selector()
    pm.update(
        section_path_str='sources.some.three',
        pipeline=fresh_selector.dcpm.gendata.some.thing,
    )
def update(self, updates: Iterable[Dict[str, Any]]) -> None:
    """
    Apply a batch of updates, routing each one to the manager that owns it.

    The ``pyfileconf_pre_update_batch`` hook is called first; the lists it
    returns are flattened and applied one by one. For each update, the
    ``'section_path_str'`` entry is resolved with
    ``SectionPath.from_ambiguous``, the owning manager is looked up, and
    that manager's ``_update`` is called with the path minus its first
    element. Finally ``pyfileconf_post_update_batch`` is called with the
    original ``updates``.

    :param updates: list of kwarg dictionaries which would normally be
        provided to .update_batch
    :return:
    """
    from pyfileconf.main import PipelineManager

    hook_results = manager.plm.hook.pyfileconf_pre_update_batch(
        pm=self, updates=updates)

    for update_kwargs in itertools.chain.from_iterable(hook_results):
        full_path = SectionPath.from_ambiguous(
            update_kwargs['section_path_str'],
            base_section_path_str=self.base_section_path_str,
            strip_manager_from_iv=self.strip_manager_from_iv,
        )
        owner = PipelineManager.get_manager_by_section_path_str(
            full_path.path_str
        )
        path_within_manager = SectionPath(".".join(full_path[1:])).path_str

        # Same kwargs, but with the path rewritten relative to the owner
        routed_kwargs = {
            **update_kwargs,
            'section_path_str': path_within_manager,
        }
        owner._update(**routed_kwargs)  # type: ignore

    manager.plm.hook.pyfileconf_post_update_batch(pm=self, updates=updates)
def pipeline_manager(self) -> 'PipelineManager':
    """
    Return the pipeline manager registered under ``self.pipeline_manager_name``.

    :return: the manager found by ``get_manager_by_section_path_str``
    """
    # Imported here rather than at module level (presumably to avoid a
    # circular import -- confirm)
    from pyfileconf.main import PipelineManager

    manager_name = self.pipeline_manager_name
    return PipelineManager.get_manager_by_section_path_str(manager_name)
def __call__(self, *args, **kwargs):
    """
    Resolve the real item behind this view, then call it or return it.

    Handling depends on what the real item is:

    * a ``partial``: it is called with the given arguments;
    * an instance of a specific class whose collection is registered on
      the manager: its ``_section_path_str`` is set and the collection's
      ``execute_attr`` attribute is called with the given arguments;
    * a bound method of a specific-class instance: the instance's
      ``_section_path_str`` is set to this path minus its last element and
      the method is called with the given arguments;
    * any other non-class object: its ``_section_path_str`` is set and the
      object itself is returned without being called.

    :param args: positional arguments passed to the resolved callable
    :param kwargs: keyword arguments passed to the resolved callable
    :return: the callable's result, or the item itself in the last case
    :raises ValueError: if the item is a class, or a bound method whose
        owner is not an instance of one of the specific classes
    """
    from pyfileconf import PipelineManager

    # When calling, assume user always wants the real item
    real_item = self.selector._get_real_item(self.section_path_str)

    # If this happened while running another item, add to dependencies
    self._add_to_config_dependencies_if_necessary()

    # Determine whether this object is from a specific class collection
    owning_manager = PipelineManager.get_manager_by_section_path_str(
        self.section_path_str)
    collection_key = SectionPath(self.section_path_str)[1]
    try:
        owning_manager._registrar_dict[collection_key]
    except KeyError:
        in_specific_collection = False
    else:
        in_specific_collection = True

    # Function from the general registrar: call it directly
    if isinstance(real_item, partial):
        return real_item(*args, **kwargs)

    # Instance from a specific registrar: tag it with its section path,
    # then call the collection's execute attribute
    if in_specific_collection and isinstance(
            real_item, self._specific_classes):
        real_item._section_path_str = self.section_path_str
        owning_collection = self._specific_class_collection_map[
            type(real_item)]
        runner = getattr(real_item, owning_collection.execute_attr)
        return runner(*args, **kwargs)

    # Built up front so that both failure paths below raise the same error
    parse_failure = ValueError(
        f'could not parse actual item, expected partial, '
        f'specific class, or method of specific class. \n'
        f'Got {real_item} of type {type(real_item)}')

    # A sentinel distinguishes "no __self__" from "__self__ is None"
    not_bound = object()
    bound_owner: Any = getattr(real_item, '__self__', not_bound)

    if bound_owner is not_bound:
        if inspect.isclass(real_item):
            raise parse_failure
        # Got a class object in the general registrar: tag and return it
        real_item._section_path_str = self.section_path_str
        return real_item

    if not isinstance(bound_owner, self._specific_classes):
        # Is bound method, but not for one of defined specific classes
        raise parse_failure

    # Method of a specific class: the owner lives one level above the method
    owner_path = SectionPath.from_section_str_list(
        SectionPath(self.section_path_str)[:-1])
    bound_owner._section_path_str = owner_path.path_str
    return real_item(*args, **kwargs)
def create_function(self, pm: PipelineManager, section_path_str='stuff.thing'):
    """
    Register ``a_function`` under a section and set its ``a`` argument.

    :param pm: pipeline manager on which the function is created and updated
    :param section_path_str: section under which ``a_function`` is created;
        the update targets ``<section_path_str>.a_function``
    """
    pm.create(section_path_str, a_function)
    function_path = section_path_str + '.a_function'
    pm.update(section_path_str=function_path, a='abc')
def create_sources(self, pm: PipelineManager):
    """
    Create the sources ``sources.some.one``, ``two`` and ``three``.

    * ``one``: columns a, b, c, located at ``self.csv_path``
    * ``two``: columns e, f, c, located at ``self.csv_path2``
    * ``three``: columns d, c, with no location given

    :param pm: pipeline manager on which the sections are created and updated
    """
    selector = Selector()

    pm.create('sources.some.one')
    columns_one = [
        selector.dcpm.cols.some.a,
        selector.dcpm.cols.some.b,
        selector.dcpm.cols.some.c,
    ]
    pm.update(
        section_path_str='sources.some.one',
        columns=columns_one,
        location=self.csv_path,
    )

    pm.create('sources.some.two')
    columns_two = [
        selector.dcpm.cols.some.e,
        selector.dcpm.cols.some.f,
        selector.dcpm.cols.some.c,
    ]
    pm.update(
        section_path_str='sources.some.two',
        columns=columns_two,
        location=self.csv_path2,
    )

    pm.create('sources.some.three')
    columns_three = [
        selector.dcpm.cols.some.d,
        selector.dcpm.cols.some.c,
    ]
    pm.update(
        section_path_str='sources.some.three',
        columns=columns_three,
    )