Example #1
0
 def create_variables(self, pm: PipelineManager):
     """
     Create the variable sections ``vars.some.a`` through ``vars.some.f``.

     Every section is created and then updated with a dtype: ``'str'`` for
     the letter ``c`` and ``'int'`` for all the other letters.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     """
     for letter in 'abcdef':
         variable_path = f'vars.some.{letter}'
         pm.create(variable_path)
         pm.update(
             section_path_str=variable_path,
             dtype='str' if letter == 'c' else 'int',
         )
Example #2
0
 def create_combiners(self, pm: PipelineManager):
     """
     Create ``combinedata.some.thing`` and configure it to combine two sources.

     The section is updated with sources ``sources.some.one`` and
     ``sources.some.three`` (looked up through a ``Selector``) and a single
     ``dc.CombineOptions(rows=False)``.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     """
     combine_path = 'combinedata.some.thing'
     combine_options = dc.CombineOptions(rows=False)
     pm.create(combine_path)

     # The selector is constructed only after the section has been created.
     selector = Selector()
     sources_to_combine = [
         selector.dcpm.sources.some.one,
         selector.dcpm.sources.some.three,
     ]
     pm.update(
         section_path_str=combine_path,
         data_sources=sources_to_combine,
         options_list=[combine_options],
         name='Combine One and Three',
     )
Example #3
0
 def create_merges(self, pm: PipelineManager):
     """
     Create ``merges.some.thing`` and configure it to merge two sources.

     The section is updated with sources ``sources.some.one`` and
     ``sources.some.two`` (looked up through a ``Selector``) and a single
     ``MergeOptions('C')``.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     """
     merge_path = 'merges.some.thing'
     selector = Selector()
     merge_options = MergeOptions('C')
     pm.create(merge_path)

     sources_to_merge = [
         selector.dcpm.sources.some.one,
         selector.dcpm.sources.some.two,
     ]
     pm.update(
         section_path_str=merge_path,
         data_sources=sources_to_merge,
         merge_options_list=[merge_options],
         name='Merge One Two',
     )
Example #4
0
 def create_columns(self, pm: PipelineManager):
     """
     Create the column sections ``cols.some.a`` through ``cols.some.f``.

     Each column is updated with the variable of the same letter from
     ``vars.some``. Every column except ``c`` is additionally given an
     ``indices`` list holding one ``dc.ColumnIndex`` keyed on ``'c'`` that
     is built once from the ``vars.some.c`` variable.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     """
     index_on_c = dc.ColumnIndex(
         dc.Index(key='c'),
         [Selector().dcpm.vars.some.c],
     )
     for letter in 'abcdef':
         column_path = f'cols.some.{letter}'
         pm.create(column_path)
         # A new Selector is built on every iteration, after the create call.
         variable = getattr(Selector().dcpm.vars.some, letter)
         # Column 'c' gets no indices; all others get a fresh one-item list.
         index_kwargs = {} if letter == 'c' else {'indices': [index_on_c]}
         pm.update(
             section_path_str=column_path,
             variable=variable,
             **index_kwargs,
         )
Example #5
0
def _refresh_configs(section_paths: Iterable[SectionPath]):
    """
    Refresh each given section path on the pipeline manager found for it.

    The manager is looked up from the full section path string. The path
    passed to ``refresh`` is the same path with its first element dropped.

    :param section_paths: full section paths to refresh
    """
    from pyfileconf import PipelineManager

    for full_path in section_paths:
        owning_manager = PipelineManager.get_manager_by_section_path_str(
            full_path.path_str)
        path_within_manager = SectionPath('.'.join(full_path[1:])).path_str
        owning_manager.refresh(path_within_manager)
Example #6
0
 def create_analysis(self,
                     pm: PipelineManager,
                     section_path_str: str,
                     opts: Optional[dc.AnalysisOptions] = None,
                     data_source: Optional[AnyDataSource] = None,
                     name: str = 'Analysis'):
     """
     Create an analysis section and update it with a source, options and name.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     :param section_path_str: section path of the analysis to create
     :param opts: analysis options; when None,
         ``dc.AnalysisOptions(sum_all_numeric)`` is used
     :param data_source: source to analyze; when None,
         ``transdata.some.thing`` from the selector is used
     :param name: name passed to the update
     """
     selector = Selector()
     analysis_options = (
         dc.AnalysisOptions(sum_all_numeric) if opts is None else opts
     )
     source = (
         selector.dcpm.transdata.some.thing
         if data_source is None else data_source
     )
     pm.create(section_path_str)
     pm.update(
         section_path_str=section_path_str,
         data_source=source,
         options=analysis_options,
         name=name,
     )
Example #7
0
    def create_graph(self,
                     pm: PipelineManager,
                     include_attrs: Optional[Sequence[str]] = None,
                     func_dict: Optional[Dict[str, GraphFunction]] = None):
        """
        Build a ``dc.DataExplorer`` from the manager's data and render its graph.

        One entry per name in ``DATA_ATTRS`` is fetched with ``pm.get`` and the
        resulting dictionary is handed to ``dc.DataExplorer.from_dict``. The
        graph is rendered to ``GRAPH_PATH``.

        :param pm: pipeline manager from which the data attributes are read
        :param include_attrs: attribute names passed to ``graph``; when None a
            default list of four attribute names is used
        :param func_dict: label -> function mapping passed to ``graph``; when
            None a default mapping with four entries is used
        """
        if include_attrs is None:
            include_attrs = [
                'difficulty',
                '_section_path_str',
                '_operation_index',
                'last_modified',
            ]
        if func_dict is None:
            func_dict = {
                'Has df': has_df,
                # Load keys of the columns, or None when there are no columns.
                'cols': lambda source: (
                    [col.load_key for col in source.columns]
                    if hasattr(source, 'columns') else None
                ),
                'F Links': lambda source: len(source.forward_links),
                'B Links': lambda source: len(source.back_links),
            }

        data_by_attr = {}
        for attr in DATA_ATTRS:
            data_by_attr[attr] = pm.get(attr)

        data_explorer = dc.DataExplorer.from_dict(data_by_attr)
        graph = data_explorer.graph(
            include_attrs=include_attrs,
            func_dict=func_dict,
        )
        graph.render(GRAPH_PATH)
Example #8
0
 def create_example_configs(self, pm: PipelineManager):
     """
     Create two ``ConfigExample`` sections and give each its own values.

     ``confs.ConfigExample`` is updated with ``a=1000, b=2000`` and
     ``confs2.ConfigExample`` with ``a=3000, b=4000``.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     """
     pm.create('confs', ConfigExample)
     pm.update(
         section_path_str='confs.ConfigExample',
         a=1000,
         b=2000,
     )
     pm.create('confs2', ConfigExample)
     # Target the section created just above. This update previously pointed
     # at 'confs.ConfigExample' again, which overwrote a=1000/b=2000 and left
     # 'confs2' without any update.
     pm.update(
         section_path_str='confs2.ConfigExample',
         a=3000,
         b=4000,
     )
Example #9
0
 def create_pm(self, **kwargs):
     """
     Construct a ``PipelineManager`` from this object's settings.

     The folder, name, log folder and default config folder name are read
     from attributes of ``self``; ``SPECIFIC_CLASS_CONFIG_DICTS`` supplies the
     specific class config dicts.

     :param kwargs: keyword arguments for ``PipelineManager``; they take
         precedence over the values described above
     :return: the newly constructed pipeline manager
     """
     default_kwargs = {
         'folder': self.pm_folder,
         'name': self.test_name,
         'log_folder': self.logs_path,
         'default_config_folder_name': self.defaults_folder_name,
         'specific_class_config_dicts': SPECIFIC_CLASS_CONFIG_DICTS,
     }
     # Later entries win, so caller-supplied kwargs override the defaults.
     return PipelineManager(**{**default_kwargs, **kwargs})
Example #10
0
    def reset(self, section_path_strs: Iterable[str]):
        """
        Reset each given section path on the pipeline manager found for it.

        Every string is first converted with ``SectionPath.from_ambiguous``
        using this object's ``base_section_path_str`` and
        ``strip_manager_from_iv``. The manager is looked up from the resulting
        full path, and ``reset`` is called on it with that path minus its
        first element.

        :param section_path_strs: section path strings to reset
        """
        from pyfileconf.main import PipelineManager

        for path_str in section_path_strs:
            full_path = SectionPath.from_ambiguous(
                path_str,
                base_section_path_str=self.base_section_path_str,
                strip_manager_from_iv=self.strip_manager_from_iv,
            )
            target_pm = PipelineManager.get_manager_by_section_path_str(
                full_path.path_str)
            path_within_manager = SectionPath(".".join(full_path[1:]))
            target_pm.reset(path_within_manager.path_str)
Example #11
0
 def create_transform(self,
                      pm: PipelineManager,
                      section_path_str: str = 'transdata.some.thing',
                      opts: Optional[dc.TransformOptions] = None,
                      data_source: Optional[AnyDataSource] = None,
                      name: str = 'Transform'):
     """
     Create a transform section and update it with a source, options and name.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     :param section_path_str: section path of the transform to create
     :param opts: transform options; when None, options are built from
         ``source_transform_func`` with ``transform_key='add_one'`` and
         ``out_path=self.transform_out_path``
     :param data_source: source to transform; when None,
         ``merges.some.thing`` from the selector is used
     :param name: name passed to the update
     """
     selector = Selector()
     transform_options = opts
     if transform_options is None:
         transform_options = dc.TransformOptions(
             source_transform_func,
             transform_key='add_one',
             out_path=self.transform_out_path,
         )
     source = (
         selector.dcpm.merges.some.thing
         if data_source is None else data_source
     )
     pm.create(section_path_str)
     pm.update(section_path_str=section_path_str,
               data_source=source,
               options=transform_options,
               name=name)
Example #12
0
    def _get_real_item(self, item):
        """
        Look up the object stored at a full section path string.

        The manager is found from ``item``; the object is then fetched from
        that manager using the path with its first element dropped. When
        ``context.file_is_currently_being_loaded`` is truthy, ``item`` is also
        registered (with ``force_update=True``) as a config dependency of the
        section path that is currently being loaded.

        :param item: full section path string
        :return: result of ``_get_from_nested_obj_by_section_path``
        """
        from pyfileconf import context
        from pyfileconf.main import PipelineManager

        owning_manager = PipelineManager.get_manager_by_section_path_str(item)
        parts_after_manager = SectionPath(item)[1:]
        path_within_manager = SectionPath('.'.join(parts_after_manager))

        if context.file_is_currently_being_loaded:
            loading_path = context.stack.currently_loading_file_section_path
            context.add_config_dependency(
                loading_path, item, force_update=True)

        return _get_from_nested_obj_by_section_path(
            owning_manager, path_within_manager)
Example #13
0
 def create_generators(self, pm: PipelineManager):
     """
     Create ``gendata.some.thing`` and set it as the pipeline of source three.

     The generator section is updated with ``dc.GenerationOptions`` built
     from ``ds_generator_func`` and the columns ``cols.some.d`` and
     ``cols.some.c``. Afterwards ``sources.some.three`` is updated so that
     its ``pipeline`` is the new generator section.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     """
     generator_path = 'gendata.some.thing'
     selector = Selector()
     generation_options = dc.GenerationOptions(
         ds_generator_func,
         columns=[
             selector.dcpm.cols.some.d,
             selector.dcpm.cols.some.c,
         ],
     )
     pm.create(generator_path)
     pm.update(
         section_path_str=generator_path,
         options=generation_options,
         name='Generate Three',
     )
     # A second, fresh Selector is built after the generator section exists.
     fresh_selector = Selector()
     pm.update(
         section_path_str='sources.some.three',
         pipeline=fresh_selector.dcpm.gendata.some.thing,
     )
Example #14
0
    def update(self, updates: Iterable[Dict[str, Any]]) -> None:
        """
        Apply a batch of updates, sending each to the pipeline manager found for it.

        The ``pyfileconf_pre_update_batch`` hook is called first and the
        updates it returns are the ones applied. For each of them, the
        ``section_path_str`` is converted with ``SectionPath.from_ambiguous``,
        the manager is looked up from the full path, and ``_update`` is called
        on it with the path minus its first element. The
        ``pyfileconf_post_update_batch`` hook is called at the end with the
        original ``updates`` argument.

        :param updates: list of kwarg dictionaries which would normally be provided to .update_batch
        :return: None
        """
        from pyfileconf.main import PipelineManager

        updates_per_hook_result = manager.plm.hook.pyfileconf_pre_update_batch(
            pm=self, updates=updates)

        for single_update in itertools.chain.from_iterable(
                updates_per_hook_result):
            full_path = SectionPath.from_ambiguous(
                single_update['section_path_str'],
                base_section_path_str=self.base_section_path_str,
                strip_manager_from_iv=self.strip_manager_from_iv,
            )
            target_pm = PipelineManager.get_manager_by_section_path_str(
                full_path.path_str)
            path_within_manager = SectionPath(".".join(full_path[1:])).path_str

            # Copy so the caller's dictionary is left unmodified.
            routed_kwargs = dict(single_update)
            routed_kwargs['section_path_str'] = path_within_manager
            target_pm._update(**routed_kwargs)  # type: ignore

        manager.plm.hook.pyfileconf_post_update_batch(pm=self, updates=updates)
Example #15
0
 def pipeline_manager(self) -> 'PipelineManager':
     """
     Return the pipeline manager looked up by ``self.pipeline_manager_name``.

     :return: result of ``PipelineManager.get_manager_by_section_path_str``
     """
     from pyfileconf.main import PipelineManager

     manager_name = self.pipeline_manager_name
     return PipelineManager.get_manager_by_section_path_str(manager_name)
Example #16
0
    def __call__(self, *args, **kwargs):
        """
        Resolve the real item behind this object's section path and call it.

        The real item is fetched through ``self.selector._get_real_item`` and
        then handled according to its kind:

        * a ``functools.partial`` is called with ``args`` and ``kwargs``;
        * an instance of one of ``self._specific_classes`` whose collection is
          present in the manager's ``_registrar_dict`` gets its
          ``_section_path_str`` set, and the attribute named by its
          collection's ``execute_attr`` is called with ``args`` and ``kwargs``;
        * an object with a ``__self__`` attribute (treated as a bound method)
          whose ``__self__`` is an instance of one of
          ``self._specific_classes`` has the section path minus its last
          element set on ``__self__``, and is then called with ``args`` and
          ``kwargs``;
        * any other non-class object gets its ``_section_path_str`` set and is
          returned as-is, without being called (``args`` and ``kwargs`` are
          not used in that case).

        :param args: positional arguments forwarded to the resolved callable
        :param kwargs: keyword arguments forwarded to the resolved callable
        :return: the result of the resolved callable, or the resolved object
            itself in the last case above
        :raises ValueError: if the item is a bound method of an object that is
            not one of the specific classes, or if the item is a class
        """
        from pyfileconf import PipelineManager

        # When calling, assume user always wants the real item
        actual_item = self.selector._get_real_item(self.section_path_str)
        # If this happened while running another item, add to dependencies
        self._add_to_config_dependencies_if_necessary()

        # Determine whether this object is from a specific class collection
        manager = PipelineManager.get_manager_by_section_path_str(
            self.section_path_str)
        # The second element of the section path is used as the collection name
        collection_name = SectionPath(self.section_path_str)[1]
        try:
            # Lookup is done only to see whether it raises KeyError
            manager._registrar_dict[collection_name]
            specific_class = True
        except KeyError:
            specific_class = False

        # Handle depending on the type of item
        if isinstance(actual_item, partial):
            # Got a function in the general registrar
            func = actual_item
        elif specific_class and isinstance(actual_item,
                                           self._specific_classes):
            # Got specific registrar class
            # Need to look up the execute attribute and apply section path str
            actual_item._section_path_str = self.section_path_str
            collection = self._specific_class_collection_map[type(actual_item)]
            execute_attr = collection.execute_attr
            func = getattr(actual_item, execute_attr)
        else:
            # Built up front so both failure branches below raise the same error
            cannot_parse_error = ValueError(
                f'could not parse actual item, expected partial, '
                f'specific class, or method of specific class. '
                f'Got {actual_item} of type {type(actual_item)}')
            orig_item: Any = None
            try:
                # Having a __self__ attribute is what marks a bound method here
                orig_item = actual_item.__self__
                is_bound_method = True
            except AttributeError:
                is_bound_method = False

            if is_bound_method:
                if not isinstance(orig_item, self._specific_classes):
                    # Is bound method, but not for one of defined specific classes
                    raise cannot_parse_error

                # Got specific class method
                # Add section path to original item and then set method to be called
                # (the owner's path is this section path without its last element)
                orig_item_sp = SectionPath.from_section_str_list(
                    SectionPath(self.section_path_str)[:-1])
                orig_item_sp_str = orig_item_sp.path_str
                orig_item._section_path_str = orig_item_sp_str
                func = actual_item
            else:
                if inspect.isclass(actual_item):
                    raise cannot_parse_error
                # Got a class object in the general registrar
                actual_item._section_path_str = self.section_path_str
                # Simply return it
                return actual_item

        result = func(*args, **kwargs)
        return result
Example #17
0
 def create_function(self,
                     pm: PipelineManager,
                     section_path_str='stuff.thing'):
     """
     Register ``a_function`` under a section and update its ``a`` argument.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     :param section_path_str: section under which ``a_function`` is created;
         the update targets this path with ``.a_function`` appended
     """
     pm.create(section_path_str, a_function)
     function_path = section_path_str + '.a_function'
     pm.update(
         section_path_str=function_path,
         a='abc',
     )
Example #18
0
 def create_sources(self, pm: PipelineManager):
     """
     Create the three source sections ``sources.some.one``/``two``/``three``.

     * ``one`` gets columns ``a``, ``b``, ``c`` and ``location=self.csv_path``
     * ``two`` gets columns ``e``, ``f``, ``c`` and ``location=self.csv_path2``
     * ``three`` gets columns ``d``, ``c`` and no location

     All columns are looked up under ``cols.some`` through one ``Selector``.

     :param pm: pipeline manager on which ``create`` and ``update`` are called
     """
     selector = Selector()

     # Source one
     pm.create('sources.some.one')
     columns_one = [
         selector.dcpm.cols.some.a,
         selector.dcpm.cols.some.b,
         selector.dcpm.cols.some.c,
     ]
     pm.update(section_path_str='sources.some.one',
               columns=columns_one,
               location=self.csv_path)

     # Source two
     pm.create('sources.some.two')
     columns_two = [
         selector.dcpm.cols.some.e,
         selector.dcpm.cols.some.f,
         selector.dcpm.cols.some.c,
     ]
     pm.update(section_path_str='sources.some.two',
               columns=columns_two,
               location=self.csv_path2)

     # Source three: columns only, no location
     pm.create('sources.some.three')
     columns_three = [
         selector.dcpm.cols.some.d,
         selector.dcpm.cols.some.c,
     ]
     pm.update(section_path_str='sources.some.three',
               columns=columns_three)