def create_variables(self, pm: PipelineManager):
    """Create and configure the variable sections ``vars.some.a`` .. ``vars.some.f``.

    Each section is created on ``pm`` and then updated with a ``dtype``:
    ``'str'`` for the letter ``c`` and ``'int'`` for every other letter.

    :param pm: pipeline manager the sections are created on
    """
    for letter in 'abcdef':
        section = f'vars.some.{letter}'
        pm.create(section)
        # 'c' is the only string-typed variable; the rest are ints.
        var_dtype = 'str' if letter == 'c' else 'int'
        pm.update(section_path_str=section, dtype=var_dtype)
def create_combiners(self, pm: PipelineManager):
    """Create ``combinedata.some.thing`` combining sources one and three.

    The section is updated with ``sources.some.one`` and
    ``sources.some.three`` as data sources, a single
    ``dc.CombineOptions(rows=False)`` and the name ``'Combine One and Three'``.

    :param pm: pipeline manager the section is created on
    """
    section = 'combinedata.some.thing'
    combine_opts = dc.CombineOptions(rows=False)
    pm.create(section)
    # The selector is built only after the section has been created.
    selector = Selector()
    sources = [
        selector.dcpm.sources.some.one,
        selector.dcpm.sources.some.three,
    ]
    pm.update(
        section_path_str=section,
        data_sources=sources,
        options_list=[combine_opts],
        name='Combine One and Three',
    )
def create_merges(self, pm: PipelineManager):
    """Create ``merges.some.thing`` merging sources one and two.

    The section is updated with ``sources.some.one`` and
    ``sources.some.two`` as data sources, a single ``MergeOptions('C')``
    and the name ``'Merge One Two'``.

    :param pm: pipeline manager the section is created on
    """
    section = 'merges.some.thing'
    selector = Selector()
    merge_opts = MergeOptions('C')
    pm.create(section)
    sources = [
        selector.dcpm.sources.some.one,
        selector.dcpm.sources.some.two,
    ]
    pm.update(
        section_path_str=section,
        data_sources=sources,
        merge_options_list=[merge_opts],
        name='Merge One Two',
    )
def create_columns(self, pm: PipelineManager):
    """Create and configure the column sections ``cols.some.a`` .. ``cols.some.f``.

    Each column is linked to the variable with the same letter under
    ``vars.some``. Every column except ``c`` is also given a single
    ``dc.ColumnIndex`` keyed on ``'c'`` and built from variable ``c``.

    :param pm: pipeline manager the sections are created on
    """
    index_on_c = dc.ColumnIndex(
        dc.Index(key='c'),
        [Selector().dcpm.vars.some.c],
    )
    for letter in 'abcdef':
        section = f'cols.some.{letter}'
        pm.create(section)
        # A fresh Selector is used for every lookup, after the create call.
        variable = getattr(Selector().dcpm.vars.some, letter)
        if letter == 'c':
            # Column 'c' gets no indices.
            pm.update(section_path_str=section, variable=variable)
        else:
            pm.update(
                section_path_str=section,
                variable=variable,
                indices=[index_on_c],
            )
def create_example_configs(self, pm: PipelineManager):
    """Create two ``ConfigExample`` configs, under ``confs`` and ``confs2``.

    ``confs.ConfigExample`` is updated with ``a=1000, b=2000`` and
    ``confs2.ConfigExample`` with ``a=3000, b=4000``.

    The second update previously targeted ``'confs.ConfigExample'`` again,
    which overwrote the first config's values and left the newly created
    ``confs2`` entry without any update. Each update now targets the
    section that was just created.

    :param pm: pipeline manager the config sections are created on
    """
    config_values = (
        ('confs', 1000, 2000),
        ('confs2', 3000, 4000),
    )
    for section, a_value, b_value in config_values:
        pm.create(section, ConfigExample)
        # The update path is the section followed by the class name.
        pm.update(
            section_path_str=f'{section}.ConfigExample',
            a=a_value,
            b=b_value,
        )
def create_generators(self, pm: PipelineManager):
    """Create ``gendata.some.thing`` and attach it to ``sources.some.three``.

    The generation section is configured with
    ``dc.GenerationOptions(ds_generator_func, columns=[d, c])`` and the name
    ``'Generate Three'``. ``sources.some.three`` is then updated so that its
    ``pipeline`` is the new generation section.

    :param pm: pipeline manager the section is created on
    """
    section = 'gendata.some.thing'
    selector = Selector()
    generated_columns = [
        selector.dcpm.cols.some.d,
        selector.dcpm.cols.some.c,
    ]
    generation_opts = dc.GenerationOptions(ds_generator_func, columns=generated_columns)
    pm.create(section)
    pm.update(
        section_path_str=section,
        options=generation_opts,
        name='Generate Three',
    )
    # A second Selector is built after the section has been created.
    # NOTE(review): presumably needed so the new section is visible - confirm.
    selector = Selector()
    generator_pipeline = selector.dcpm.gendata.some.thing
    pm.update(section_path_str='sources.some.three', pipeline=generator_pipeline)
def create_analysis(self, pm: PipelineManager, section_path_str: str, opts: Optional[dc.AnalysisOptions] = None,
                    data_source: Optional[AnyDataSource] = None, name: str = 'Analysis'):
    """Create an analysis section at ``section_path_str`` and configure it.

    :param pm: pipeline manager the section is created on
    :param section_path_str: section path to create and update
    :param opts: analysis options; when None,
        ``dc.AnalysisOptions(sum_all_numeric)`` is used
    :param data_source: data source for the analysis; when None,
        ``transdata.some.thing`` is used
    :param name: name passed to the update call
    """
    selector = Selector()
    analysis_opts = dc.AnalysisOptions(sum_all_numeric) if opts is None else opts
    source = selector.dcpm.transdata.some.thing if data_source is None else data_source
    pm.create(section_path_str)
    pm.update(
        section_path_str=section_path_str,
        data_source=source,
        options=analysis_opts,
        name=name,
    )
def create_transform(self, pm: PipelineManager, section_path_str: str = 'transdata.some.thing',
                     opts: Optional[dc.TransformOptions] = None,
                     data_source: Optional[AnyDataSource] = None, name: str = 'Transform'):
    """Create a transform section at ``section_path_str`` and configure it.

    :param pm: pipeline manager the section is created on
    :param section_path_str: section path to create and update
    :param opts: transform options; when None, options are built from
        ``source_transform_func`` with ``transform_key='add_one'`` and
        ``out_path=self.transform_out_path``
    :param data_source: data source for the transform; when None,
        ``merges.some.thing`` is used
    :param name: name passed to the update call
    """
    selector = Selector()
    if opts is None:
        transform_opts = dc.TransformOptions(
            source_transform_func,
            transform_key='add_one',
            out_path=self.transform_out_path,
        )
    else:
        transform_opts = opts
    source = selector.dcpm.merges.some.thing if data_source is None else data_source
    pm.create(section_path_str)
    pm.update(
        section_path_str=section_path_str,
        data_source=source,
        options=transform_opts,
        name=name,
    )
def create_sources(self, pm: PipelineManager):
    """Create and configure ``sources.some.one``, ``.two`` and ``.three``.

    * ``one``: columns a, b, c; location ``self.csv_path``
    * ``two``: columns e, f, c; location ``self.csv_path2``
    * ``three``: columns d, c; no location is passed

    :param pm: pipeline manager the sections are created on
    """
    selector = Selector()
    # (source name, column letters in order, attribute on self holding the location)
    source_specs = (
        ('one', 'abc', 'csv_path'),
        ('two', 'efc', 'csv_path2'),
        ('three', 'dc', None),
    )
    for source_name, column_letters, location_attr in source_specs:
        section = f'sources.some.{source_name}'
        pm.create(section)
        # Columns and location are resolved only after the section exists.
        update_kwargs = dict(
            section_path_str=section,
            columns=[getattr(selector.dcpm.cols.some, letter) for letter in column_letters],
        )
        if location_attr is not None:
            update_kwargs['location'] = getattr(self, location_attr)
        pm.update(**update_kwargs)
def create_function(self, pm: PipelineManager, section_path_str='stuff.thing'):
    """Register ``a_function`` under ``section_path_str`` and set ``a='abc'``.

    :param pm: pipeline manager the function is registered on
    :param section_path_str: section under which ``a_function`` is created;
        the update targets ``<section_path_str>.a_function``
    """
    pm.create(section_path_str, a_function)
    function_path = '.'.join((section_path_str, 'a_function'))
    pm.update(section_path_str=function_path, a='abc')