def test_create_multiple_pms_with_class(self):
    """Load two pipeline managers that register ExampleClass and check
    the config file written under each manager's defaults folder."""
    self.write_example_class_to_pipeline_dict_file()
    self.write_example_class_to_pipeline_dict_file(
        file_path=self.second_pipeline_dict_path
    )
    first_pm = self.create_pm()
    first_pm.load()
    second_pm = self.create_pm(
        folder=self.second_pm_folder,
        name=self.second_test_name,
    )
    second_pm.load()

    # Pipeline manager 1: resolve the entry through a fresh Selector
    # before reading its config file from disk.
    first_entry = Selector().test_pipeline_manager.stuff.ExampleClass
    first_config_path = os.path.join(
        self.defaults_path, 'stuff', 'ExampleClass.py'
    )
    with open(first_config_path, 'r') as config_file:
        first_contents = config_file.read()
    self.assert_example_class_config_file_contents(first_contents)

    # Pipeline manager 2: same check against the second defaults folder.
    second_entry = Selector().test_pipeline_manager2.stuff.ExampleClass
    second_config_path = os.path.join(
        self.second_defaults_path, 'stuff', 'ExampleClass.py'
    )
    with open(second_config_path, 'r') as config_file:
        second_contents = config_file.read()
    self.assert_example_class_config_file_contents(second_contents)
def create_columns(self, pm: PipelineManager):
    """Create the ``cols.some.a`` .. ``cols.some.f`` entries on ``pm``.

    Each column is pointed at the matching ``vars.some.<letter>`` entry.
    Every column except ``c`` also receives an index built on ``c``.
    """
    shared_index = dc.ColumnIndex(
        dc.Index(key='c'),
        [Selector().dcpm.vars.some.c],
    )
    for col_name in 'abcdef':
        section = f'cols.some.{col_name}'
        pm.create(section)
        # A new Selector is built on every pass, after the create call.
        variable_entry = getattr(Selector().dcpm.vars.some, col_name)
        kwargs = {'section_path_str': section, 'variable': variable_entry}
        if col_name != 'c':
            kwargs['indices'] = [shared_index]
        pm.update(**kwargs)
def create_generators(self, pm: PipelineManager):
    """Create the ``gendata.some.thing`` entry on ``pm`` and set it as the
    pipeline of ``sources.some.three``."""
    generator_section = 'gendata.some.thing'
    selector = Selector()
    generated_columns = [
        selector.dcpm.cols.some.d,
        selector.dcpm.cols.some.c,
    ]
    generation_options = dc.GenerationOptions(
        ds_generator_func, columns=generated_columns
    )
    pm.create(generator_section)
    pm.update(
        section_path_str=generator_section,
        options=generation_options,
        name='Generate Three',
    )
    # A new Selector is built after the generator entry has been created.
    selector = Selector()
    pm.update(
        section_path_str='sources.some.three',
        pipeline=selector.dcpm.gendata.some.thing,
    )
def test_function_iv_from_selector(self):
    """Resolve ``stuff.a_function`` through a Selector and run the
    function-entry assertions on it."""
    self.write_a_function_to_pipeline_dict_file()
    manager = self.create_pm()
    manager.load()
    function_entry = Selector().test_pipeline_manager.stuff.a_function
    self.assert_valid_function_iv(function_entry, manager)
def test_create_pm_with_class_dict_imports_and_assigns(self):
    """Load a manager whose first class config dict carries extra import
    and assignment strings, then check the written config file."""
    self.write_example_class_dict_to_file()
    extra_imports = [
        'from copy import deepcopy',
        'from functools import partial',
    ]
    extra_assigns = [
        'my_var = 6',
        'stuff = list((1,))',
    ]
    # Work on a copy so the module-level list is left untouched.
    config_dicts = deepcopy(CLASS_CONFIG_DICT_LIST)
    config_dicts[0].update(
        always_import_strs=extra_imports,
        always_assign_strs=extra_assigns,
    )
    manager = self.create_pm(specific_class_config_dicts=config_dicts)
    manager.load()

    data_entry = Selector().test_pipeline_manager.example_class.stuff.data
    config_path = os.path.join(
        self.defaults_path, 'example_class', 'stuff', 'data.py'
    )
    with open(config_path, 'r') as config_file:
        written = config_file.read()
    self.assert_example_class_dict_config_file_contents(
        written,
        imports=extra_imports,
        assigns=extra_assigns,
    )
def test_class_iv_from_selector(self):
    """Resolve ``stuff.ExampleClass`` through a Selector and run the
    class-entry assertions on it."""
    self.write_example_class_to_pipeline_dict_file()
    manager = self.create_pm()
    manager.load()
    class_entry = Selector().test_pipeline_manager.stuff.ExampleClass
    self.assert_valid_class_iv(class_entry, manager)
def test_create_then_update_entry_for_specific_class_dict(self):
    """Create ``example_class.thing.data``, update its ``a`` value, and
    check the config file, the run result and the resulting attributes."""
    self.write_example_class_dict_to_file()
    manager = self.create_pm(
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST
    )
    manager.load()
    selector = Selector()
    manager.create('example_class.thing.data')
    created_entry = selector.test_pipeline_manager.example_class.thing.data
    new_a_value = (1, 2)

    # Drop the first section of the entry's path before passing it to update().
    full_path = SectionPath(created_entry.section_path_str)
    relative_path = SectionPath.from_section_str_list(full_path[1:])
    manager.update(
        a=new_a_value,
        section_path_str=relative_path.path_str,
    )

    config_path = os.path.join(
        self.defaults_path, 'example_class', 'thing', 'data.py'
    )
    with open(config_path, 'r') as config_file:
        written = config_file.read()
    self.assert_example_class_dict_config_file_contents(written)

    entry = selector.test_pipeline_manager.example_class.thing.data
    run_result = manager.run(entry)
    assert run_result == 'woo'

    reference = ExampleClass(name='data', a=new_a_value)
    underlying = entry.item
    assert entry.name == reference.name == underlying.name
    assert entry.a == reference.a == underlying.a
def dependent_call(self):
    """Reach several config entries in different ways and return them.

    Requires exactly one active pipeline manager. Returns the tuple
    ``(obj, a, b, c, d, e, f, g)`` of the values gathered below.
    """
    from pyfileconf import context

    # Get the single active pipeline manager.
    active = context.active_managers
    assert len(active) == 1
    manager = next(iter(active.values()))
    selector = Selector()

    # --- Specific classes ---
    # Access ItemView - should not be dependency
    specific_view = selector.test_pipeline_manager.example_class.stuff.data2
    # Access attr
    specific_attr = selector.test_pipeline_manager.example_class.stuff.data3.a

    # --- Classes ---
    # Call ItemView
    called_class = selector.test_pipeline_manager.ec.ExampleClass()
    # Get by PM
    fetched_class = manager.get(selector.test_pipeline_manager.ec2.ExampleClass)
    # Access ItemView - should not be dependency
    class_view = selector.test_pipeline_manager.ec3.ExampleClass

    # --- Functions ---
    # Call function
    called_function = selector.test_pipeline_manager.af.a_function()
    # Run by PM
    ran_function = manager.run(selector.test_pipeline_manager.af2.a_function)
    # Access ItemView - should not be dependency
    function_view = selector.test_pipeline_manager.af3.a_function

    return (
        specific_view,
        specific_attr,
        called_class,
        fetched_class,
        called_function,
        ran_function,
        class_view,
        function_view,
    )
def test_run_iter_function_dependent_on_other_pm_class(self):
    """Run ``a_function`` of manager 1 iteratively while its config reads
    a specific-class entry that belongs to manager 2."""
    self.write_a_function_to_pipeline_dict_file()
    self.write_example_class_dict_to_file(pm_index=1)
    first_pm = self.create_pm()
    first_pm.load()
    second_pm = self.create_pm(
        folder=self.second_pm_folder,
        name=self.second_test_name,
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST,
    )
    second_pm.load()
    self.append_to_a_function_config(
        'a = s.test_pipeline_manager2.example_class.stuff.data'
    )
    first_pm.reload()

    selector = Selector()
    new_a_value = ['a', 'b']
    function_entry = selector.test_pipeline_manager.stuff.a_function
    data_entry = selector.test_pipeline_manager2.example_class.stuff.data

    # Assert that updating pipeline manager 2 affects pipeline manager 1.
    case = {
        'a': new_a_value,
        'section_path_str': data_entry.section_path_str,
    }
    runner = IterativeRunner(function_entry, [case])
    outcome = runner.run()
    assert outcome == [((case,), (data_entry, None))]
    assert data_entry.a == new_a_value
    assert context.config_dependencies == self.expect_pm_1_a_function_depends_on_pm_2_specific_class
def test_create_class(self):
    """Calling the ``stuff.ExampleClass`` selector entry must yield an
    object equal to ``ExampleClass(None)``."""
    self.write_example_class_to_pipeline_dict_file()
    manager = self.create_pm()
    manager.load()
    instance = Selector().test_pipeline_manager.stuff.ExampleClass()
    assert instance == ExampleClass(None)
def test_run_iter_function_dependent_on_class(self):
    """Run ``a_function`` over the product of configs for two
    custom-update class entries and check results and update count."""
    first_section = 'example_class_with_update.stuff.data'
    second_section = 'example_class_with_update.stuff2.data'
    first_low = {'section_path_str': first_section, 'a': 2}
    first_high = {'section_path_str': first_section, 'a': 10}
    second_low = {'section_path_str': second_section, 'a': 15}
    second_high = {'section_path_str': second_section, 'a': 30}
    config_dicts = [first_low, first_high, second_low, second_high]

    self.write_a_function_to_pipeline_dict_file()
    manager = self.create_pm(
        specific_class_config_dicts=CUSTOM_UPDATE_CLASS_CONFIG_DICT_LIST,
    )
    manager.load()
    manager.create(first_section)
    manager.create(second_section)
    self.append_to_a_function_config(
        'a = s.test_pipeline_manager.example_class_with_update.stuff.data.a\n'
    )
    self.append_to_a_function_config(
        'b = s.test_pipeline_manager.example_class_with_update.stuff2.data.a'
    )
    manager.reload()

    # Run once to set attribute dependencies. Counts as two changes:
    # one for setting the config of example_class_with_update.stuff.data,
    # one for setting the config of example_class_with_update.stuff2.data.
    manager.run('stuff.a_function')

    selector = Selector()
    expected = [
        ((first_low, second_low), (2, 15)),      # two changes
        ((first_low, second_high), (2, 30)),     # one change
        ((first_high, second_low), (10, 15)),    # two changes
        ((first_high, second_high), (10, 30)),   # one change
    ]
    target = selector.test_pipeline_manager.stuff.a_function
    collected = manager.run_product(target, config_dicts)

    # 8 config updates in total: the 2 from the initial run plus
    # 2 + 1 + 2 + 1 across the four cases listed above.
    assert ExampleClassWithCustomUpdate.num_updates == 8
    assert collected == expected

    generated = manager.run_product_gen(target, config_dicts)
    for actual, wanted in zip(generated, expected):
        assert actual == wanted

    uncollected = manager.run_product(
        target, config_dicts, collect_results=False
    )
    assert uncollected == []
def test_run_function_with_underlying_iterative_runner_and_nested_configs(self):
    """Run a function whose config holds the entries to run, the cases
    and an ExampleClass entry that itself references another entry."""
    function_section = 'test_pipeline_manager.stuff.a_function'
    full_case = {'section_path_str': function_section, 'b': 10, 'a': 2}
    partial_case = {'section_path_str': function_section, 'b': 20}
    cases = [full_case, partial_case]
    expected = [
        ((full_case,), (2, 10)),
        ((partial_case,), (None, 20)),
    ]

    self.write_a_function_to_pipeline_dict_file()
    manager = self.create_pm()
    manager.load()
    selector = Selector()
    manager.create('stuff', a_function_that_calls_iterative_runner)
    manager.create('stuff', ExampleClass)
    manager.create('stuff2', ExampleClass)

    # Nested config: stuff.ExampleClass points at stuff2.ExampleClass.
    manager.update(
        section_path_str='stuff.ExampleClass',
        a=selector.test_pipeline_manager.stuff2.ExampleClass,
    )
    manager.update(
        section_path_str='stuff.a_function_that_calls_iterative_runner',
        to_run=[selector.test_pipeline_manager.stuff.a_function],
        cases=cases,
        ec=selector.test_pipeline_manager.stuff.ExampleClass,
    )

    runner_entry = (
        selector.test_pipeline_manager.stuff.a_function_that_calls_iterative_runner
    )
    outcome = manager.run(runner_entry)
    assert outcome == expected
def test_create_from_specific_class_dict_multiple_pms(self):
    """With two managers loaded from class dicts, each manager's
    ``example_class.stuff.data`` entry must match a default ExampleClass
    named ``data``."""
    self.write_example_class_dict_to_file()
    self.write_example_class_dict_to_file(pm_index=1)
    first_pm = self.create_pm(
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST
    )
    first_pm.load()
    second_pm = self.create_pm(
        folder=self.second_pm_folder,
        name=self.second_test_name,
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST,
    )
    second_pm.load()
    selector = Selector()

    # Pipeline manager 1
    first_entry = selector.test_pipeline_manager.example_class.stuff.data
    first_reference = ExampleClass(None, name='data')
    assert first_entry.name == first_reference.name
    assert first_entry.a == first_reference.a

    # Pipeline manager 2
    second_entry = selector.test_pipeline_manager2.example_class.stuff.data
    second_reference = ExampleClass(None, name='data')
    assert second_entry.name == second_reference.name
    assert second_entry.a == second_reference.a
def test_get_specific_class_section_path(self):
    """Check where ``_section_path_str`` is and is not exposed for a
    specific-class entry whose execute attribute returns its section path."""
    expected_path = 'test_pipeline_manager.example_class.stuff.data'

    self.write_example_class_dict_to_file()
    config_dicts = deepcopy(CLASS_CONFIG_DICT_LIST)
    for class_config in config_dicts:
        class_config['execute_attr'] = 'return_section_path_str'
    manager = self.create_pm(specific_class_config_dicts=config_dicts)
    manager.load()
    selector = Selector()

    assert (
        selector.test_pipeline_manager.example_class.stuff.data.return_section_path_str()
        == 'test_pipeline_manager.example_class.stuff.data'
    )

    entry = selector.test_pipeline_manager.example_class.stuff.data

    # Result of __call__ on ExampleClass: the result should not carry
    # _section_path_str, although the object itself should.
    called_result = entry()
    run_result = manager.run(entry)
    assert not hasattr(called_result, '_section_path_str')
    assert called_result == run_result == expected_path

    # Attribute access should be normal, without _section_path_str.
    attr_value = entry.a
    assert not hasattr(attr_value, '_section_path_str')
    assert attr_value is None

    # Property access should be normal, without _section_path_str.
    property_value = entry.my_property
    assert not hasattr(property_value, '_section_path_str')
    assert property_value == 100

    fetched_by_entry = manager.get(entry)
    fetched_by_str = manager.get('example_class.stuff.data')
    for candidate in [entry, fetched_by_entry, fetched_by_str]:
        candidate_path = candidate._section_path_str
        assert candidate_path == expected_path
def test_create_multiple_pms_with_class_dict(self):
    """Load two managers from class dicts and check the ``data.py``
    config file written under each manager's defaults folder."""
    self.write_example_class_dict_to_file()
    self.write_example_class_dict_to_file(pm_index=1)
    first_pm = self.create_pm(
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST,
    )
    first_pm.load()
    second_pm = self.create_pm(
        folder=self.second_pm_folder,
        name=self.second_test_name,
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST,
    )
    second_pm.load()
    selector = Selector()

    # Pipeline manager 1 contents
    first_entry = selector.test_pipeline_manager.example_class.stuff.data
    first_config_path = os.path.join(
        self.defaults_path, 'example_class', 'stuff', 'data.py'
    )
    with open(first_config_path, 'r') as config_file:
        first_contents = config_file.read()
    self.assert_example_class_dict_config_file_contents(first_contents)

    # Pipeline manager 2 contents
    second_entry = selector.test_pipeline_manager2.example_class.stuff.data
    second_config_path = os.path.join(
        self.second_defaults_path, 'example_class', 'stuff', 'data.py'
    )
    with open(second_config_path, 'r') as config_file:
        second_contents = config_file.read()
    self.assert_example_class_dict_config_file_contents(second_contents)
def test_get_specific_class_dict_section_multiple_pms(self):
    """Fetch the ``example_class.stuff`` section from each of two managers,
    by selector entry and by string, and compare the first item of each
    with a default ExampleClass named ``data``."""
    self.write_example_class_dict_to_file()
    self.write_example_class_dict_to_file(pm_index=1)
    first_pm = self.create_pm(
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST
    )
    first_pm.load()
    second_pm = self.create_pm(
        folder=self.second_pm_folder,
        name=self.second_test_name,
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST,
    )
    second_pm.load()
    selector = Selector()

    # Section from pipeline manager 1
    section_entry = selector.test_pipeline_manager.example_class.stuff
    reference = ExampleClass(None, name='data')
    via_entry = first_pm.get(section_entry)[0]
    via_string = first_pm.get('example_class.stuff')[0]
    assert via_entry.name == via_string.name == reference.name
    assert via_entry.a == via_string.a == reference.a

    # Section from pipeline manager 2
    section_entry = selector.test_pipeline_manager2.example_class.stuff
    reference = ExampleClass(None, name='data')
    via_entry = second_pm.get(section_entry)[0]
    via_string = second_pm.get('example_class.stuff')[0]
    assert via_entry.name == via_string.name == reference.name
    assert via_entry.a == via_string.a == reference.a
def create_sources(self, pm: PipelineManager):
    """Create and configure ``sources.some.one``, ``two`` and ``three``
    on ``pm``.

    Sources one and two get columns plus a CSV location taken from
    ``self``; source three gets columns only.
    """
    selector = Selector()

    first_section = 'sources.some.one'
    pm.create(first_section)
    first_columns = [
        selector.dcpm.cols.some.a,
        selector.dcpm.cols.some.b,
        selector.dcpm.cols.some.c,
    ]
    pm.update(
        section_path_str=first_section,
        columns=first_columns,
        location=self.csv_path,
    )

    second_section = 'sources.some.two'
    pm.create(second_section)
    second_columns = [
        selector.dcpm.cols.some.e,
        selector.dcpm.cols.some.f,
        selector.dcpm.cols.some.c,
    ]
    pm.update(
        section_path_str=second_section,
        columns=second_columns,
        location=self.csv_path2,
    )

    third_section = 'sources.some.three'
    pm.create(third_section)
    third_columns = [
        selector.dcpm.cols.some.d,
        selector.dcpm.cols.some.c,
    ]
    pm.update(section_path_str=third_section, columns=third_columns)
def test_get_class_multiple_pms(self):
    """For each of two managers, getting ``stuff.ExampleClass`` by
    selector entry, by string and by calling the entry must give the
    same object, equal to ``ExampleClass(None)``."""
    self.write_example_class_to_pipeline_dict_file()
    self.write_example_class_to_pipeline_dict_file(
        file_path=self.second_pipeline_dict_path
    )
    first_pm = self.create_pm()
    first_pm.load()
    second_pm = self.create_pm(
        folder=self.second_pm_folder,
        name=self.second_test_name,
    )
    second_pm.load()
    selector = Selector()

    # Pipeline manager 1
    class_entry = selector.test_pipeline_manager.stuff.ExampleClass
    via_entry = first_pm.get(class_entry)
    via_string = first_pm.get('stuff.ExampleClass')
    assert via_entry is via_string is class_entry()
    assert via_entry == ExampleClass(None)

    # Pipeline manager 2
    class_entry = selector.test_pipeline_manager2.stuff.ExampleClass
    via_entry = second_pm.get(class_entry)
    via_string = second_pm.get('stuff.ExampleClass')
    assert via_entry is via_string is class_entry()
    assert via_entry == ExampleClass(None)
def test_get_function_multiple_pms(self):
    """For each of two managers, the function fetched for
    ``stuff.a_function`` by selector entry or by string must return
    ``(None, None)`` when called."""
    self.write_a_function_to_pipeline_dict_file()
    self.write_a_function_to_pipeline_dict_file(
        file_path=self.second_pipeline_dict_path
    )
    first_pm = self.create_pm()
    first_pm.load()
    second_pm = self.create_pm(
        folder=self.second_pm_folder,
        name=self.second_test_name,
    )
    second_pm.load()
    selector = Selector()

    # Pipeline manager 1
    function_entry = selector.test_pipeline_manager.stuff.a_function
    result_via_entry = first_pm.get(function_entry)()
    result_via_string = first_pm.get('stuff.a_function')()
    assert result_via_entry == result_via_string == (None, None)

    # Pipeline manager 2
    function_entry = selector.test_pipeline_manager2.stuff.a_function
    result_via_entry = second_pm.get(function_entry)()
    result_via_string = second_pm.get('stuff.a_function')()
    assert result_via_entry == result_via_string == (None, None)
def create_default_analyses(self, pm: PipelineManager):
    """Create ``analysis.some.one`` and ``analysis.some.two`` on ``pm``.

    The second analysis is given ``combinedata.some.thing`` as its data
    source; the first is created with a name only.
    """
    selector = Selector()
    self.create_analysis(pm, 'analysis.some.one', name='Analysis One')
    combined_source = selector.dcpm.combinedata.some.thing
    self.create_analysis(
        pm,
        'analysis.some.two',
        data_source=combined_source,
        name='Analysis Two',
    )
def test_iter_no_plugins(self):
    """Run ``a_function`` through an IterativeRunner with two config
    dicts and check both the runner's cases and its results."""
    self.write_a_function_to_pipeline_dict_file()
    manager = self.create_pm()
    manager.load()
    function_entry = Selector().test_pipeline_manager.stuff.a_function

    full_case = {
        "section_path_str": "test_pipeline_manager.stuff.a_function",
        "b": 10,
        "a": 2,
    }
    partial_case = {
        "section_path_str": "test_pipeline_manager.stuff.a_function",
        "b": 20,
    }
    runner = IterativeRunner(function_entry, [full_case, partial_case])
    assert runner.cases == [(full_case,), (partial_case,)]

    outcome = runner.run()
    # Compare against freshly written dicts rather than the objects handed
    # to the runner.
    expected_full = dict(
        section_path_str="test_pipeline_manager.stuff.a_function",
        b=10,
        a=2,
    )
    expected_partial = dict(
        section_path_str="test_pipeline_manager.stuff.a_function",
        b=20,
    )
    assert outcome == [
        ((expected_full,), (2, 10)),
        ((expected_partial,), (None, 20)),
    ]
def test_update_batch_with_plugins(self):
    """Batch-update ``b`` on two ``a_function`` entries with the plugin
    added, then check each run result and the batch hook counters."""
    self.add_plugin()
    self.write_a_function_to_pipeline_dict_file()
    manager = self.create_pm()
    manager.load()
    manager.create('stuff2', a_function)
    selector = Selector()
    function_entries = [
        selector.test_pipeline_manager.stuff.a_function,
        selector.test_pipeline_manager.stuff2.a_function,
    ]
    requested_b = ["a", "b"]

    # Each update uses the entry's path with its first section dropped.
    updates = []
    for entry in function_entries:
        full_path = SectionPath(entry.section_path_str)
        relative_path = SectionPath.from_section_str_list(full_path[1:])
        updates.append(
            {'b': requested_b, 'section_path_str': relative_path.path_str}
        )
    manager.update_batch(updates)

    expected_run_result = [
        (None, OVERRIDDEN_B_RESULT),
        (None, OVERRIDDEN_B_RESULT),
        "abc",
    ]
    for entry in function_entries:
        run_result = manager.run(entry)
        assert run_result == expected_run_result

    assert PRE_UPDATE_BATCH_COUNTER == 1
    assert POST_UPDATE_BATCH_COUNTER == 1
def test_run_function(self):
    """Running ``stuff.a_function`` through the manager with no config
    changes must return ``(None, None)``."""
    self.write_a_function_to_pipeline_dict_file()
    manager = self.create_pm()
    manager.load()
    function_entry = Selector().test_pipeline_manager.stuff.a_function
    outcome = manager.run(function_entry)
    assert outcome == (None, None)
def source_transform_func(ds: dc.DataSource) -> dc.DataSource:
    """Add ``confs.ConfigExample().a`` to every numeric load variable's
    column in ``ds.df`` and return the same ``ds``."""
    selector = Selector()
    config: ConfigExample = selector.dcpm.confs.ConfigExample()
    for variable in ds.load_variables:
        if not variable.dtype.is_numeric:
            continue
        ds.df[variable.name] += config.a
    return ds
def test_specific_class_iv_from_selector(self):
    """Resolve ``example_class.stuff.data`` through a Selector and run
    the specific-class entry assertions on it."""
    self.write_example_class_dict_to_file()
    manager = self.create_pm(
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST
    )
    manager.load()
    data_entry = Selector().test_pipeline_manager.example_class.stuff.data
    self.assert_valid_specific_class_iv(data_entry, manager)
def ds_generator_func(columns: Sequence[dc.Column]) -> dc.DataSource:
    """Build a DataSource from a copy of ``EXPECT_GENERATED_DF`` whose
    ``D`` column is increased by ``confs2.ConfigExample().a``."""
    selector = Selector()
    config: ConfigExample = selector.dcpm.confs2.ConfigExample()
    # Copy first so the module-level frame itself is not modified here.
    generated = EXPECT_GENERATED_DF.copy()
    generated['D'] += config.a
    return dc.DataSource(df=generated, columns=columns)
def test_specific_class_iv_attribute_is_specific_class_from_selector(self):
    """Set ``data.a`` to the ``data2`` entry, then check that ``data.a``
    read through a Selector passes the specific-class assertions twice."""
    self.write_example_class_dict_to_file()
    manager = self.create_pm(
        specific_class_config_dicts=CLASS_CONFIG_DICT_LIST
    )
    manager.load()
    selector = Selector()
    manager.create('example_class.stuff.data2')
    manager.update(
        section_path_str='example_class.stuff.data',
        a=selector.test_pipeline_manager.example_class.stuff.data2,
    )

    selector = Selector()
    attribute_entry = selector.test_pipeline_manager.example_class.stuff.data.a
    self.assert_valid_specific_class_iv(attribute_entry, manager)

    # Accessing a second time used to cause a new ItemView to be created
    # for the attribute itself, which is not expected. The repeated check
    # guards against that happening again.
    attribute_entry = selector.test_pipeline_manager.example_class.stuff.data.a
    self.assert_valid_specific_class_iv(attribute_entry, manager)
def test_hook_updates_context_during_operation(self):
    """Run an analysis fed by a transform that uses
    ``assert_context_is_updated``; the context's running section path
    must be ``None`` both before and after."""
    manager = self.create_pm()
    manager.load()
    self.create_entries(manager)
    assert context.currently_running_section_path_str is None

    transform_options = dc.TransformOptions(
        assert_context_is_updated,
        transform_key='assert_context_updated',
    )
    self.create_transform(
        manager, 'transdata.temp.thing', opts=transform_options
    )

    selector = Selector()
    self.create_analysis(
        manager,
        'analysis.temp.thing',
        data_source=selector.dcpm.transdata.temp.thing,
    )

    # A new Selector is built once the analysis entry exists.
    selector = Selector()
    selector.dcpm.analysis.temp.thing()

    assert OPERATION_COUNTER == 1
    assert context.currently_running_section_path_str is None
def create_pm_with_function_and_run(
    self,
) -> Tuple[PipelineManager, ItemView, Any]:
    """Create a manager with ``include_logs=False``, load it and run
    ``stuff.a_function``.

    Returns the manager, the selector entry that was run, and the run
    result, in that order.
    """
    self.write_a_function_to_pipeline_dict_file()
    manager = self.create_pm(include_logs=False)
    manager.load()
    function_entry = Selector().test_pipeline_manager.stuff.a_function
    run_result = manager.run(function_entry)
    return manager, function_entry, run_result
def test_get_class(self):
    """Getting ``stuff.ExampleClass`` by selector entry, by string and by
    calling the entry must give the same object, equal to
    ``ExampleClass(None)``."""
    self.write_example_class_to_pipeline_dict_file()
    manager = self.create_pm()
    manager.load()
    class_entry = Selector().test_pipeline_manager.stuff.ExampleClass
    via_entry = manager.get(class_entry)
    via_string = manager.get('stuff.ExampleClass')
    assert via_entry is via_string is class_entry()
    assert via_entry == ExampleClass(None)