import itertools
from functools import partial
from typing import Any, Dict, List, Tuple

# Assumed to be imported elsewhere in this module: DataSource, DataPipeline,
# and DataExplorer from datacode, the local helpers
# config_dependencies_for_section_path_strs and _sort_key_for_case_tup, and
# IterativeRunner (under TYPE_CHECKING) for the string annotation below.


def pyfileconf_iter_modify_cases(
    cases: List[Tuple[Dict[str, Any], ...]], runner: "IterativeRunner"
):
    """
    Reorder cases so that pipelines are re-run as little as possible

    :param cases: list of tuples of kwarg dictionaries which would normally
        be provided to .update
    :param runner: the IterativeRunner whose config updates and run items
        determine the ordering
    :return: None
    """
    from pyfileconf.selector.models.itemview import ItemView

    # Gather unique config section path strs
    section_path_strs: List[str] = []
    for conf in runner.config_updates:
        if conf["section_path_str"] not in section_path_strs:
            section_path_strs.append(conf["section_path_str"])

    # Get views of the items which depend on the changing config
    config_deps = config_dependencies_for_section_path_strs(section_path_strs)

    # Get the difficulty of executing all run items after a change in each
    # config individually
    config_ivs = [
        ItemView.from_section_path_str(sp_str) for sp_str in section_path_strs
    ]
    dependent_ivs = list(set(itertools.chain(*config_deps.values())))
    run_ivs = [ItemView.from_section_path_str(sp.path_str) for sp in runner.run_items]
    dc_run_ivs = [iv for iv in run_ivs if isinstance(iv, (DataSource, DataPipeline))]
    de = DataExplorer(config_ivs + dependent_ivs + run_ivs)  # type: ignore
    difficulties: Dict[str, float] = {}
    for sp_str, dep_ivs in config_deps.items():
        dc_dep_ivs = [iv for iv in dep_ivs if isinstance(iv, (DataSource, DataPipeline))]
        if not dc_dep_ivs or not dc_run_ivs:
            # The relationship between this config and the running item has
            # nothing to do with datacode. As far as this plugin is concerned,
            # these cases should come last (changing most often) since they do
            # not require any re-running of pipelines, so they have zero
            # difficulty. To retain the order in which they were passed for
            # consistency, assign a negative difficulty based on position.
            difficulties[sp_str] = -section_path_strs.index(sp_str)
        else:
            difficulties[sp_str] = de.difficulty_between(dc_dep_ivs, dc_run_ivs)  # type: ignore

    # Sort section paths by descending difficulty, then order the cases so
    # that the hardest-to-re-run configs vary least often
    ordered_sp_strs = section_path_strs.copy()
    ordered_sp_strs.sort(key=lambda sp_str: -difficulties[sp_str])
    get_sort_key = partial(
        _sort_key_for_case_tup, ordered_sp_strs, runner.config_updates
    )
    cases.sort(key=get_sort_key)
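# A minimal, self-contained sketch of the ordering idea above, independent of
# pyfileconf and datacode (the section paths and difficulty values are
# hypothetical): configs whose dependents are harder to re-run sort first and
# so vary least often, while zero-impact configs keep their original relative
# order through the negative positional difficulty.
def _demo_difficulty_ordering():
    section_path_strs = ['stuff.a', 'stuff.b', 'stuff.c']
    # 'stuff.c' has no datacode dependents, so it got a negative positional
    # difficulty and sorts last regardless of the positive values
    difficulties = {'stuff.a': 120.0, 'stuff.b': 180.0, 'stuff.c': -2.0}
    ordered = sorted(section_path_strs, key=lambda sp_str: -difficulties[sp_str])
    assert ordered == ['stuff.b', 'stuff.a', 'stuff.c']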
def create_explorer(self):
    dp = self.create_merge_pipeline()
    ds = self.create_source()
    data = dict(sources=[ds], pipelines=[dp])
    explorer = DataExplorer.from_dict(data)
    return explorer
def test_get_difficulty_between(self):
    ds = self.create_source(name='one', difficulty=10)
    gp = self.create_generator_pipeline()
    gp.name = 'Generate One'
    ds2 = self.create_source(pipeline=gp, name='two', difficulty=20)
    dp = self.create_merge_pipeline(data_sources=[ds, ds2])
    dp.name = 'Merge One Two'
    da1 = self.create_analysis_pipeline(source=dp)
    da1.name = 'Analysis One'
    ds3 = self.create_source(name='three', difficulty=70)
    da2 = self.create_analysis_pipeline(source=ds2)
    da2.name = 'Analysis Two'
    de = DataExplorer([ds, gp, ds2, dp, da1, da2])

    # Single on each side
    difficulty = de.difficulty_between([ds2], [da1])
    assert difficulty == 120
    difficulty = de.difficulty_between([gp], [da1])
    assert difficulty == 170
    difficulty = de.difficulty_between([ds], [dp])
    assert difficulty == 60

    # Multiple begins, single end
    difficulty = de.difficulty_between([ds, ds2], [da1])
    assert difficulty == 130

    # Multiple ends, single begin
    difficulty = de.difficulty_between([ds2], [da1, da2])
    assert difficulty == 170

    # Multiple begins, multiple ends
    difficulty = de.difficulty_between([ds, ds2], [da1, da2])
    assert difficulty == 180

    # Test errors
    with self.assertRaises(ValueError) as cm:
        de.difficulty_between([ds3], [da1])
    exc = cm.exception
    assert 'no direct link between the items could be determined' == str(exc)

    ds4 = self.create_source(name='four')
    with self.assertRaises(ValueError) as cm:
        de.difficulty_between([ds4], [da1])
    exc = cm.exception
    assert f'must pass items which are already in DataExplorer, but got {ds4}' == str(exc)
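# The expected values above are consistent with difficulty_between summing the
# difficulty of every item on the connecting path(s), counting shared items
# once, with items created without an explicit difficulty defaulting to 50
# (this default is inferred from the expected values, not confirmed against
# the library source):
#   [ds2] -> [da1]:          ds2(20) + dp(50) + da1(50)                    == 120
#   [gp]  -> [da1]:          gp(50) + ds2(20) + dp(50) + da1(50)           == 170
#   [ds, ds2] -> [da1, da2]: ds(10) + ds2(20) + dp(50) + da1(50) + da2(50) == 180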
def test_get_custom_difficulty(self):
    dp = self.create_merge_pipeline()
    dp.difficulty = 15
    ds = self.create_source()
    ds.difficulty = 5
    difficulty = DataExplorer.get_difficulty_for([dp, ds])
    assert difficulty == 20
def test_create_from_sources_and_pipelines_dict(self):
    dp = self.create_merge_pipeline()
    ds = self.create_source()
    data = dict(sources=[ds], pipelines=[dp])
    explorer = DataExplorer.from_dict(data)
    got_ds, got_dp = explorer.items
    assert ds == got_ds
    assert dp == got_dp
def test_no_overlapping_difficulty(self):
    dp = self.create_merge_pipeline()
    ds = self.create_source()
    da1 = self.create_analysis_pipeline(source=ds)
    da1.name = 'Analysis One'
    da2 = self.create_analysis_pipeline(source=ds)
    da2.name = 'Analysis Two'
    difficulty = DataExplorer.get_difficulty_for([dp, ds, da1, da2])
    assert difficulty == EXPECT_DIFFICULTY + 100
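# The +100 over EXPECT_DIFFICULTY matches the two added analysis pipelines at
# the apparent default difficulty of 50 each: although da1 and da2 both read
# from ds, ds is only counted once, hence "no overlapping difficulty" (an
# inference from the expected values, not confirmed against the source).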
def test_get_roots(self):
    ds = self.create_source(name='one', difficulty=10)
    gp = self.create_generator_pipeline()
    gp.name = 'Generate One'
    ds2 = self.create_source(pipeline=gp, name='two', difficulty=20)
    dp = self.create_merge_pipeline(data_sources=[ds, ds2])
    dp.name = 'Merge One Two'
    da1 = self.create_analysis_pipeline(source=dp)
    da1.name = 'Analysis One'
    ds3 = self.create_source(name='three', difficulty=70)
    da2 = self.create_analysis_pipeline(source=ds2)
    da2.name = 'Analysis Two'

    de = DataExplorer([ds, gp, ds2, dp, da1, da2])
    roots = de.roots
    assert len(roots) == 2
    assert ds in roots
    assert gp in roots

    de = DataExplorer([ds, ds2, dp, da1, da2])
    roots = de.roots
    assert len(roots) == 2
    assert ds in roots
    assert ds2 in roots

    de = DataExplorer([ds, gp, dp, da1, da2])
    roots = de.roots
    assert len(roots) == 3
    assert ds in roots
    assert gp in roots
    assert da2 in roots

    de = DataExplorer([da1])
    roots = de.roots
    assert len(roots) == 1
    assert da1 in roots
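# Taken together, these expectations suggest that de.roots contains every item
# with no upstream dependency among the explorer's own items: in the third
# case, where ds2 is omitted, da2 becomes a root because its source ds2 is no
# longer in the graph, even though da2 is a terminal analysis step. This
# reading of roots is inferred from the assertions above, not from the
# library source.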
def test_get_difficulty_by_class_method(self):
    dp = self.create_merge_pipeline()
    ds = self.create_source()
    difficulty = DataExplorer.get_difficulty_for([dp, ds])
    assert difficulty == EXPECT_DIFFICULTY