예제 #1
0
def pyfileconf_iter_modify_cases(
    cases: List[Tuple[Dict[str, Any], ...]], runner: "IterativeRunner"
):
    """
    Sort ``cases`` in place so that pipelines are re-run as little as possible.

    Each changing config section path is given a difficulty: the value reported
    by ``DataExplorer.difficulty_between`` from the items depending on that config
    to the runner's run items. Section paths are ranked from most to least
    difficult and that ranking is handed to ``_sort_key_for_case_tup`` to
    build the sort key of every case tuple.

    :param cases: list of tuples of kwarg dictionaries which would normally be provided to .update
    :param runner: runner whose ``config_updates`` and ``run_items`` are read
    :return: None, ``cases`` is sorted in place
    """
    from pyfileconf.selector.models.itemview import ItemView

    def is_datacode_item(item) -> bool:
        # Only data sources and data pipelines take part in the difficulty calculation
        return isinstance(item, (DataSource, DataPipeline))

    # Collect the section path strs of the config updates, dropping repeats
    # while keeping first-seen order
    section_path_strs: List[str] = []
    for update in runner.config_updates:
        candidate = update["section_path_str"]
        if candidate in section_path_strs:
            continue
        section_path_strs.append(candidate)

    # Views of the items which depend on each changing config
    config_deps = config_dependencies_for_section_path_strs(section_path_strs)

    # Everything the explorer needs to know about: the configs themselves,
    # whatever depends on them, and the items being run
    config_ivs = [ItemView.from_section_path_str(path) for path in section_path_strs]
    dependent_ivs = list(set(itertools.chain.from_iterable(config_deps.values())))
    run_ivs = [
        ItemView.from_section_path_str(run_item.path_str)
        for run_item in runner.run_items
    ]
    dc_run_ivs = [iv for iv in run_ivs if is_datacode_item(iv)]

    explorer = DataExplorer(config_ivs + dependent_ivs + run_ivs)  # type: ignore

    # Difficulty of executing all run items after a change in each config individually
    difficulties: Dict[str, float] = {}
    for sp_str, dep_ivs in config_deps.items():
        dc_dep_ivs = [iv for iv in dep_ivs if is_datacode_item(iv)]
        if dc_dep_ivs and dc_run_ivs:
            difficulties[sp_str] = explorer.difficulty_between(dc_dep_ivs, dc_run_ivs)  # type: ignore
        else:
            # Nothing datacode-related links this config to the run items, so
            # changing it never forces a pipeline re-run and it should change
            # most often (be ordered last). Rather than a flat zero, use the
            # negated position so that these configs keep the order in which
            # they were passed.
            difficulties[sp_str] = -section_path_strs.index(sp_str)

    # Most difficult first; sorted() is stable, so ties keep their original order
    ordered_sp_strs = sorted(
        section_path_strs, key=lambda sp_str: -difficulties[sp_str]
    )

    cases.sort(
        key=partial(_sort_key_for_case_tup, ordered_sp_strs, runner.config_updates)
    )
예제 #2
0
    def create_explorer(self):
        """
        Build a DataExplorer from one merge pipeline and one source.

        The pipeline is created before the source, and both are handed to
        ``DataExplorer.from_dict`` under the ``pipelines`` and ``sources`` keys.

        :return: the explorer returned by ``DataExplorer.from_dict``
        """
        pipeline = self.create_merge_pipeline()
        source = self.create_source()
        return DataExplorer.from_dict({'sources': [source], 'pipelines': [pipeline]})
예제 #3
0
    def test_get_difficulty_between(self):
        """
        Check ``DataExplorer.difficulty_between`` for single/multiple begin and
        end items, and that it raises ValueError for the two error inputs below.
        """
        ds = self.create_source(name='one', difficulty=10)
        gp = self.create_generator_pipeline()
        gp.name = 'Generate One'
        ds2 = self.create_source(pipeline=gp, name='two', difficulty=20)
        dp = self.create_merge_pipeline(data_sources=[ds, ds2])
        dp.name = 'Merge One Two'
        da1 = self.create_analysis_pipeline(source=dp)
        da1.name = 'Analysis One'
        ds3 = self.create_source(name='three', difficulty=70)
        da2 = self.create_analysis_pipeline(source=ds2)
        da2.name = 'Analysis Two'

        de = DataExplorer([ds, gp, ds2, dp, da1, da2])

        # (begin items, end items, expected total difficulty)
        expectations = [
            # Single on each side
            ([ds2], [da1], 120),
            ([gp], [da1], 170),
            ([ds], [dp], 60),
            # Multiple begins, single end
            ([ds, ds2], [da1], 130),
            # Multiple ends, single begin
            ([ds2], [da1, da2], 170),
            # Multiple begins, multiple ends
            ([ds, ds2], [da1, da2], 180),
        ]
        for begin, end, expected in expectations:
            difficulty = de.difficulty_between(begin, end)
            assert difficulty == expected

        # Test errors
        # NOTE(review): in both blocks below, the statements after the
        # difficulty_between call sit inside the assertRaises block, so they are
        # skipped whenever the expected ValueError is raised; only the exception
        # type is effectively checked. Moving them after the block would enable
        # the message checks, but confirm the expected messages first. ds3 is
        # not among the items passed to DataExplorer above, which may affect
        # which message is produced for it.
        with self.assertRaises(ValueError) as cm:
            difficulty = de.difficulty_between([ds3], [da1])
            exc = cm.exception
            assert 'no direct link between the items could be determined' == str(
                exc)
        ds4 = self.create_source(name='four')
        with self.assertRaises(ValueError) as cm:
            difficulty = de.difficulty_between([ds4], [da1])
            exc = cm.exception
            assert f'must pass items which are already in DataExplorer, but got {ds4}' == str(
                exc)
예제 #4
0
    def test_get_custom_difficulty(self):
        """
        Difficulties assigned directly on a pipeline (15) and a source (5)
        are expected to total 20 via ``DataExplorer.get_difficulty_for``.
        """
        pipeline = self.create_merge_pipeline()
        pipeline.difficulty = 15
        source = self.create_source()
        source.difficulty = 5

        assert DataExplorer.get_difficulty_for([pipeline, source]) == 20
예제 #5
0
    def test_create_from_sources_and_pipelines_dict(self):
        """
        An explorer built with ``DataExplorer.from_dict`` from a dict with
        ``sources`` and ``pipelines`` keys should expose exactly two items:
        the source first, then the pipeline.
        """
        dp = self.create_merge_pipeline()
        ds = self.create_source()

        explorer = DataExplorer.from_dict({'sources': [ds], 'pipelines': [dp]})

        # Unpacking also enforces that there are exactly two items
        first_item, second_item = explorer.items
        assert ds == first_item
        assert dp == second_item
예제 #6
0
    def test_no_overlapping_difficulty(self):
        """
        Two analysis pipelines built on the same source, passed alongside a
        merge pipeline and that source, are expected to give a total
        difficulty of ``EXPECT_DIFFICULTY + 100``.
        """
        dp = self.create_merge_pipeline()
        ds = self.create_source()
        first_analysis = self.create_analysis_pipeline(source=ds)
        first_analysis.name = 'Analysis One'
        second_analysis = self.create_analysis_pipeline(source=ds)
        second_analysis.name = 'Analysis Two'

        items = [dp, ds, first_analysis, second_analysis]
        assert DataExplorer.get_difficulty_for(items) == EXPECT_DIFFICULTY + 100
예제 #7
0
    def test_get_roots(self):
        """
        Check ``DataExplorer.roots`` for several subsets of one linked set of
        sources and pipelines.
        """
        ds = self.create_source(name='one', difficulty=10)
        gp = self.create_generator_pipeline()
        gp.name = 'Generate One'
        ds2 = self.create_source(pipeline=gp, name='two', difficulty=20)
        dp = self.create_merge_pipeline(data_sources=[ds, ds2])
        dp.name = 'Merge One Two'
        da1 = self.create_analysis_pipeline(source=dp)
        da1.name = 'Analysis One'
        ds3 = self.create_source(name='three', difficulty=70)
        da2 = self.create_analysis_pipeline(source=ds2)
        da2.name = 'Analysis Two'

        # (items given to the explorer, items expected to be its roots)
        scenarios = [
            ([ds, gp, ds2, dp, da1, da2], [ds, gp]),
            ([ds, ds2, dp, da1, da2], [ds, ds2]),
            ([ds, gp, dp, da1, da2], [ds, gp, da2]),
            ([da1], [da1]),
        ]
        for items, expected_roots in scenarios:
            roots = DataExplorer(items).roots
            assert len(roots) == len(expected_roots)
            for expected_root in expected_roots:
                assert expected_root in roots
예제 #8
0
    def test_get_difficulty_by_class_method(self):
        """
        ``DataExplorer.get_difficulty_for`` called on the class with a merge
        pipeline and a source is expected to return ``EXPECT_DIFFICULTY``.
        """
        pipeline = self.create_merge_pipeline()
        source = self.create_source()

        assert DataExplorer.get_difficulty_for([pipeline, source]) == EXPECT_DIFFICULTY