Example #1
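A selection rule with no extra arguments: pick_spiky always returns the "spiky" model group, and the test checks the regret (dist_from_best_case_next_time) and raw metric value that results_for_rule reports for that choice at each train end time.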
# Assumed imports -- module paths are illustrative and may differ by project layout;
# SelectionRulePicker and BoundSelectionRule come from the Audition component and
# create_sample_distance_table is a local test helper.
import testing.postgresql
from sqlalchemy import create_engine

from triage.component.audition.selection_rule_picker import SelectionRulePicker
from triage.component.audition.selection_rules import BoundSelectionRule
from tests.utils import create_sample_distance_table


def test_selection_rule_picker():
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        distance_table, model_groups = create_sample_distance_table(engine)

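        # A trivial selection rule: always pick the 'spiky' model group.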
        def pick_spiky(df, train_end_time):
            return [model_groups["spiky"].model_group_id]

        selection_rule_picker = SelectionRulePicker(
            distance_from_best_table=distance_table)

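        # Evaluate the rule at each train end time; each result carries the picked
        # group's regret against the best group and its raw metric value.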
        results = selection_rule_picker.results_for_rule(
            bound_selection_rule=BoundSelectionRule(descriptive_name="spiky",
                                                    function=pick_spiky,
                                                    args={}),
            model_group_ids=[
                mg.model_group_id for mg in model_groups.values()
            ],
            train_end_times=["2014-01-01", "2015-01-01", "2016-01-01"],
            regret_metric="precision@",
            regret_parameter="100_abs",
        )
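        # Expected regrets and raw metric values for the 'spiky' group, as seeded
        # by create_sample_distance_table.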
        assert [result["dist_from_best_case_next_time"]
                for result in results] == [
                    0.19,
                    0.3,
                    0.12,
                ]
        assert [result["raw_value"]
                for result in results] == [0.45, 0.84, 0.45]
Example #2
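A selection rule that takes extra arguments, bound through BoundSelectionRule's args: pick_highest_avg chooses the model group with the highest average recall@100_abs across the rows it receives, while regret is still measured on precision@100_abs. The same imports as in Example #1 apply.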
def test_selection_rule_picker_with_args():
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        distance_table, model_groups = create_sample_distance_table(engine)

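        # Pick the model group with the highest mean raw value for the given
        # metric and parameter across all rows the picker passes in.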
        def pick_highest_avg(df, train_end_time, metric, parameter):
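            # The picker hands the rule every row up to and including the given
            # train_end_time; the sample table covers two such times here.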
            assert len(df["train_end_time"].unique()) == 2
            subsetted = df[(df["metric"] == metric)
                           & (df["parameter"] == parameter)]
            mean = subsetted.groupby(["model_group_id"])["raw_value"].mean()
            return [mean.nlargest(1).index[0]]

        selection_rule_picker = SelectionRulePicker(
            distance_from_best_table=distance_table)
        regrets = [
            result["dist_from_best_case_next_time"]
            for result in selection_rule_picker.results_for_rule(
                bound_selection_rule=BoundSelectionRule(
                    descriptive_name="pick_highest_avg",
                    function=pick_highest_avg,
                    args={
                        "metric": "recall@",
                        "parameter": "100_abs"
                    },
                ),
                model_group_ids=[
                    mg.model_group_id for mg in model_groups.values()
                ],
                train_end_times=["2015-01-01"],
                regret_metric="precision@",
                regret_parameter="100_abs",
            )
        ]
        # picking the highest avg recall will pick 'spiky' for this time
        assert regrets == [0.3]