def test_selection_rule_picker():
    """Run a fixed-choice selection rule through ``SelectionRulePicker``.

    The rule ignores its inputs and always returns the ``"spiky"`` model
    group. The test checks the regret and raw value reported for each
    requested train end time against hard-coded expectations for the
    sample distance table.
    """
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        distance_table, model_groups = create_sample_distance_table(engine)

        def pick_spiky(df, train_end_time):
            # Constant rule: the choice never depends on the data passed in.
            return [model_groups["spiky"].model_group_id]

        picker = SelectionRulePicker(distance_from_best_table=distance_table)
        spiky_rule = BoundSelectionRule(
            descriptive_name="spiky",
            function=pick_spiky,
            args={},
        )
        candidate_ids = [group.model_group_id for group in model_groups.values()]
        end_times = ["2014-01-01", "2015-01-01", "2016-01-01"]

        results = picker.results_for_rule(
            bound_selection_rule=spiky_rule,
            model_group_ids=candidate_ids,
            train_end_times=end_times,
            regret_metric="precision@",
            regret_parameter="100_abs",
        )

        # One result per train end time, in the order requested above.
        observed_regrets = [row["dist_from_best_case_next_time"] for row in results]
        observed_values = [row["raw_value"] for row in results]
        assert observed_regrets == [0.19, 0.3, 0.12]
        assert observed_values == [0.45, 0.84, 0.45]
def test_selection_rule_picker_with_args():
    """Run a selection rule that takes bound keyword arguments.

    The rule picks the model group with the highest mean ``raw_value`` for
    the metric/parameter supplied through ``BoundSelectionRule.args``
    (here ``recall@`` / ``100_abs``), while regret is measured on
    ``precision@`` / ``100_abs``.

    This function was previously defined twice in the module with the same
    name and an identical body (only the quote style differed). The second
    definition rebound the name, so the first was never collected; the two
    are merged into this single definition.
    """
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        distance_table, model_groups = create_sample_distance_table(engine)

        def pick_highest_avg(df, train_end_time, metric, parameter):
            # Sanity check on the data handed to the rule: it must span
            # exactly two distinct train end times.
            assert len(df["train_end_time"].unique()) == 2
            subsetted = df[(df["metric"] == metric) & (df["parameter"] == parameter)]
            mean = subsetted.groupby(["model_group_id"])["raw_value"].mean()
            return [mean.nlargest(1).index[0]]

        selection_rule_picker = SelectionRulePicker(
            distance_from_best_table=distance_table
        )
        regrets = [
            result["dist_from_best_case_next_time"]
            for result in selection_rule_picker.results_for_rule(
                bound_selection_rule=BoundSelectionRule(
                    descriptive_name="pick_highest_avg",
                    function=pick_highest_avg,
                    args={"metric": "recall@", "parameter": "100_abs"},
                ),
                model_group_ids=[
                    mg.model_group_id for mg in model_groups.values()
                ],
                train_end_times=["2015-01-01"],
                regret_metric="precision@",
                regret_parameter="100_abs",
            )
        ]
        # picking the highest avg recall will pick 'spiky' for this time
        assert regrets == [0.3]