def test_best_average_two_metrics_greater_is_better():
    """Check group selection on precision@/recall@ at 100_abs.

    Two model groups ("1" and "2") are evaluated at two train end times.
    The expected ranking is asserted for two values of the seventh
    positional argument (presumably the weight given to the first
    metric -- confirm against best_average_two_metrics) and for n=2.
    """
    columns = {
        "model_group_id": ["1", "1", "2", "2"] * 2,
        "model_id": [model for model in "1234" for _ in range(2)],
        "train_end_time": ["2011-01-01"] * 4 + ["2012-01-01"] * 4,
        "metric": ["precision@", "recall@"] * 4,
        "parameter": ["100_abs"] * 8,
        "raw_value": [0.6, 0.4, 0.4, 0.6, 0.5, 0.5, 0.4, 0.5],
        "dist_from_best_case": [0.0] * 8,
    }
    df = pandas.DataFrame.from_dict(columns)

    # (seventh positional argument, extra keyword arguments, expected result)
    scenarios = [
        (0.5, {}, ["1"]),
        (0.5, {"n": 2}, ["1", "2"]),
        (0.1, {"n": 2}, ["2", "1"]),
    ]
    for weight, extra, expected in scenarios:
        chosen = best_average_two_metrics(
            df,
            "2013-01-01",
            "precision@",
            "100_abs",
            "recall@",
            "100_abs",
            weight,
            **extra,
        )
        assert chosen == expected
def test_best_average_two_metrics_lesser_is_better():
    """Check group selection on false positives@/false negatives@ at 100_abs.

    Builds an eight-row frame covering two model groups at two train end
    times and asserts the group ids returned by best_average_two_metrics
    for three combinations of its seventh positional argument and ``n``.
    """
    records = {
        'model_group_id': ['1', '1', '2', '2'] * 2,
        'model_id': ['1', '1', '2', '2', '3', '3', '4', '4'],
        'train_end_time': ['2011-01-01'] * 4 + ['2012-01-01'] * 4,
        'metric': ['false positives@', 'false negatives@'] * 4,
        'parameter': ['100_abs'] * 8,
        'raw_value': [20, 30, 40, 20] * 2,
        'dist_from_best_case': [0, 10, 20, 0] * 2,
    }
    df = pandas.DataFrame.from_dict(records)

    def pick(weight, **options):
        # Every call shares the same frame, cutoff date, metrics and
        # parameters; only the trailing arguments vary.
        return best_average_two_metrics(
            df,
            '2013-01-01',
            'false positives@',
            '100_abs',
            'false negatives@',
            '100_abs',
            weight,
            **options
        )

    assert pick(0.5) == ['1']
    assert pick(0.5, n=2) == ['1', '2']
    assert pick(0.1, n=2) == ['2', '1']
def test_best_average_two_metrics_greater_is_better():
    """Verify best_average_two_metrics on precision@ and recall@ (100_abs).

    The frame holds two model groups, each with one model per train end
    time (2011-01-01 and 2012-01-01). Three calls are checked: the default
    result size, n=2, and n=2 with a different seventh positional argument
    (presumably the first metric's weight -- confirm against the callee).
    """
    early, late = '2011-01-01', '2012-01-01'
    frame_data = {
        'model_group_id': ['1', '1', '2', '2'] * 2,
        'model_id': ['1', '1', '2', '2', '3', '3', '4', '4'],
        'train_end_time': [early] * 4 + [late] * 4,
        'metric': ['precision@', 'recall@'] * 4,
        'parameter': ['100_abs'] * 8,
        'raw_value': [0.6, 0.4, 0.4, 0.6, 0.5, 0.5, 0.4, 0.5],
        'dist_from_best_case': [0.0] * 8,
    }
    df = pandas.DataFrame.from_dict(frame_data)

    # Positional arguments common to every call below.
    shared = (df, '2013-01-01', 'precision@', '100_abs', 'recall@', '100_abs')

    top_one = best_average_two_metrics(*shared, 0.5)
    assert top_one == ['1']

    top_two_balanced = best_average_two_metrics(*shared, 0.5, n=2)
    assert top_two_balanced == ['1', '2']

    top_two_skewed = best_average_two_metrics(*shared, 0.1, n=2)
    assert top_two_skewed == ['2', '1']
def test_best_average_two_metrics_lesser_is_better():
    """Verify best_average_two_metrics on false positives@/false negatives@.

    Uses an eight-row frame (two model groups, two train end times, both
    metrics at parameter 100_abs) and checks the returned group ids for
    each combination of the seventh positional argument and ``n`` below.
    """
    fixture = {
        "model_group_id": ["1", "1", "2", "2"] * 2,
        "model_id": [model for model in "1234" for _ in range(2)],
        "train_end_time": ["2011-01-01"] * 4 + ["2012-01-01"] * 4,
        "metric": ["false positives@", "false negatives@"] * 4,
        "parameter": ["100_abs"] * 8,
        "raw_value": [20, 30, 40, 20] * 2,
        "dist_from_best_case": [0, 10, 20, 0] * 2,
    }
    df = pd.DataFrame.from_dict(fixture)

    # (seventh positional argument, extra keyword arguments, expected result)
    expectations = (
        (0.5, {}, ["1"]),
        (0.5, {"n": 2}, ["1", "2"]),
        (0.1, {"n": 2}, ["2", "1"]),
    )
    for weight, keywords, wanted in expectations:
        result = best_average_two_metrics(
            df,
            "2013-01-01",
            "false positives@",
            "100_abs",
            "false negatives@",
            "100_abs",
            weight,
            **keywords,
        )
        assert result == wanted