def test_best_avg_recency_weight_lesser_is_better():
    """Check ``best_avg_recency_weight`` selections on a 'false positives@' metric.

    The fixture holds three model groups ("1", "2", "3"), each with one model
    at each of three train end times (2011-01-01, 2012-01-01, 2013-01-01), all
    rows sharing the metric "false positives@" and the parameter "100_abs".

    The function is called with the numeric argument set to 1.00, 1.15 and
    1.50 (presumably the recency weight -- confirm against the implementation)
    and the "linear" option, both with the default ``n`` and with ``n=2``.
    """
    # NOTE(review): this test name appears to be defined a second time further
    # down in this module; the later definition would replace this one.
    group_ids = ["1", "2", "3"]
    end_times = ["2011-01-01", "2012-01-01", "2013-01-01"]
    n_rows = len(group_ids) * len(end_times)

    df = pd.DataFrame.from_dict(
        {
            # Groups cycle 1, 2, 3 within each train end time.
            "model_group_id": group_ids * len(end_times),
            "model_id": [str(i) for i in range(1, n_rows + 1)],
            # Each train end time is repeated once per model group.
            "train_end_time": [t for t in end_times for _ in group_ids],
            "metric": ["false positives@"] * n_rows,
            "parameter": ["100_abs"] * n_rows,
            "raw_value": [20, 90, 40, 50, 50, 50, 80, 20, 50],
            "dist_from_best_case": [70, 0, 50, 0, 0, 0, 0, 60, 30],
        }
    )
    df["train_end_time"] = pd.to_datetime(df["train_end_time"])

    # (numeric argument, extra keyword arguments, expected model group ids)
    cases = [
        (1.00, {}, ["3"]),
        (1.15, {}, ["3"]),
        (1.15, {"n": 2}, ["3", "1"]),
        (1.50, {}, ["3"]),
        (1.50, {"n": 2}, ["3", "2"]),
    ]
    for weight, extra, expected in cases:
        picked = best_avg_recency_weight(
            df, "2013-01-01", "false positives@", "100_abs", weight, "linear", **extra
        )
        assert picked == expected
def test_best_avg_recency_weight_greater_is_better():
    """Check ``best_avg_recency_weight`` selections on a 'precision@' metric.

    The fixture holds three model groups ("1", "2", "3"), each with one model
    at each of three train end times (2011-01-01, 2012-01-01, 2013-01-01), all
    rows sharing the metric "precision@" and the parameter "100_abs".

    The function is called with the numeric argument set to 1.00, 1.15 and
    1.50 (presumably the recency weight -- confirm against the implementation)
    and the "linear" option, both with the default ``n`` and with ``n=2``.
    """
    # NOTE(review): this test name appears to be defined a second time further
    # down in this module; the later definition would replace this one.
    group_ids = ["1", "2", "3"]
    end_times = ["2011-01-01", "2012-01-01", "2013-01-01"]
    n_rows = len(group_ids) * len(end_times)

    df = pd.DataFrame.from_dict(
        {
            # Groups cycle 1, 2, 3 within each train end time.
            "model_group_id": group_ids * len(end_times),
            "model_id": [str(i) for i in range(1, n_rows + 1)],
            # Each train end time is repeated once per model group.
            "train_end_time": [t for t in end_times for _ in group_ids],
            "metric": ["precision@"] * n_rows,
            "parameter": ["100_abs"] * n_rows,
            "raw_value": [0.8, 0.2, 0.4, 0.5, 0.5, 0.5, 0.2, 0.7, 0.5],
            "dist_from_best_case": [0.0, 0.4, 0.2, 0.0, 0.0, 0.0, 0.5, 0.0, 0.2],
        }
    )
    df["train_end_time"] = pd.to_datetime(df["train_end_time"])

    # (numeric argument, extra keyword arguments, expected model group ids)
    cases = [
        (1.00, {}, ["1"]),
        (1.00, {"n": 2}, ["1", "2"]),
        (1.15, {}, ["1"]),
        (1.50, {}, ["2"]),
        (1.50, {"n": 2}, ["2", "3"]),
    ]
    for weight, extra, expected in cases:
        picked = best_avg_recency_weight(
            df, "2013-01-01", "precision@", "100_abs", weight, "linear", **extra
        )
        assert picked == expected
def test_best_avg_recency_weight_lesser_is_better():
    """Exercise ``best_avg_recency_weight`` with 'false positives@' rows.

    Nine rows are built: model groups '1', '2' and '3' evaluated at train end
    times 2011-01-01, 2012-01-01 and 2013-01-01, every row using the metric
    'false positives@' with parameter '100_abs'. The assertions pin the model
    group ids returned for the numeric argument values 1.00, 1.15 and 1.50
    (presumably a recency weight -- confirm against the implementation) under
    the 'linear' option, with and without ``n=2``.
    """
    # NOTE(review): an earlier definition with this same name appears to exist
    # in this module; this later one would replace it at import time.
    train_end_times = ['2011-01-01', '2012-01-01', '2013-01-01']
    columns = {
        'model_group_id': ['1', '2', '3'] * 3,
        'model_id': [str(model_number) for model_number in range(1, 10)],
        # Three consecutive rows (one per model group) share each end time.
        'train_end_time': [when for when in train_end_times for _ in range(3)],
        'metric': ['false positives@'] * 9,
        'parameter': ['100_abs'] * 9,
        'raw_value': [20, 90, 40, 50, 50, 50, 80, 20, 50],
        'dist_from_best_case': [70, 0, 50, 0, 0, 0, 0, 60, 30],
    }
    df = pandas.DataFrame.from_dict(columns)
    df['train_end_time'] = pandas.to_datetime(df['train_end_time'])

    def pick(weight, **kwargs):
        # Only the numeric argument and the optional ``n`` vary between calls.
        return best_avg_recency_weight(
            df, '2013-01-01', 'false positives@', '100_abs', weight, 'linear',
            **kwargs
        )

    assert pick(1.00) == ['3']
    assert pick(1.15) == ['3']
    assert pick(1.15, n=2) == ['3', '1']
    assert pick(1.50) == ['3']
    assert pick(1.50, n=2) == ['3', '2']
def test_best_avg_recency_weight_greater_is_better():
    """Exercise ``best_avg_recency_weight`` with 'precision@' rows.

    Nine rows are built: model groups '1', '2' and '3' evaluated at train end
    times 2011-01-01, 2012-01-01 and 2013-01-01, every row using the metric
    'precision@' with parameter '100_abs'. The assertions pin the model group
    ids returned for the numeric argument values 1.00, 1.15 and 1.50
    (presumably a recency weight -- confirm against the implementation) under
    the 'linear' option, with and without ``n=2``.
    """
    # NOTE(review): an earlier definition with this same name appears to exist
    # in this module; this later one would replace it at import time.
    train_end_times = ['2011-01-01', '2012-01-01', '2013-01-01']
    columns = {
        'model_group_id': ['1', '2', '3'] * 3,
        'model_id': [str(model_number) for model_number in range(1, 10)],
        # Three consecutive rows (one per model group) share each end time.
        'train_end_time': [when for when in train_end_times for _ in range(3)],
        'metric': ['precision@'] * 9,
        'parameter': ['100_abs'] * 9,
        'raw_value': [0.8, 0.2, 0.4, 0.5, 0.5, 0.5, 0.2, 0.7, 0.5],
        'dist_from_best_case': [0.0, 0.4, 0.2, 0.0, 0.0, 0.0, 0.5, 0.0, 0.2],
    }
    df = pandas.DataFrame.from_dict(columns)
    df['train_end_time'] = pandas.to_datetime(df['train_end_time'])

    def pick(weight, **kwargs):
        # Only the numeric argument and the optional ``n`` vary between calls.
        return best_avg_recency_weight(
            df, '2013-01-01', 'precision@', '100_abs', weight, 'linear',
            **kwargs
        )

    assert pick(1.00) == ['1']
    assert pick(1.00, n=2) == ['1', '2']
    assert pick(1.15) == ['1']
    assert pick(1.50) == ['2']
    assert pick(1.50, n=2) == ['2', '3']