예제 #1
0
def test_best_avg_recency_weight_lesser_is_better():
    """Pin the groups chosen by ``best_avg_recency_weight`` for false positives.

    The fixture holds three model groups, each with one model per yearly
    train end time (2011 through 2013), all scored on ``false positives@``
    at ``100_abs``.  Per the test name this is the "lesser is better" case.
    The expected group ids for several linear recency weights, with and
    without ``n=2``, are listed in the case table below.
    """
    group_ids = ["1", "2", "3"]
    end_times = ["2011-01-01", "2012-01-01", "2013-01-01"]
    row_count = len(group_ids) * len(end_times)

    df = pd.DataFrame.from_dict(
        {
            # Groups cycle 1, 2, 3 within each train end time.
            "model_group_id": group_ids * len(end_times),
            "model_id": [str(number) for number in range(1, row_count + 1)],
            # Each train end time is repeated once per model group.
            "train_end_time": [
                end_time for end_time in end_times for _ in group_ids
            ],
            "metric": ["false positives@"] * row_count,
            "parameter": ["100_abs"] * row_count,
            "raw_value": [20, 90, 40, 50, 50, 50, 80, 20, 50],
            "dist_from_best_case": [70, 0, 50, 0, 0, 0, 0, 60, 30],
        }
    )
    df["train_end_time"] = pd.to_datetime(df["train_end_time"])

    # (recency weight, extra keyword arguments, expected model group ids)
    cases = [
        (1.00, {}, ["3"]),
        (1.15, {}, ["3"]),
        (1.15, {"n": 2}, ["3", "1"]),
        (1.50, {}, ["3"]),
        (1.50, {"n": 2}, ["3", "2"]),
    ]
    for weight, extra_kwargs, expected in cases:
        selected = best_avg_recency_weight(
            df,
            "2013-01-01",
            "false positives@",
            "100_abs",
            weight,
            "linear",
            **extra_kwargs,
        )
        assert selected == expected
예제 #2
0
def test_best_avg_recency_weight_greater_is_better():
    """Pin the groups chosen by ``best_avg_recency_weight`` for precision.

    The fixture holds three model groups, each with one model per yearly
    train end time (2011 through 2013), all scored on ``precision@`` at
    ``100_abs``.  Per the test name this is the "greater is better" case.
    Each assertion fixes the expected group ids for one linear recency
    weight, optionally asking for the top two groups via ``n=2``.
    """
    models_per_year = 3
    years = ("2011-01-01", "2012-01-01", "2013-01-01")
    total_rows = models_per_year * len(years)

    train_end_times = []
    for year in years:
        # One row per model group for every train end time.
        train_end_times.extend([year] * models_per_year)

    df = pd.DataFrame.from_dict(
        {
            "model_group_id": ["1", "2", "3"] * len(years),
            "model_id": [str(i + 1) for i in range(total_rows)],
            "train_end_time": train_end_times,
            "metric": ["precision@"] * total_rows,
            "parameter": ["100_abs"] * total_rows,
            "raw_value": [0.8, 0.2, 0.4, 0.5, 0.5, 0.5, 0.2, 0.7, 0.5],
            "dist_from_best_case": [0.0, 0.4, 0.2, 0.0, 0.0, 0.0, 0.5, 0.0, 0.2],
        }
    )
    df["train_end_time"] = pd.to_datetime(df["train_end_time"])

    def select(weight, **kwargs):
        # Only the recency weight and the optional ``n`` vary between checks.
        return best_avg_recency_weight(
            df, "2013-01-01", "precision@", "100_abs", weight, "linear", **kwargs
        )

    assert select(1.00) == ["1"]
    assert select(1.00, n=2) == ["1", "2"]
    assert select(1.15) == ["1"]
    assert select(1.50) == ["2"]
    assert select(1.50, n=2) == ["2", "3"]
예제 #3
0
def test_best_avg_recency_weight_lesser_is_better():
    """Verify the recency-weighted picks on a false-positives fixture.

    Nine rows are built: three model groups at each of three yearly train
    end times, all for 'false positives@' / '100_abs' (the "lesser is
    better" case, going by the test name).  The expected model group ids
    are then asserted for linear recency weights 1.00, 1.15 and 1.50, both
    for the default selection and for ``n=2``.
    """
    train_end_times = ['2011-01-01', '2012-01-01', '2013-01-01']
    groups_per_time = 3
    n_rows = groups_per_time * len(train_end_times)

    # Keys are inserted in the same order as the desired frame columns.
    columns = {}
    columns['model_group_id'] = ['1', '2', '3'] * len(train_end_times)
    columns['model_id'] = [str(idx) for idx in range(1, n_rows + 1)]
    columns['train_end_time'] = [
        stamp for stamp in train_end_times for _ in range(groups_per_time)
    ]
    columns['metric'] = ['false positives@'] * n_rows
    columns['parameter'] = ['100_abs'] * n_rows
    columns['raw_value'] = [20, 90, 40, 50, 50, 50, 80, 20, 50]
    columns['dist_from_best_case'] = [70, 0, 50, 0, 0, 0, 0, 60, 30]

    df = pandas.DataFrame.from_dict(columns)
    df['train_end_time'] = pandas.to_datetime(df['train_end_time'])

    def chosen(weight, **options):
        # Everything except the weight and the optional ``n`` stays fixed.
        return best_avg_recency_weight(df, '2013-01-01', 'false positives@',
                                       '100_abs', weight, 'linear', **options)

    assert chosen(1.00) == ['3']
    assert chosen(1.15) == ['3']
    assert chosen(1.15, n=2) == ['3', '1']
    assert chosen(1.50) == ['3']
    assert chosen(1.50, n=2) == ['3', '2']
예제 #4
0
def test_best_avg_recency_weight_greater_is_better():
    """Verify the recency-weighted picks on a precision fixture.

    Nine rows are built: three model groups at each of three yearly train
    end times, all for 'precision@' / '100_abs' (the "greater is better"
    case, going by the test name).  The scenario table lists, for each
    linear recency weight and optional ``n``, the model group ids the
    function is expected to return.
    """
    group_ids = ['1', '2', '3']
    end_times = ['2011-01-01', '2012-01-01', '2013-01-01']
    n_rows = len(group_ids) * len(end_times)

    df = pandas.DataFrame.from_dict({
        'model_group_id': group_ids * len(end_times),
        'model_id': [str(k) for k in range(1, n_rows + 1)],
        # Each train end time appears once per model group, in order.
        'train_end_time': [t for t in end_times for _ in group_ids],
        'metric': ['precision@'] * n_rows,
        'parameter': ['100_abs'] * n_rows,
        'raw_value': [0.8, 0.2, 0.4, 0.5, 0.5, 0.5, 0.2, 0.7, 0.5],
        'dist_from_best_case': [0.0, 0.4, 0.2, 0.0, 0.0, 0.0, 0.5, 0.0, 0.2],
    })
    df['train_end_time'] = pandas.to_datetime(df['train_end_time'])

    # (recency weight, extra keyword arguments, expected model group ids)
    scenarios = (
        (1.00, {}, ['1']),
        (1.00, {'n': 2}, ['1', '2']),
        (1.15, {}, ['1']),
        (1.50, {}, ['2']),
        (1.50, {'n': 2}, ['2', '3']),
    )
    for weight, extra, expected in scenarios:
        result = best_avg_recency_weight(df, '2013-01-01', 'precision@',
                                         '100_abs', weight, 'linear', **extra)
        assert result == expected