예제 #1
0
def test_best_average_two_metrics_greater_is_better():
    """Pin the output of ``best_average_two_metrics`` for precision/recall.

    The fixture has eight rows: model groups "1" and "2", each with one
    model per train end time (2011-01-01 and 2012-01-01), and each model
    scored on ``precision@`` and ``recall@`` at parameter ``100_abs``.

    Three calls are checked: without ``n``, with ``n=2``, and with ``n=2``
    after changing the seventh positional argument from 0.5 to 0.1
    (presumably the weight on the first metric -- confirm against the
    function's signature).
    """
    # Rows come in (precision, recall) pairs, one pair per model.
    columns = {
        "model_group_id": ["1", "1", "2", "2"] * 2,
        "model_id": ["1", "1", "2", "2", "3", "3", "4", "4"],
        "train_end_time": ["2011-01-01"] * 4 + ["2012-01-01"] * 4,
        "metric": ["precision@", "recall@"] * 4,
        "parameter": ["100_abs"] * 8,
        "raw_value": [0.6, 0.4, 0.4, 0.6, 0.5, 0.5, 0.4, 0.5],
        "dist_from_best_case": [0.0] * 8,
    }
    df = pandas.DataFrame.from_dict(columns)

    # Positional arguments shared by every call below.
    selection_time = "2013-01-01"
    metric_args = ("precision@", "100_abs", "recall@", "100_abs")

    single_best = best_average_two_metrics(
        df, selection_time, *metric_args, 0.5
    )
    assert single_best == ["1"]

    top_two_even = best_average_two_metrics(
        df, selection_time, *metric_args, 0.5, n=2
    )
    assert top_two_even == ["1", "2"]

    top_two_skewed = best_average_two_metrics(
        df, selection_time, *metric_args, 0.1, n=2
    )
    assert top_two_skewed == ["2", "1"]
예제 #2
0
def test_best_average_two_metrics_lesser_is_better():
    """Pin the output of ``best_average_two_metrics`` for count metrics.

    The fixture has eight rows: model groups '1' and '2', each with one
    model per train end time, scored on ``false positives@`` and
    ``false negatives@`` at parameter ``100_abs``. Both train end times
    carry the same raw values and distances from best case.

    Three calls are checked: without ``n``, with ``n=2``, and with ``n=2``
    after changing the seventh positional argument from 0.5 to 0.1
    (presumably the weight on the first metric -- confirm against the
    function's signature).
    """
    # The per-split values repeat, so each column is one split doubled.
    fixture = {
        'model_group_id': ['1', '1', '2', '2'] * 2,
        'model_id': ['1', '1', '2', '2', '3', '3', '4', '4'],
        'train_end_time': ['2011-01-01'] * 4 + ['2012-01-01'] * 4,
        'metric': ['false positives@', 'false negatives@'] * 4,
        'parameter': ['100_abs'] * 8,
        'raw_value': [20, 30, 40, 20] * 2,
        'dist_from_best_case': [0, 10, 20, 0] * 2,
    }
    df = pandas.DataFrame.from_dict(fixture)

    def select(seventh_arg, **options):
        # Only the seventh positional argument and ``n`` vary between calls.
        return best_average_two_metrics(
            df,
            '2013-01-01',
            'false positives@',
            '100_abs',
            'false negatives@',
            '100_abs',
            seventh_arg,
            **options
        )

    assert select(0.5) == ['1']
    assert select(0.5, n=2) == ['1', '2']
    assert select(0.1, n=2) == ['2', '1']
예제 #3
0
def test_best_average_two_metrics_greater_is_better():
    """Pin the output of ``best_average_two_metrics`` for precision/recall.

    The fixture is written one evaluation row at a time and then
    transposed into the column-oriented dict passed to
    ``DataFrame.from_dict``.

    Three calls are checked: without ``n``, with ``n=2``, and with ``n=2``
    after changing the seventh positional argument from 0.5 to 0.1
    (presumably the weight on the first metric -- confirm against the
    function's signature).
    """
    column_names = (
        'model_group_id',
        'model_id',
        'train_end_time',
        'metric',
        'parameter',
        'raw_value',
        'dist_from_best_case',
    )
    # One tuple per row, in the same order as ``column_names``.
    rows = [
        ('1', '1', '2011-01-01', 'precision@', '100_abs', 0.6, 0.0),
        ('1', '1', '2011-01-01', 'recall@', '100_abs', 0.4, 0.0),
        ('2', '2', '2011-01-01', 'precision@', '100_abs', 0.4, 0.0),
        ('2', '2', '2011-01-01', 'recall@', '100_abs', 0.6, 0.0),
        ('1', '3', '2012-01-01', 'precision@', '100_abs', 0.5, 0.0),
        ('1', '3', '2012-01-01', 'recall@', '100_abs', 0.5, 0.0),
        ('2', '4', '2012-01-01', 'precision@', '100_abs', 0.4, 0.0),
        ('2', '4', '2012-01-01', 'recall@', '100_abs', 0.5, 0.0),
    ]
    # Transpose rows into {column name: list of values}.
    by_column = {
        name: list(values)
        for name, values in zip(column_names, zip(*rows))
    }
    df = pandas.DataFrame.from_dict(by_column)

    as_of = '2013-01-01'

    winner = best_average_two_metrics(
        df, as_of, 'precision@', '100_abs', 'recall@', '100_abs', 0.5)
    assert winner == ['1']

    ranked_even = best_average_two_metrics(
        df, as_of, 'precision@', '100_abs', 'recall@', '100_abs', 0.5, n=2)
    assert ranked_even == ['1', '2']

    ranked_skewed = best_average_two_metrics(
        df, as_of, 'precision@', '100_abs', 'recall@', '100_abs', 0.1, n=2)
    assert ranked_skewed == ['2', '1']
예제 #4
0
def test_best_average_two_metrics_lesser_is_better():
    """Pin the output of ``best_average_two_metrics`` for count metrics.

    The fixture has eight rows: model groups "1" and "2", each with one
    model per train end time, scored on ``false positives@`` and
    ``false negatives@`` at parameter ``100_abs``.

    The calls are listed as a small table and run in order: without
    ``n``, with ``n=2``, and with ``n=2`` after changing the seventh
    positional argument from 0.5 to 0.1 (presumably the weight on the
    first metric -- confirm against the function's signature).
    """
    train_end_times = ["2011-01-01"] * 4 + ["2012-01-01"] * 4
    metric_names = ["false positives@", "false negatives@"] * 4
    df = pd.DataFrame.from_dict(
        {
            "model_group_id": ["1", "1", "2", "2"] * 2,
            "model_id": ["1", "1", "2", "2", "3", "3", "4", "4"],
            "train_end_time": train_end_times,
            "metric": metric_names,
            "parameter": ["100_abs"] * 8,
            "raw_value": [20, 30, 40, 20] * 2,
            "dist_from_best_case": [0, 10, 20, 0] * 2,
        }
    )

    # (seventh positional argument, extra keyword arguments, expected result)
    cases = [
        (0.5, {}, ["1"]),
        (0.5, {"n": 2}, ["1", "2"]),
        (0.1, {"n": 2}, ["2", "1"]),
    ]
    for seventh_arg, extra_kwargs, expected in cases:
        selected = best_average_two_metrics(
            df,
            "2013-01-01",
            "false positives@",
            "100_abs",
            "false negatives@",
            "100_abs",
            seventh_arg,
            **extra_kwargs
        )
        assert selected == expected