Exemple #1
0
def test_model_scoring_inspections():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [{
            'metrics': ['precision@', 'recall@'],
            'thresholds': {
                'percentiles': [5.0, 10.0],
                'top_n': [5, 10]
            }
        }]

        model_scorer = ModelScorer(metric_groups, db_engine)

        trained_model, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)

        labels = fake_labels(5)
        as_of_date = datetime.date(2016, 5, 5)
        evaluation_start = datetime.datetime(2016, 4, 1)
        evaluation_end = datetime.datetime(2016, 7, 1)
        prediction_frequency = '1d'
        model_scorer.score(
            trained_model.predict_proba(labels)[:, 1],
            trained_model.predict(labels), labels, model_id, evaluation_start,
            evaluation_end, prediction_frequency)

        # assert
        # that all of the records are there
        results = db_engine.execute(
            '''select distinct(metric || parameter) from results.evaluations
                where model_id = %s and evaluation_start_time = %s order by 1''',
            (model_id, evaluation_start))
        records = [row[0] for row in results]
        assert records == [
            '[email protected]_pct',
            'precision@10_abs',
            '[email protected]_pct',
            'precision@5_abs',
            '[email protected]_pct',
            'recall@10_abs',
            '[email protected]_pct',
            'recall@5_abs',
        ]
Exemple #2
0
def test_evaluating_early_warning():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [
            {
                "metrics": [
                    "precision@",
                    "recall@",
                    "true positives@",
                    "true negatives@",
                    "false positives@",
                    "false negatives@",
                ],
                "thresholds": {"percentiles": [5.0, 10.0], "top_n": [5, 10]},
            },
            {
                "metrics": [
                    "f1",
                    "mediocre",
                    "accuracy",
                    "roc_auc",
                    "average precision score",
                ]
            },
            {"metrics": ["fbeta@"], "parameters": [{"beta": 0.75}, {"beta": 1.25}]},
        ]

        training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]

        custom_metrics = {"mediocre": always_half}

        model_evaluator = ModelEvaluator(
            testing_metric_groups,
            training_metric_groups,
            db_engine,
            custom_metrics=custom_metrics,
        )

        labels = fake_labels(5)
        fake_train_matrix_store = MockMatrixStore("train", "efgh", 5, db_engine, labels)
        fake_test_matrix_store = MockMatrixStore("test", "1234", 5, db_engine, labels)

        trained_model, model_id = fake_trained_model(db_engine)

        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_test_matrix_store, model_id
        )
        records = [
            row[0]
            for row in db_engine.execute(
                """select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1""",
                (model_id, fake_test_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == [
            "accuracy",
            "average precision score",
            "f1",
            "false [email protected]_pct",
            "false negatives@10_abs",
            "false [email protected]_pct",
            "false negatives@5_abs",
            "false [email protected]_pct",
            "false positives@10_abs",
            "false [email protected]_pct",
            "false positives@5_abs",
            "[email protected]_beta",
            "[email protected]_beta",
            "mediocre",
            "[email protected]_pct",
            "precision@10_abs",
            "[email protected]_pct",
            "precision@5_abs",
            "[email protected]_pct",
            "recall@10_abs",
            "[email protected]_pct",
            "recall@5_abs",
            "roc_auc",
            "true [email protected]_pct",
            "true negatives@10_abs",
            "true [email protected]_pct",
            "true negatives@5_abs",
            "true [email protected]_pct",
            "true positives@10_abs",
            "true [email protected]_pct",
            "true positives@5_abs",
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_train_matrix_store, model_id
        )
        records = [
            row[0]
            for row in db_engine.execute(
                """select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1""",
                (model_id, fake_train_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == ["accuracy", "roc_auc"]
Exemple #3
0
def test_evaluating_early_warning():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [{
            'metrics': [
                'precision@', 'recall@', 'true positives@', 'true negatives@',
                'false positives@', 'false negatives@'
            ],
            'thresholds': {
                'percentiles': [5.0, 10.0],
                'top_n': [5, 10]
            }
        }, {
            'metrics': [
                'f1', 'mediocre', 'accuracy', 'roc_auc',
                'average precision score'
            ],
        }, {
            'metrics': ['fbeta@'],
            'parameters': [{
                'beta': 0.75
            }, {
                'beta': 1.25
            }]
        }]

        custom_metrics = {'mediocre': always_half}

        model_evaluator = ModelEvaluator(metric_groups,
                                         db_engine,
                                         custom_metrics=custom_metrics)

        trained_model, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)

        labels = fake_labels(5)
        as_of_date = datetime.date(2016, 5, 5)
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], labels, model_id,
            as_of_date, as_of_date, '1y')

        # assert
        # that all of the records are there
        records = [
            row[0] for row in db_engine.execute(
                '''select distinct(metric || parameter)
                from results.evaluations
                where model_id = %s and
                evaluation_start_time = %s order by 1''', (model_id,
                                                           as_of_date))
        ]
        assert records == [
            'accuracy', 'average precision score', 'f1',
            'false [email protected]_pct', 'false negatives@10_abs',
            'false [email protected]_pct', 'false negatives@5_abs',
            'false [email protected]_pct', 'false positives@10_abs',
            'false [email protected]_pct', 'false positives@5_abs',
            '[email protected]_beta', '[email protected]_beta', 'mediocre',
            '[email protected]_pct', 'precision@10_abs', '[email protected]_pct',
            'precision@5_abs', '[email protected]_pct', 'recall@10_abs',
            '[email protected]_pct', 'recall@5_abs', 'roc_auc',
            'true [email protected]_pct', 'true negatives@10_abs',
            'true [email protected]_pct', 'true negatives@5_abs',
            'true [email protected]_pct', 'true positives@10_abs',
            'true [email protected]_pct', 'true positives@5_abs'
        ]