Example #1
def test_model_scoring_inspections(db_engine_with_results_schema):
    testing_metric_groups = [
        {
            "metrics": ["precision@", "recall@", "fpr@"],
            "thresholds": {
                "percentiles": [50.0],
                "top_n": [3]
            },
        },
        {
            # ensure we test a non-thresholded metric as well
            "metrics": ["accuracy"]
        },
    ]
    training_metric_groups = [{
        "metrics": ["accuracy"],
        "thresholds": {
            "percentiles": [50.0]
        }
    }]

    model_evaluator = ModelEvaluator(
        testing_metric_groups,
        training_metric_groups,
        db_engine_with_results_schema,
    )

    testing_labels = np.array([1, 0, np.nan, 1, 0])
    testing_prediction_probas = np.array([0.56, 0.4, 0.55, 0.5, 0.3])

    training_labels = np.array([0, 0, 1, 1, 1, 0, 1, 1])
    training_prediction_probas = np.array(
        [0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6])

    fake_train_matrix_store = MockMatrixStore("train", "efgh", 5,
                                              db_engine_with_results_schema,
                                              training_labels)
    fake_test_matrix_store = MockMatrixStore("test", "1234", 5,
                                             db_engine_with_results_schema,
                                             testing_labels)

    trained_model, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )

    # Evaluate testing matrix and test the results
    model_evaluator.evaluate(testing_prediction_probas, fake_test_matrix_store,
                             model_id)
    for record in db_engine_with_results_schema.execute(
            """select * from test_results.evaluations
        where model_id = %s and evaluation_start_time = %s
        order by 1""",
        (model_id, fake_test_matrix_store.as_of_dates[0]),
    ):
        assert record["num_labeled_examples"] == 4
        assert record["num_positive_labels"] == 2
        if record["parameter"] == "":
            assert record["num_labeled_above_threshold"] == 4
        elif "pct" in record["parameter"]:
            assert record["num_labeled_above_threshold"] == 1
        else:
            assert record["num_labeled_above_threshold"] == 2

    # Evaluate the training matrix and test the results
    model_evaluator.evaluate(training_prediction_probas,
                             fake_train_matrix_store, model_id)
    for record in db_engine_with_results_schema.execute(
            """select * from train_results.evaluations
        where model_id = %s and evaluation_start_time = %s
        order by 1""",
        (model_id, fake_train_matrix_store.as_of_dates[0]),
    ):
        assert record["num_labeled_examples"] == 8
        assert record["num_positive_labels"] == 5
        assert record["worst_value"] == 0.625
        assert record["best_value"] == 0.625
        assert record["stochastic_value"] == 0.625
        # best/worst are same, should shortcut trials
        assert record["num_sort_trials"] == 0
        assert record["standard_deviation"] == 0
Example #2
def test_evaluating_early_warning():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [{
            'metrics': [
                'precision@', 'recall@', 'true positives@', 'true negatives@',
                'false positives@', 'false negatives@'
            ],
            'thresholds': {
                'percentiles': [5.0, 10.0],
                'top_n': [5, 10]
            }
        }, {
            'metrics': [
                'f1', 'mediocre', 'accuracy', 'roc_auc',
                'average precision score'
            ],
        }, {
            'metrics': ['fbeta@'],
            'parameters': [{
                'beta': 0.75
            }, {
                'beta': 1.25
            }]
        }]

        custom_metrics = {'mediocre': always_half}

        model_evaluator = ModelEvaluator(metric_groups,
                                         db_engine,
                                         custom_metrics=custom_metrics)

        trained_model, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)

        labels = fake_labels(5)
        as_of_date = datetime.date(2016, 5, 5)
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], labels, model_id,
            as_of_date, as_of_date, '1y')

        # assert
        # that all of the records are there
        records = [
            row[0] for row in db_engine.execute(
                '''select distinct(metric || parameter)
                from results.evaluations
                where model_id = %s and
                evaluation_start_time = %s order by 1''', (model_id,
                                                           as_of_date))
        ]
        assert records == [
            'accuracy', 'average precision score', 'f1',
            'false negatives@10.0_pct', 'false negatives@10_abs',
            'false negatives@5.0_pct', 'false negatives@5_abs',
            'false positives@10.0_pct', 'false positives@10_abs',
            'false positives@5.0_pct', 'false positives@5_abs',
            'fbeta@0.75_beta', 'fbeta@1.25_beta', 'mediocre',
            'precision@10.0_pct', 'precision@10_abs', 'precision@5.0_pct',
            'precision@5_abs', 'recall@10.0_pct', 'recall@10_abs',
            'recall@5.0_pct', 'recall@5_abs', 'roc_auc',
            'true negatives@10.0_pct', 'true negatives@10_abs',
            'true negatives@5.0_pct', 'true negatives@5_abs',
            'true positives@10.0_pct', 'true positives@10_abs',
            'true positives@5.0_pct', 'true positives@5_abs'
        ]
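
This example registers the custom metric 'mediocre' through the always_half callable, but the listing does not show how always_half is defined. A minimal sketch of what such a callable could look like (the exact signature ModelEvaluator expects is an assumption here) follows:

def always_half(predictions_proba, labels, parameters=None):
    # Toy custom metric: ignore the inputs and always report 0.5.
    return 0.5

custom_metrics = {'mediocre': always_half}  # as passed to ModelEvaluator above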
Example #3
def test_evaluating_early_warning(db_engine_with_results_schema):
    num_entities = 10
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

    # Set up testing configuration parameters
    testing_metric_groups = [
        {
            "metrics": [
                "precision@",
                "recall@",
                "true positives@",
                "true negatives@",
                "false positives@",
                "false negatives@",
            ],
            "thresholds": {
                "percentiles": [5.0, 10.0],
                "top_n": [5, 10]
            },
        },
        {
            "metrics": [
                "f1",
                "mediocre",
                "accuracy",
                "roc_auc",
                "average precision score",
            ]
        },
        {
            "metrics": ["fbeta@"],
            "parameters": [{
                "beta": 0.75
            }, {
                "beta": 1.25
            }]
        },
    ]

    training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]

    custom_metrics = {"mediocre": always_half}

    # Acquire fake data and objects to be used in the tests
    model_evaluator = ModelEvaluator(
        testing_metric_groups,
        training_metric_groups,
        db_engine_with_results_schema,
        custom_metrics=custom_metrics,
    )

    fake_test_matrix_store = MockMatrixStore(
        matrix_type="test",
        matrix_uuid="efgh",
        label_count=num_entities,
        db_engine=db_engine_with_results_schema,
        init_labels=pd.DataFrame({
            "label_value": labels,
            "entity_id": list(range(num_entities)),
            "as_of_date": [TRAIN_END_TIME] * num_entities,
        }).set_index(["entity_id", "as_of_date"]).label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )
    fake_train_matrix_store = MockMatrixStore(
        matrix_type="train",
        matrix_uuid="1234",
        label_count=num_entities,
        db_engine=db_engine_with_results_schema,
        init_labels=pd.DataFrame({
            "label_value": labels,
            "entity_id": list(range(num_entities)),
            "as_of_date": [TRAIN_END_TIME] * num_entities,
        }).set_index(["entity_id", "as_of_date"]).label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )

    trained_model, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )

    # ensure that the matrix uuid is present
    matrix_uuids = [
        row[0] for row in db_engine_with_results_schema.execute(
            "select matrix_uuid from test_results.evaluations")
    ]
    assert all(matrix_uuid == "efgh" for matrix_uuid in matrix_uuids)

    # Evaluate the training metrics and test
    model_evaluator.evaluate(
        trained_model.predict_proba(labels)[:, 1], fake_train_matrix_store,
        model_id)
    records = [
        row[0] for row in db_engine_with_results_schema.execute(
            """select distinct(metric || parameter)
            from train_results.evaluations
            where model_id = %s and
            evaluation_start_time = %s
            order by 1""",
            (model_id, fake_train_matrix_store.as_of_dates[0]),
        )
    ]
    assert records == ["accuracy", "roc_auc"]

    # Run tests for overall and subset evaluations
    for subset in SUBSETS:
        if subset is None:
            where_hash = ""
        else:
            populate_subset_data(db_engine_with_results_schema, subset,
                                 list(range(num_entities)))
            SubsetFactory(subset_hash=filename_friendly_hash(subset))
            session.commit()
            where_hash = f"and subset_hash = '{filename_friendly_hash(subset)}'"

        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_test_matrix_store,
            model_id,
            subset=subset,
        )

        records = [
            row[0] for row in db_engine_with_results_schema.execute(
                f"""\
                select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                {where_hash}
                order by 1
                """,
                (model_id, fake_test_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == [
            "accuracy",
            "average precision score",
            "f1",
            "false negatives@10.0_pct",
            "false negatives@10_abs",
            "false negatives@5.0_pct",
            "false negatives@5_abs",
            "false positives@10.0_pct",
            "false positives@10_abs",
            "false positives@5.0_pct",
            "false positives@5_abs",
            "fbeta@0.75_beta",
            "fbeta@1.25_beta",
            "mediocre",
            "precision@10.0_pct",
            "precision@10_abs",
            "precision@5.0_pct",
            "precision@5_abs",
            "recall@10.0_pct",
            "recall@10_abs",
            "recall@5.0_pct",
            "recall@5_abs",
            "roc_auc",
            "true negatives@10.0_pct",
            "true negatives@10_abs",
            "true negatives@5.0_pct",
            "true negatives@5_abs",
            "true positives@10.0_pct",
            "true positives@10_abs",
            "true positives@5.0_pct",
            "true positives@5_abs",
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_train_matrix_store,
            model_id,
            subset=subset,
        )

        records = [
            row[0] for row in db_engine_with_results_schema.execute(
                f"""select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                {where_hash}
                order by 1""",
                (model_id, fake_train_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == ["accuracy", "roc_auc"]

    # ensure that the matrix uuid is present
    matrix_uuids = [
        row[0] for row in db_engine_with_results_schema.execute(
            "select matrix_uuid from train_results.evaluations")
    ]
    assert all(matrix_uuid == "1234" for matrix_uuid in matrix_uuids)
Example #4
def test_predictor_retrieve():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)

        _, model_id = \
            fake_trained_model(project_path, model_storage_engine, db_engine, train_matrix_uuid='1234')

        predictor = Predictor(project_path,
                              model_storage_engine,
                              db_engine,
                              replace=False)

        dayone = datetime.date(2011, 1,
                               1).strftime(predictor.expected_matrix_ts_format)
        daytwo = datetime.date(2011, 1,
                               2).strftime(predictor.expected_matrix_ts_format)

        # create prediction set
        matrix_data = {
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [dayone, dayone, daytwo, daytwo],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7]
        }
        matrix = pandas.DataFrame.from_dict(matrix_data)\
            .set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_timespan': '3month',
            'metta-uuid': '1234',
            'indices': ['entity_id', 'as_of_date'],
            'matrix_type': 'test'
        }

        matrix_store = InMemoryMatrixStore(matrix, metadata)

        predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two'])

        # When run again, the predictions retrieved from the database
        # should match.
        #
        # Some trickiness here. Let's explain:
        #
        # If we are not careful, retrieving predictions from the database and
        # presenting them as a numpy array can result in a bad ordering,
        # since the given matrix may not be 'ordered' by some criteria
        # that can be easily represented by an ORDER BY clause.
        #
        # It will sometimes work, because without ORDER BY the rows come back
        # in the table's physical order, which (unless something has happened
        # to the table) is the order they were inserted in, and that may well
        # match the order of the matrix. So the bug would not necessarily show
        # itself immediately, but when it does go wrong your scores will be
        # garbage.
        #
        # So we simulate a table order mutation that can happen over time:
        # Remove the first row and put it at the end.
        # If the Predictor doesn't explicitly reorder the results, this will fail
        # Only running on TestPrediction because TrainPrediction behaves exactly the same way
        reorder_session = sessionmaker(bind=db_engine)()
        obj = reorder_session.query(TestPrediction).first()
        reorder_session.delete(obj)
        reorder_session.commit()

        make_transient(obj)
        reorder_session = sessionmaker(bind=db_engine)()
        reorder_session.add(obj)
        reorder_session.commit()

        predictor.load_model = Mock()
        new_predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two'])
        assert_array_equal(new_predict_proba, predict_proba)
        assert not predictor.load_model.called
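
The reordering concern spelled out in the comments above can be made explicit when reading predictions back: sort the rows deterministically instead of relying on the table's physical order. A hedged sketch (the test_results.predictions table name mirrors this test, but the score column name and the exact query Predictor runs internally are assumptions):

rows = db_engine.execute(
    '''select entity_id, as_of_date, score
       from test_results.predictions
       where model_id = %s
       order by as_of_date, entity_id''',
    (model_id,),
).fetchall()
# With an explicit ORDER BY, the retrieved scores no longer depend on the
# physical row order that the delete-and-reinsert above deliberately disturbs.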
Example #5
def test_model_scoring_inspections():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [{
            'metrics': ['precision@', 'recall@', 'fpr@'],
            'thresholds': {'percentiles': [50.0], 'top_n': [3]}
        }, {
            # ensure we test a non-thresholded metric as well
            'metrics': ['accuracy'],
        }]
        training_metric_groups = [{'metrics': ['accuracy'], 'thresholds': {'percentiles': [50.0]}}]

        model_evaluator = ModelEvaluator(testing_metric_groups, training_metric_groups, db_engine)

        testing_labels = numpy.array([True, False, numpy.nan, True, False])
        testing_prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])

        training_labels = numpy.array([False, False, True, True, True, False, True, True])
        training_prediction_probas = numpy.array([0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6])

        fake_train_matrix_store = MockMatrixStore('train', 'efgh', 5, db_engine, training_labels)
        fake_test_matrix_store = MockMatrixStore('test', '1234', 5, db_engine, testing_labels)

        trained_model, model_id = fake_trained_model(
            'myproject',
            InMemoryModelStorageEngine('myproject'),
            db_engine
        )

        # Evaluate testing matrix and test the results
        model_evaluator.evaluate(
            testing_prediction_probas,
            fake_test_matrix_store,
            model_id,
        )
        for record in db_engine.execute(
            '''select * from test_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1''',
            (model_id, fake_test_matrix_store.as_of_dates[0])
        ):
            assert record['num_labeled_examples'] == 4
            assert record['num_positive_labels'] == 2
            if record['parameter'] == '':
                assert record['num_labeled_above_threshold'] == 4
            elif 'pct' in record['parameter']:
                assert record['num_labeled_above_threshold'] == 1
            else:
                assert record['num_labeled_above_threshold'] == 2

        # Evaluate the training matrix and test the results
        model_evaluator.evaluate(
            training_prediction_probas,
            fake_train_matrix_store,
            model_id,
        )
        for record in db_engine.execute(
            '''select * from train_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1''',
            (model_id, fake_train_matrix_store.as_of_dates[0])
        ):
            assert record['num_labeled_examples'] == 8
            assert record['num_positive_labels'] == 5
            assert record['value'] == 0.625
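
The 0.625 asserted above can be reproduced by hand from the eight training rows. A minimal sketch (assuming the 50th-percentile threshold marks the top half of the score-ranked rows as predicted positive, which matches the asserted value):

import numpy as np

labels = np.array([0, 0, 1, 1, 1, 0, 1, 1])
probas = np.array([0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6])

cutoff = len(probas) // 2               # top 50% of 8 rows -> 4 rows
top = np.argsort(-probas)[:cutoff]      # highest-scored rows
predicted = np.zeros_like(labels)
predicted[top] = 1                      # rows above the cutoff predicted positive
print((predicted == labels).mean())     # 5 of 8 correct -> 0.625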
Example #6
def test_predictor():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(project_path)

            _, model_id = \
                fake_trained_model(project_path, model_storage_engine, db_engine, train_matrix_uuid='1234')

            predictor = Predictor(project_path, model_storage_engine,
                                  db_engine)

            # create prediction set
            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')

            metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE,
                'label_timespan': '3month',
                'metta-uuid': '1234',
                'indices': ['entity_id'],
            }

            train_matrix_columns = ['feature_one', 'feature_two']

            # Runs the same test for training and testing predictions
            for mat_type in ("train", "test"):
                # Create the matrix to be tested and store in db
                metadata['matrix_type'] = mat_type

                matrix_store = InMemoryMatrixStore(matrix, metadata)

                # Note, the first time 'matrix' is used, the label column is popped.
                # It must be added back in to 'matrix' to create another matrix_store.
                matrix['label'] = [7, 8]

                predict_proba = predictor.predict(
                    model_id,
                    matrix_store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_matrix_columns)

                # assert
                # 1. that the returned predictions are of the desired length
                assert len(predict_proba) == 2

                # 2. that the predictions table entries are present and
                # can be linked to the original models
                records = [
                    row for row in db_engine.execute(
                        '''select entity_id, as_of_date
                    from {}_results.predictions
                    join model_metadata.models using (model_id)'''.format(mat_type))
                ]
                assert len(records) == 2

                # 3. that the contained as_of_dates match what we sent in
                for record in records:
                    assert record[1].date() == AS_OF_DATE

                # 4. that the entity ids match the given dataset
                assert sorted([record[0] for record in records]) == [1, 2]

            # 5. running with same model_id, different as of date
            # then with same as of date only replaces the records
            # with the same date
            new_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            new_metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE + datetime.timedelta(days=1),
                'label_timespan': '3month',
                'metta-uuid': '1234',
                'indices': ['entity_id'],
            }

            # Runs the same test for training and testing predictions
            for mat_type in ("train", "test"):

                # Create the matrix to be tested and store in db
                new_metadata['matrix_type'] = mat_type

                new_matrix_store = InMemoryMatrixStore(new_matrix,
                                                       new_metadata)

                # Adding 'label' column back into new_matrix
                new_matrix['label'] = [7, 8]

                predictor.predict(model_id,
                                  new_matrix_store,
                                  misc_db_parameters=dict(),
                                  train_matrix_columns=train_matrix_columns)
                predictor.predict(model_id,
                                  matrix_store,
                                  misc_db_parameters=dict(),
                                  train_matrix_columns=train_matrix_columns)
                records = [
                    row for row in db_engine.execute(
                        '''select entity_id, as_of_date
                    from {}_results.predictions
                    join model_metadata.models using (model_id)'''.format(mat_type))
                ]
                assert len(records) == 4

            # 6. That we can delete the model when we are done predicting with it
            predictor.delete_model(model_id)
            assert predictor.load_model(model_id) is None
Example #7
def test_predictor_composite_index():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)

        _, model_id = \
            fake_trained_model(project_path, model_storage_engine, db_engine, train_matrix_uuid='1234')

        predictor = Predictor(project_path, model_storage_engine, db_engine)

        dayone = datetime.datetime(2011, 1, 1)
        daytwo = datetime.datetime(2011, 1, 2)

        # create prediction set
        matrix = pandas.DataFrame.from_dict({
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [dayone, dayone, daytwo, daytwo],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7]
        }).set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_timespan': '3month',
            'metta-uuid': '1234',
            'indices': ['entity_id', 'as_of_date'],
        }

        # Runs the same test for training and testing predictions
        for mat_type in ("train", "test"):

            # Create the matrix to be tested and store in db
            metadata['matrix_type'] = mat_type
            matrix_store = InMemoryMatrixStore(matrix, metadata)

            # Adding 'label' column back into matrix
            matrix['label'] = [7, 8, 8, 7]

            predict_proba = predictor.predict(
                model_id,
                matrix_store,
                misc_db_parameters=dict(),
                train_matrix_columns=['feature_one', 'feature_two'])

            # assert
            # 1. that the returned predictions are of the desired length
            assert len(predict_proba) == 4

            # 2. that the predictions table entries are present and
            # can be linked to the original models
            records = [
                row
                for row in db_engine.execute('''select entity_id, as_of_date
                from {}_results.predictions
                join model_metadata.models using (model_id)'''.format(mat_type))
            ]
            assert len(records) == 4
Example #8
def test_predictor():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(s3_conn, project_path)
            _, model_id = \
                fake_trained_model(project_path, model_storage_engine, db_engine)
            predictor = Predictor(project_path, model_storage_engine, db_engine)
            # create prediction set
            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE,
                'label_window': '3month',
                'metta-uuid': '1234',
            }

            matrix_store = InMemoryMatrixStore(matrix, metadata)
            predict_proba = predictor.predict(model_id, matrix_store, misc_db_parameters=dict())

            # assert
            # 1. that the returned predictions are of the desired length
            assert len(predict_proba) == 2

            # 2. that the predictions table entries are present and
            # can be linked to the original models
            records = [
                row for row in
                db_engine.execute('''select entity_id, as_of_date
                from results.predictions
                join results.models using (model_id)''')
            ]
            assert len(records) == 2

            # 3. that the contained as_of_dates match what we sent in
            for record in records:
                assert record[1].date() == AS_OF_DATE

            # 4. that the entity ids match the given dataset
            assert sorted([record[0] for record in records]) == [1, 2]

            # 5. running with same model_id, different as of date
            # then with same as of date only replaces the records
            # with the same date
            new_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            new_metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE + datetime.timedelta(days=1),
                'label_window': '3month',
                'metta-uuid': '1234',
            }
            new_matrix_store = InMemoryMatrixStore(new_matrix, new_metadata)
            predictor.predict(model_id, new_matrix_store, misc_db_parameters=dict())
            predictor.predict(model_id, matrix_store, misc_db_parameters=dict())
            records = [
                row for row in
                db_engine.execute('''select entity_id, as_of_date
                from results.predictions
                join results.models using (model_id)''')
            ]
            assert len(records) == 4

            # 6. That we can delete the model when we are done predicting with it
            predictor.delete_model(model_id)
            assert predictor.load_model(model_id) is None
Example #9
def test_evaluating_early_warning():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [
            {
                "metrics": [
                    "precision@",
                    "recall@",
                    "true positives@",
                    "true negatives@",
                    "false positives@",
                    "false negatives@",
                ],
                "thresholds": {
                    "percentiles": [5.0, 10.0],
                    "top_n": [5, 10]
                },
            },
            {
                "metrics": [
                    "f1",
                    "mediocre",
                    "accuracy",
                    "roc_auc",
                    "average precision score",
                ]
            },
            {
                "metrics": ["fbeta@"],
                "parameters": [{
                    "beta": 0.75
                }, {
                    "beta": 1.25
                }]
            },
        ]

        training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]

        custom_metrics = {"mediocre": always_half}

        model_evaluator = ModelEvaluator(
            testing_metric_groups,
            training_metric_groups,
            db_engine,
            custom_metrics=custom_metrics,
        )

        labels = fake_labels(5)
        fake_train_matrix_store = MockMatrixStore("train", "efgh", 5,
                                                  db_engine, labels)
        fake_test_matrix_store = MockMatrixStore("test", "1234", 5, db_engine,
                                                 labels)

        trained_model, model_id = fake_trained_model(db_engine)

        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_test_matrix_store,
            model_id)
        records = [
            row[0] for row in db_engine.execute(
                """select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1""",
                (model_id, fake_test_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == [
            "accuracy",
            "average precision score",
            "f1",
            "false negatives@10.0_pct",
            "false negatives@10_abs",
            "false negatives@5.0_pct",
            "false negatives@5_abs",
            "false positives@10.0_pct",
            "false positives@10_abs",
            "false positives@5.0_pct",
            "false positives@5_abs",
            "fbeta@0.75_beta",
            "fbeta@1.25_beta",
            "mediocre",
            "precision@10.0_pct",
            "precision@10_abs",
            "precision@5.0_pct",
            "precision@5_abs",
            "recall@10.0_pct",
            "recall@10_abs",
            "recall@5.0_pct",
            "recall@5_abs",
            "roc_auc",
            "true negatives@10.0_pct",
            "true negatives@10_abs",
            "true negatives@5.0_pct",
            "true negatives@5_abs",
            "true positives@10.0_pct",
            "true positives@10_abs",
            "true positives@5.0_pct",
            "true positives@5_abs",
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_train_matrix_store,
            model_id)
        records = [
            row[0] for row in db_engine.execute(
                """select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1""",
                (model_id, fake_train_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == ["accuracy", "roc_auc"]
Example #10
def test_model_scoring_inspections():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [
            {
                "metrics": ["precision@", "recall@", "fpr@"],
                "thresholds": {
                    "percentiles": [50.0],
                    "top_n": [3]
                },
            },
            {
                # ensure we test a non-thresholded metric as well
                "metrics": ["accuracy"]
            },
        ]
        training_metric_groups = [{
            "metrics": ["accuracy"],
            "thresholds": {
                "percentiles": [50.0]
            }
        }]

        model_evaluator = ModelEvaluator(testing_metric_groups,
                                         training_metric_groups, db_engine)

        testing_labels = numpy.array([True, False, numpy.nan, True, False])
        testing_prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])

        training_labels = numpy.array(
            [False, False, True, True, True, False, True, True])
        training_prediction_probas = numpy.array(
            [0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6])

        fake_train_matrix_store = MockMatrixStore("train", "efgh", 5,
                                                  db_engine, training_labels)
        fake_test_matrix_store = MockMatrixStore("test", "1234", 5, db_engine,
                                                 testing_labels)

        trained_model, model_id = fake_trained_model(db_engine)

        # Evaluate testing matrix and test the results
        model_evaluator.evaluate(testing_prediction_probas,
                                 fake_test_matrix_store, model_id)
        for record in db_engine.execute(
                """select * from test_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1""",
            (model_id, fake_test_matrix_store.as_of_dates[0]),
        ):
            assert record["num_labeled_examples"] == 4
            assert record["num_positive_labels"] == 2
            if record["parameter"] == "":
                assert record["num_labeled_above_threshold"] == 4
            elif "pct" in record["parameter"]:
                assert record["num_labeled_above_threshold"] == 1
            else:
                assert record["num_labeled_above_threshold"] == 2

        # Evaluate the training matrix and test the results
        model_evaluator.evaluate(training_prediction_probas,
                                 fake_train_matrix_store, model_id)
        for record in db_engine.execute(
                """select * from train_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1""",
            (model_id, fake_train_matrix_store.as_of_dates[0]),
        ):
            assert record["num_labeled_examples"] == 8
            assert record["num_positive_labels"] == 5
            assert record["value"] == 0.625