def test_all_same_labels(db_engine_with_results_schema):
    """Degenerate-label handling: with an all-0 or all-1 label vector,
    accuracy is still computable (non-NULL best/worst/stochastic values),
    while roc_auc — which needs both classes present — is stored as NULL.
    """
    num_entities = 5
    trained_model, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )
    # Run once with every label 0 and once with every label 1.
    for label_value in [0, 1]:
        labels = [label_value] * num_entities

        # We should be able to calculate accuracy even if all of the labels
        # are the same, but ROC_AUC requires some positive and some
        # negative labels, so we should get one NULL value
        # for this config
        training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]

        # Acquire fake data and objects to be used in the tests
        model_evaluator = ModelEvaluator(
            {}, training_metric_groups, db_engine_with_results_schema,
        )
        # Unique matrix_uuid per label configuration (str(labels)) so the two
        # iterations write distinct evaluation rows.
        fake_matrix_store = MockMatrixStore(
            matrix_type="train",
            matrix_uuid=str(labels),
            label_count=num_entities,
            db_engine=db_engine_with_results_schema,
            init_labels=pd.DataFrame(
                {
                    "label_value": labels,
                    "entity_id": list(range(num_entities)),
                    "as_of_date": [TRAIN_END_TIME] * num_entities,
                }
            )
            .set_index(["entity_id", "as_of_date"])
            .label_value,
            init_as_of_dates=[TRAIN_END_TIME],
        )
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_matrix_store, model_id
        )
        # accuracy rows must be fully populated; roc_auc rows must be NULL.
        for metric, best, worst, stochastic in db_engine_with_results_schema.execute(
            f"""select metric, best_value, worst_value, stochastic_value
            from train_results.evaluations
            where model_id = %s and
            evaluation_start_time = %s
            order by 1""",
            (model_id, fake_matrix_store.as_of_dates[0]),
        ):
            if metric == "accuracy":
                assert best is not None
                assert worst is not None
                assert stochastic is not None
            else:
                assert best is None
                assert worst is None
                assert stochastic is None
def test_evaluation_with_protected_df(db_engine_with_results_schema):
    """Passing a protected_df (with a bias config that supplies threshold
    info — the only part that is truly required) should cause an Aequitas
    report to be written to the database.
    """
    evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [3]},
            },
        ],
        training_metric_groups=[],
        bias_config={"thresholds": {"top_n": [2]}},
        db_engine=db_engine_with_results_schema,
    )

    labels = np.array([1, 0])
    scores = np.array([0.56, 0.55])
    matrix_store = MockMatrixStore(
        "test", "1234", 5, db_engine_with_results_schema, labels
    )
    _, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )

    # One protected attribute, constant ("value1") for every entity in the
    # matrix; entity ids are taken from the design matrix index.
    entity_ids = matrix_store.design_matrix.index.levels[0].tolist()
    protected_df = pd.DataFrame(
        {"entity_id": entity_ids, "protectedattribute1": "value1"}
    )

    evaluator.evaluate(scores, matrix_store, model_id, protected_df)

    # Every Aequitas row written for this model/date should reflect the
    # bias-config threshold (2_abs) and the single attribute/value pair.
    for record in db_engine_with_results_schema.execute(
        """select * from test_results.aequitas
        where model_id = %s and evaluation_start_time = %s order by 1""",
        (model_id, matrix_store.as_of_dates[0]),
    ):
        assert record["model_id"] == model_id
        assert record["parameter"] == "2_abs"
        assert record["attribute_name"] == "protectedattribute1"
        assert record["attribute_value"] == "value1"
def test_model_scoring_inspections():
    """Legacy-schema variant: checks num_labeled_examples / num_positive_labels
    bookkeeping in results.evaluations, including a NaN label that should be
    excluded from labeled-example counts.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [
            {
                'metrics': ['precision@', 'recall@', 'fpr@'],
                'thresholds': {
                    'percentiles': [50.0],
                    'top_n': [3]
                }
            },
            {
                # ensure we test a non-thresholded metric as well
                'metrics': ['accuracy'],
            }
        ]
        model_evaluator = ModelEvaluator(metric_groups, db_engine)
        _, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)
        # numpy.nan label: 5 entities but only 4 labeled examples.
        labels = numpy.array([True, False, numpy.nan, True, False])
        prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])
        evaluation_start = datetime.datetime(2016, 4, 1)
        evaluation_end = datetime.datetime(2016, 7, 1)
        example_as_of_date_frequency = '1d'
        model_evaluator.evaluate(
            prediction_probas,
            labels,
            model_id,
            evaluation_start,
            evaluation_end,
            example_as_of_date_frequency)
        for record in db_engine.execute(
                '''select * from results.evaluations
                where model_id = %s and evaluation_start_time = %s
                order by 1''',
                (model_id, evaluation_start)):
            assert record['num_labeled_examples'] == 4
            assert record['num_positive_labels'] == 2
            # '' parameter -> non-thresholded metric (accuracy): all 4 labeled;
            # percentile threshold keeps 1; top_n threshold keeps 2.
            if record['parameter'] == '':
                assert record['num_labeled_above_threshold'] == 4
            elif 'pct' in record['parameter']:
                assert record['num_labeled_above_threshold'] == 1
            else:
                assert record['num_labeled_above_threshold'] == 2
def test_evaluation_with_sort_ties(db_engine_with_results_schema):
    """Tied scores at the top-n cutoff should yield distinct best/worst
    precision values, a stochastic estimate strictly between them, a
    nonzero standard deviation, and the configured number of sort trials.
    """
    evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [3]},
            },
        ],
        training_metric_groups=[],
        db_engine=db_engine_with_results_schema,
    )

    labels = np.array([1, 0, 1, 0, 0])
    # Two entities share the 0.5 score, creating a tie exactly at the
    # top-3 cutoff, so precision@3 depends on tiebreaking order.
    scores = np.array([0.56, 0.55, 0.5, 0.5, 0.3])
    matrix_store = MockMatrixStore(
        "test", "1234", 5, db_engine_with_results_schema, labels
    )
    _, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )

    evaluator.evaluate(scores, matrix_store, model_id)

    for record in db_engine_with_results_schema.execute(
        """select * from test_results.evaluations
        where model_id = %s and evaluation_start_time = %s order by 1""",
        (model_id, matrix_store.as_of_dates[0]),
    ):
        assert record["num_labeled_examples"] == 5
        assert record["num_positive_labels"] == 2
        # worst = 1/3 (tie resolved against us), best = 2/3 (tie in our favor)
        assert_almost_equal(float(record["worst_value"]), 0.33333, 5)
        assert_almost_equal(float(record["best_value"]), 0.66666, 5)
        assert record["num_sort_trials"] == SORT_TRIALS
        assert record["worst_value"] < record["stochastic_value"] < record["best_value"]
        assert record["standard_deviation"]
class ModelTester(object):
    """Orchestrates the post-training stage of the pipeline: individual
    importance calculation, prediction, and evaluation, for each model id
    against its test (and train) matrices.
    """

    def __init__(
        self,
        db_engine,
        model_storage_engine,
        matrix_storage_engine,
        replace,
        evaluator_config,
        individual_importance_config,
    ):
        # Retained for looking up test matrices by uuid in task generation.
        self.matrix_storage_engine = matrix_storage_engine
        self.predictor = Predictor(
            db_engine=db_engine,
            model_storage_engine=model_storage_engine,
            replace=replace,
        )
        # Defaults: top-5 ranks, 'uniform' method, when config omits them.
        self.individual_importance_calculator = IndividualImportanceCalculator(
            db_engine=db_engine,
            n_ranks=individual_importance_config.get("n_ranks", 5),
            methods=individual_importance_config.get("methods", ["uniform"]),
            replace=replace,
        )
        self.evaluator = ModelEvaluator(
            db_engine=db_engine,
            sort_seed=evaluator_config.get("sort_seed", None),
            testing_metric_groups=evaluator_config.get("testing_metric_groups", []),
            training_metric_groups=evaluator_config.get("training_metric_groups", []),
        )

    def generate_model_test_tasks(self, split, train_store, model_ids):
        """Build one task dict per non-empty test matrix in the split.

        Each task pairs a test store with the shared train store and the
        (falsy-filtered) list of model ids. Empty test matrices are logged
        and skipped.
        """
        test_tasks = []
        for test_matrix_def, test_uuid in zip(
            split["test_matrices"], split["test_uuids"]
        ):
            test_store = self.matrix_storage_engine.get_store(test_uuid)
            if test_store.empty:
                logging.warning(
                    """Test matrix for uuid %s
                    was empty, no point in generating predictions. Not creating test task.
                    """,
                    test_uuid,
                )
                continue
            test_tasks.append(
                {
                    "test_store": test_store,
                    "train_store": train_store,
                    # drop falsy model ids (e.g. None placeholders)
                    "model_ids": [model_id for model_id in model_ids if model_id],
                }
            )
        return test_tasks

    def process_model_test_task(self, test_store, train_store, model_ids):
        """Run importance calculation, prediction, and evaluation for every
        model id in the task, over both the test and train matrices.
        """
        as_of_times = test_store.metadata["as_of_times"]
        logging.info(
            "Testing and scoring all model ids with test matrix %s. "
            "as_of_times min: %s max: %s num: %s",
            test_store.uuid,
            min(as_of_times),
            max(as_of_times),
            len(as_of_times),
        )
        for model_id in model_ids:
            logging.info("Testing model id %s", model_id)
            self.individual_importance_calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store
            )

            # Generate predictions for the testing data then training data
            for store in (test_store, train_store):
                # Predictions are always aligned to the training matrix's
                # column order, regardless of which store is being scored.
                predictions_proba = self.predictor.predict(
                    model_id,
                    store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_store.columns(),
                )
                self.evaluator.evaluate(
                    predictions_proba=predictions_proba,
                    matrix_store=store,
                    model_id=model_id,
                )
def test_evaluating_early_warning():
    """End-to-end check that ModelEvaluator writes exactly one evaluation row
    per configured metric/parameter combination: the full cross-product for
    testing metrics, and only accuracy/roc_auc for training metrics.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [
            {
                "metrics": [
                    "precision@",
                    "recall@",
                    "true positives@",
                    "true negatives@",
                    "false positives@",
                    "false negatives@",
                ],
                "thresholds": {"percentiles": [5.0, 10.0], "top_n": [5, 10]},
            },
            {
                "metrics": [
                    "f1",
                    "mediocre",
                    "accuracy",
                    "roc_auc",
                    "average precision score",
                ]
            },
            {"metrics": ["fbeta@"], "parameters": [{"beta": 0.75}, {"beta": 1.25}]},
        ]
        training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]
        # 'mediocre' is a custom metric supplied via the always_half callable.
        custom_metrics = {"mediocre": always_half}
        model_evaluator = ModelEvaluator(
            testing_metric_groups,
            training_metric_groups,
            db_engine,
            custom_metrics=custom_metrics,
        )
        labels = fake_labels(5)
        fake_train_matrix_store = MockMatrixStore("train", "efgh", 5, db_engine, labels)
        fake_test_matrix_store = MockMatrixStore("test", "1234", 5, db_engine, labels)
        trained_model, model_id = fake_trained_model(db_engine)

        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_test_matrix_store, model_id
        )
        records = [
            row[0]
            for row in db_engine.execute(
                """select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1""",
                (model_id, fake_test_matrix_store.as_of_dates[0]),
            )
        ]
        # Sorted cross-product of all configured testing metrics/thresholds.
        assert records == [
            "accuracy",
            "average precision score",
            "f1",
            "false [email protected]_pct",
            "false negatives@10_abs",
            "false [email protected]_pct",
            "false negatives@5_abs",
            "false [email protected]_pct",
            "false positives@10_abs",
            "false [email protected]_pct",
            "false positives@5_abs",
            "[email protected]_beta",
            "[email protected]_beta",
            "mediocre",
            "[email protected]_pct",
            "precision@10_abs",
            "[email protected]_pct",
            "precision@5_abs",
            "[email protected]_pct",
            "recall@10_abs",
            "[email protected]_pct",
            "recall@5_abs",
            "roc_auc",
            "true [email protected]_pct",
            "true negatives@10_abs",
            "true [email protected]_pct",
            "true negatives@5_abs",
            "true [email protected]_pct",
            "true positives@10_abs",
            "true [email protected]_pct",
            "true positives@5_abs",
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_train_matrix_store, model_id
        )
        records = [
            row[0]
            for row in db_engine.execute(
                """select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1""",
                (model_id, fake_train_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == ["accuracy", "roc_auc"]
def test_model_scoring_inspections():
    """Checks the labeled-example bookkeeping columns written to
    test_results.evaluations (with a NaN test label excluded from counts)
    and the single-value accuracy written to train_results.evaluations.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [
            {
                "metrics": ["precision@", "recall@", "fpr@"],
                "thresholds": {"percentiles": [50.0], "top_n": [3]},
            },
            {
                # ensure we test a non-thresholded metric as well
                "metrics": ["accuracy"]
            },
        ]
        training_metric_groups = [
            {"metrics": ["accuracy"], "thresholds": {"percentiles": [50.0]}}
        ]
        model_evaluator = ModelEvaluator(
            testing_metric_groups, training_metric_groups, db_engine
        )
        # NaN label -> 5 entities but only 4 labeled testing examples.
        testing_labels = numpy.array([True, False, numpy.nan, True, False])
        testing_prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])
        training_labels = numpy.array(
            [False, False, True, True, True, False, True, True]
        )
        training_prediction_probas = numpy.array(
            [0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6]
        )
        fake_train_matrix_store = MockMatrixStore(
            "train", "efgh", 5, db_engine, training_labels
        )
        fake_test_matrix_store = MockMatrixStore(
            "test", "1234", 5, db_engine, testing_labels
        )
        trained_model, model_id = fake_trained_model(db_engine)

        # Evaluate testing matrix and test the results
        model_evaluator.evaluate(
            testing_prediction_probas, fake_test_matrix_store, model_id
        )
        for record in db_engine.execute(
            """select * from test_results.evaluations
            where model_id = %s and
            evaluation_start_time = %s
            order by 1""",
            (model_id, fake_test_matrix_store.as_of_dates[0]),
        ):
            assert record["num_labeled_examples"] == 4
            assert record["num_positive_labels"] == 2
            # '' -> non-thresholded metric; pct -> percentile threshold keeps 1;
            # otherwise top_n threshold keeps 2 labeled examples.
            if record["parameter"] == "":
                assert record["num_labeled_above_threshold"] == 4
            elif "pct" in record["parameter"]:
                assert record["num_labeled_above_threshold"] == 1
            else:
                assert record["num_labeled_above_threshold"] == 2

        # Evaluate the training matrix and test the results
        model_evaluator.evaluate(
            training_prediction_probas, fake_train_matrix_store, model_id
        )
        for record in db_engine.execute(
            """select * from train_results.evaluations
            where model_id = %s and
            evaluation_start_time = %s
            order by 1""",
            (model_id, fake_train_matrix_store.as_of_dates[0]),
        ):
            assert record["num_labeled_examples"] == 8
            assert record["num_positive_labels"] == 5
            # accuracy at the 50th percentile threshold: 5/8 = 0.625
            assert record["value"] == 0.625
def test_evaluation_sorting_with_protected_df(db_engine_with_results_schema):
    # Test that if a protected_df is passed (along with bias config, the only real needed one
    # being threshold info), an Aequitas report is written to the database.
    # This variant checks per-group counts (size, positive/negative labels)
    # when the protected attribute varies across entities.
    model_evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [3]},
            },
        ],
        training_metric_groups=[],
        bias_config={"thresholds": {"top_n": [2]}},
        db_engine=db_engine_with_results_schema,
    )
    testing_labels = np.array([1, 1, 1, 0, 1])
    testing_prediction_probas = np.array([0.56, 0.55, 0.92, 0.85, 0.24])
    # Explicit matrix + labels so the design-matrix index (entity_id,
    # as_of_date) is known and reusable for the protected_df below.
    fake_test_matrix_store = MockMatrixStore(
        "test",
        "1234",
        5,
        db_engine_with_results_schema,
        metadata_overrides={"as_of_times": [TRAIN_END_TIME]},
        matrix=pd.DataFrame.from_dict(
            {
                "entity_id": [1, 2, 3, 4, 5],
                "as_of_date": [pd.Timestamp(2016, 1, 1)] * 5,
                "feature_one": [3, 4, 3, 4, 3],
                "feature_two": [5, 6, 5, 6, 5],
                "label": testing_labels,
            }
        ).set_index(MatrixStore.indices),
        init_labels=pd.DataFrame(
            {
                "label_value": testing_labels,
                "entity_id": [1, 2, 3, 4, 5],
                "as_of_date": [pd.Timestamp(2016, 1, 1)] * 5,
            }
        )
        .set_index(["entity_id", "as_of_date"])
        .label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )
    trained_model, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )
    # protected_df is aligned by sharing the design matrix's index rather
    # than by explicit entity_id/as_of_date columns.
    protected_df = pd.DataFrame(
        {
            # "entity_id": fake_test_matrix_store.design_matrix.index.levels[0].tolist(),
            # "as_of_date": fake_test_matrix_store.design_matrix.index.levels[1].tolist(),
            "protectedattribute1": ["low", "low", "low", "high", "high"]
        },
        index=fake_test_matrix_store.design_matrix.index,
    )

    # should be low has 3 records, all 1's; high has 2 records, one 1
    expected = {
        "low": {"group_size": 3, "group_label_neg": 0, "group_label_pos": 3},
        "high": {"group_size": 2, "group_label_neg": 1, "group_label_pos": 1},
    }
    model_evaluator.evaluate(
        testing_prediction_probas, fake_test_matrix_store, model_id, protected_df
    )
    for record in db_engine_with_results_schema.execute(
        """select * from test_results.aequitas
        where model_id = %s and evaluation_start_time = %s order by 1""",
        (model_id, fake_test_matrix_store.as_of_dates[0]),
    ):
        assert record["model_id"] == model_id
        assert record["parameter"] == "2_abs"
        assert record["attribute_name"] == "protectedattribute1"
        # Per-group counts must match the expectations for this group value.
        for col, value in expected[record["attribute_value"]].items():
            assert record[col] == value
def test_model_scoring_inspections(db_engine_with_results_schema):
    """Fixture-based variant: verifies labeled-example bookkeeping for the
    test matrix (NaN label excluded) and, for training accuracy where there
    are no score ties, that best/worst/stochastic values coincide and the
    sort-trial machinery is short-circuited (0 trials, 0 std deviation).
    """
    testing_metric_groups = [
        {
            "metrics": ["precision@", "recall@", "fpr@"],
            "thresholds": {"percentiles": [50.0], "top_n": [3]},
        },
        {
            # ensure we test a non-thresholded metric as well
            "metrics": ["accuracy"]
        },
    ]
    training_metric_groups = [
        {"metrics": ["accuracy"], "thresholds": {"percentiles": [50.0]}}
    ]
    model_evaluator = ModelEvaluator(
        testing_metric_groups,
        training_metric_groups,
        db_engine_with_results_schema,
    )
    # np.nan label -> 5 entities but only 4 labeled testing examples.
    testing_labels = np.array([1, 0, np.nan, 1, 0])
    testing_prediction_probas = np.array([0.56, 0.4, 0.55, 0.5, 0.3])
    training_labels = np.array([0, 0, 1, 1, 1, 0, 1, 1])
    training_prediction_probas = np.array([0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6])
    fake_train_matrix_store = MockMatrixStore(
        "train", "efgh", 5, db_engine_with_results_schema, training_labels
    )
    fake_test_matrix_store = MockMatrixStore(
        "test", "1234", 5, db_engine_with_results_schema, testing_labels
    )
    trained_model, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )

    # Evaluate testing matrix and test the results
    model_evaluator.evaluate(
        testing_prediction_probas, fake_test_matrix_store, model_id
    )
    for record in db_engine_with_results_schema.execute(
        """select * from test_results.evaluations
        where model_id = %s and
        evaluation_start_time = %s
        order by 1""",
        (model_id, fake_test_matrix_store.as_of_dates[0]),
    ):
        assert record["num_labeled_examples"] == 4
        assert record["num_positive_labels"] == 2
        # '' -> non-thresholded metric; pct -> percentile threshold keeps 1;
        # otherwise top_n threshold keeps 2 labeled examples.
        if record["parameter"] == "":
            assert record["num_labeled_above_threshold"] == 4
        elif "pct" in record["parameter"]:
            assert record["num_labeled_above_threshold"] == 1
        else:
            assert record["num_labeled_above_threshold"] == 2

    # Evaluate the training matrix and test the results
    model_evaluator.evaluate(
        training_prediction_probas, fake_train_matrix_store, model_id
    )
    for record in db_engine_with_results_schema.execute(
        """select * from train_results.evaluations
        where model_id = %s and
        evaluation_start_time = %s
        order by 1""",
        (model_id, fake_train_matrix_store.as_of_dates[0]),
    ):
        assert record["num_labeled_examples"] == 8
        assert record["num_positive_labels"] == 5
        assert record["worst_value"] == 0.625
        assert record["best_value"] == 0.625
        assert record["stochastic_value"] == 0.625
        # best/worst are same, should shortcut trials
        assert record["num_sort_trials"] == 0
        assert record["standard_deviation"] == 0
def test_evaluating_early_warning(db_engine_with_results_schema):
    """Fixture-based variant: verifies the full metric/parameter
    cross-product is written for testing metrics — both overall and for
    each configured subset — and that training metrics and matrix uuids
    are recorded correctly.
    """
    num_entities = 10
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

    # Set up testing configuration parameters
    testing_metric_groups = [
        {
            "metrics": [
                "precision@",
                "recall@",
                "true positives@",
                "true negatives@",
                "false positives@",
                "false negatives@",
            ],
            "thresholds": {"percentiles": [5.0, 10.0], "top_n": [5, 10]},
        },
        {
            "metrics": [
                "f1",
                "mediocre",
                "accuracy",
                "roc_auc",
                "average precision score",
            ]
        },
        {"metrics": ["fbeta@"], "parameters": [{"beta": 0.75}, {"beta": 1.25}]},
    ]
    training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]
    # 'mediocre' is a custom metric supplied via the always_half callable.
    custom_metrics = {"mediocre": always_half}

    # Acquire fake data and objects to be used in the tests
    model_evaluator = ModelEvaluator(
        testing_metric_groups,
        training_metric_groups,
        db_engine_with_results_schema,
        custom_metrics=custom_metrics,
    )
    fake_test_matrix_store = MockMatrixStore(
        matrix_type="test",
        matrix_uuid="efgh",
        label_count=num_entities,
        db_engine=db_engine_with_results_schema,
        init_labels=pd.DataFrame(
            {
                "label_value": labels,
                "entity_id": list(range(num_entities)),
                "as_of_date": [TRAIN_END_TIME] * num_entities,
            }
        )
        .set_index(["entity_id", "as_of_date"])
        .label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )
    fake_train_matrix_store = MockMatrixStore(
        matrix_type="train",
        matrix_uuid="1234",
        label_count=num_entities,
        db_engine=db_engine_with_results_schema,
        init_labels=pd.DataFrame(
            {
                "label_value": labels,
                "entity_id": list(range(num_entities)),
                "as_of_date": [TRAIN_END_TIME] * num_entities,
            }
        )
        .set_index(["entity_id", "as_of_date"])
        .label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )
    trained_model, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )

    # ensure that the matrix uuid is present
    # NOTE(review): this runs before any test-matrix evaluation, so the
    # table may be empty here and all() passes vacuously — confirm intent.
    matrix_uuids = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            "select matrix_uuid from test_results.evaluations"
        )
    ]
    assert all(matrix_uuid == "efgh" for matrix_uuid in matrix_uuids)

    # Evaluate the training metrics and test
    model_evaluator.evaluate(
        trained_model.predict_proba(labels)[:, 1], fake_train_matrix_store, model_id
    )
    records = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            """select distinct(metric || parameter)
            from train_results.evaluations
            where model_id = %s and
            evaluation_start_time = %s
            order by 1""",
            (model_id, fake_train_matrix_store.as_of_dates[0]),
        )
    ]
    assert records == ["accuracy", "roc_auc"]

    # Run tests for overall and subset evaluations
    for subset in SUBSETS:
        if subset is None:
            # overall evaluation: no subset filter in the query
            where_hash = ""
        else:
            populate_subset_data(
                db_engine_with_results_schema, subset, list(range(num_entities))
            )
            SubsetFactory(subset_hash=filename_friendly_hash(subset))
            session.commit()
            where_hash = f"and subset_hash = '{filename_friendly_hash(subset)}'"

        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_test_matrix_store,
            model_id,
            subset=subset,
        )
        records = [
            row[0]
            for row in db_engine_with_results_schema.execute(
                f"""\
                select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                {where_hash}
                order by 1
                """,
                (model_id, fake_test_matrix_store.as_of_dates[0]),
            )
        ]
        # Sorted cross-product of all configured testing metrics/thresholds.
        assert records == [
            "accuracy",
            "average precision score",
            "f1",
            "false [email protected]_pct",
            "false negatives@10_abs",
            "false [email protected]_pct",
            "false negatives@5_abs",
            "false [email protected]_pct",
            "false positives@10_abs",
            "false [email protected]_pct",
            "false positives@5_abs",
            "[email protected]_beta",
            "[email protected]_beta",
            "mediocre",
            "[email protected]_pct",
            "precision@10_abs",
            "[email protected]_pct",
            "precision@5_abs",
            "[email protected]_pct",
            "recall@10_abs",
            "[email protected]_pct",
            "recall@5_abs",
            "roc_auc",
            "true [email protected]_pct",
            "true negatives@10_abs",
            "true [email protected]_pct",
            "true negatives@5_abs",
            "true [email protected]_pct",
            "true positives@10_abs",
            "true [email protected]_pct",
            "true positives@5_abs",
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_train_matrix_store,
            model_id,
            subset=subset,
        )
        records = [
            row[0]
            for row in db_engine_with_results_schema.execute(
                f"""select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                {where_hash}
                order by 1""",
                (model_id, fake_train_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == ["accuracy", "roc_auc"]

    # ensure that the matrix uuid is present
    matrix_uuids = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            "select matrix_uuid from train_results.evaluations"
        )
    ]
    assert all(matrix_uuid == "1234" for matrix_uuid in matrix_uuids)
def test_integration():
    """Full pipeline integration: train a 4-model grid, predict on two test
    matrices (two as-of dates), evaluate, and verify the prediction and
    evaluation rows written for every (model, date) pair.
    """
    with rig_engines() as (db_engine, project_storage):
        train_store = get_matrix_store(
            project_storage,
            matrix_creator(),
            matrix_metadata_creator(matrix_type='train'))
        as_of_dates = [datetime.date(2016, 12, 21), datetime.date(2017, 1, 21)]
        # One single-entity test matrix per as-of date.
        test_stores = []
        for as_of_date in as_of_dates:
            matrix_store = get_matrix_store(
                project_storage,
                pandas.DataFrame.from_dict({
                    'entity_id': [3],
                    'feature_one': [8],
                    'feature_two': [5],
                    'label': [0]
                }).set_index('entity_id'),
                matrix_metadata_creator(end_time=as_of_date, indices=['entity_id']))
            test_stores.append(matrix_store)

        model_storage_engine = ModelStorageEngine(project_storage)
        experiment_hash = save_experiment_and_get_hash({}, db_engine)

        # instantiate pipeline objects
        trainer = ModelTrainer(
            experiment_hash=experiment_hash,
            model_storage_engine=model_storage_engine,
            db_engine=db_engine,
        )
        predictor = Predictor(model_storage_engine, db_engine)
        model_evaluator = ModelEvaluator(
            [{
                'metrics': ['precision@'],
                'thresholds': {'top_n': [5]}
            }],
            [{}],
            db_engine)

        # run the pipeline
        # 2 C values x 2 penalties -> 4 models
        grid_config = {
            'sklearn.linear_model.LogisticRegression': {
                'C': [0.00001, 0.0001],
                'penalty': ['l1', 'l2'],
                'random_state': [2193]
            }
        }
        model_ids = trainer.train_models(
            grid_config=grid_config,
            misc_db_parameters=dict(),
            matrix_store=train_store)
        for model_id in model_ids:
            for as_of_date, test_store in zip(as_of_dates, test_stores):
                predictions_proba = predictor.predict(
                    model_id,
                    test_store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=['feature_one', 'feature_two'])
                model_evaluator.evaluate(
                    predictions_proba,
                    test_store,
                    model_id,
                )

        # assert
        # 1. that the predictions table entries are present and
        # can be linked to the original models
        records = [
            row for row in db_engine.execute(
                '''select entity_id, model_id, as_of_date
                from test_results.predictions
                join model_metadata.models using (model_id)
                order by 3, 2''')
        ]
        # 4 models x 2 dates for the single entity (id 3).
        assert records == [
            (3, 1, datetime.datetime(2016, 12, 21)),
            (3, 2, datetime.datetime(2016, 12, 21)),
            (3, 3, datetime.datetime(2016, 12, 21)),
            (3, 4, datetime.datetime(2016, 12, 21)),
            (3, 1, datetime.datetime(2017, 1, 21)),
            (3, 2, datetime.datetime(2017, 1, 21)),
            (3, 3, datetime.datetime(2017, 1, 21)),
            (3, 4, datetime.datetime(2017, 1, 21)),
        ]

        # that evaluations are there
        records = [
            row for row in db_engine.execute('''
                select model_id, evaluation_start_time, metric, parameter
                from test_results.evaluations order by 2, 1''')
        ]
        assert records == [
            (1, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
            (2, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
            (3, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
            (4, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
            (1, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
            (2, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
            (3, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
            (4, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
        ]
def test_integration():
    """Legacy integration variant using mocked S3 model storage: trains a
    4-model grid, predicts on two single-entity test matrices, evaluates,
    and verifies test_results.test_predictions / test_evaluations rows.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'

            # create train and test matrices
            train_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            # NOTE(review): train_metadata is built but sample_metadata() is
            # passed to the store below — confirm which metadata is intended.
            train_metadata = {
                'feature_start_time': datetime.date(2012, 12, 20),
                'end_time': datetime.date(2016, 12, 20),
                'label_name': 'label',
                'label_timespan': '1y',
                'feature_names': ['ft1', 'ft2'],
                'metta-uuid': '1234',
                'indices': ['entity_id'],
                'matrix_type': 'train'
            }
            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()

            train_store = InMemoryMatrixStore(train_matrix, sample_metadata())

            as_of_dates = [
                datetime.date(2016, 12, 21),
                datetime.date(2017, 1, 21)
            ]
            # One single-entity test matrix per as-of date.
            test_stores = [
                InMemoryMatrixStore(
                    pandas.DataFrame.from_dict({
                        'entity_id': [3],
                        'feature_one': [8],
                        'feature_two': [5],
                        'label': [5]
                    }),
                    {
                        'label_name': 'label',
                        'label_timespan': '1y',
                        'end_time': as_of_date,
                        'metta-uuid': '1234',
                        'indices': ['entity_id'],
                        'matrix_type': 'test',
                        'as_of_date_frequency': '1month'
                    })
                for as_of_date in as_of_dates
            ]

            model_storage_engine = S3ModelStorageEngine(project_path)
            experiment_hash = save_experiment_and_get_hash({}, db_engine)

            # instantiate pipeline objects
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=experiment_hash,
                model_storage_engine=model_storage_engine,
                db_engine=db_engine,
            )
            predictor = Predictor(project_path, model_storage_engine, db_engine)
            model_evaluator = ModelEvaluator(
                [{
                    'metrics': ['precision@'],
                    'thresholds': {'top_n': [5]}
                }],
                [{}],
                db_engine)

            # run the pipeline
            # 2 C values x 2 penalties -> 4 models
            grid_config = {
                'sklearn.linear_model.LogisticRegression': {
                    'C': [0.00001, 0.0001],
                    'penalty': ['l1', 'l2'],
                    'random_state': [2193]
                }
            }
            model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=train_store)
            for model_id in model_ids:
                for as_of_date, test_store in zip(as_of_dates, test_stores):
                    predictions_proba = predictor.predict(
                        model_id,
                        test_store,
                        misc_db_parameters=dict(),
                        train_matrix_columns=['feature_one', 'feature_two'])
                    model_evaluator.evaluate(
                        predictions_proba,
                        test_store,
                        model_id,
                    )

            # assert
            # 1. that the predictions table entries are present and
            # can be linked to the original models
            records = [
                row for row in db_engine.execute(
                    '''select entity_id, model_id, as_of_date
                    from test_results.test_predictions
                    join model_metadata.models using (model_id)
                    order by 3, 2''')
            ]
            # 4 models x 2 dates for the single entity (id 3).
            assert records == [
                (3, 1, datetime.datetime(2016, 12, 21)),
                (3, 2, datetime.datetime(2016, 12, 21)),
                (3, 3, datetime.datetime(2016, 12, 21)),
                (3, 4, datetime.datetime(2016, 12, 21)),
                (3, 1, datetime.datetime(2017, 1, 21)),
                (3, 2, datetime.datetime(2017, 1, 21)),
                (3, 3, datetime.datetime(2017, 1, 21)),
                (3, 4, datetime.datetime(2017, 1, 21)),
            ]

            # that evaluations are there
            records = [
                row for row in db_engine.execute('''
                    select model_id, evaluation_start_time, metric, parameter
                    from test_results.test_evaluations order by 2, 1''')
            ]
            assert records == [
                (1, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (2, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (3, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (4, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (1, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (2, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (3, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (4, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
            ]
def test_evaluating_early_warning():
    """Legacy variant (InMemoryModelStorageEngine): verifies one evaluation
    row per configured testing metric/parameter combination, and only
    accuracy/roc_auc for training metrics.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [{
            'metrics': ['precision@',
                        'recall@',
                        'true positives@',
                        'true negatives@',
                        'false positives@',
                        'false negatives@'],
            'thresholds': {
                'percentiles': [5.0, 10.0],
                'top_n': [5, 10]
            }
        }, {
            'metrics': ['f1',
                        'mediocre',
                        'accuracy',
                        'roc_auc',
                        'average precision score'],
        }, {
            'metrics': ['fbeta@'],
            'parameters': [{'beta': 0.75}, {'beta': 1.25}]
        }]
        training_metric_groups = [{'metrics': ['accuracy', 'roc_auc']}]
        # 'mediocre' is a custom metric supplied via the always_half callable.
        custom_metrics = {'mediocre': always_half}
        model_evaluator = ModelEvaluator(
            testing_metric_groups,
            training_metric_groups,
            db_engine,
            custom_metrics=custom_metrics
        )

        labels = fake_labels(5)
        fake_train_matrix_store = MockMatrixStore('train', 'efgh', 5, db_engine, labels)
        fake_test_matrix_store = MockMatrixStore('test', '1234', 5, db_engine, labels)
        trained_model, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine
        )

        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_test_matrix_store,
            model_id,
        )
        records = [
            row[0] for row in db_engine.execute(
                '''select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1''',
                (model_id, fake_test_matrix_store.as_of_dates[0])
            )
        ]
        # Sorted cross-product of all configured testing metrics/thresholds.
        assert records == [
            'accuracy',
            'average precision score',
            'f1',
            'false [email protected]_pct',
            'false negatives@10_abs',
            'false [email protected]_pct',
            'false negatives@5_abs',
            'false [email protected]_pct',
            'false positives@10_abs',
            'false [email protected]_pct',
            'false positives@5_abs',
            '[email protected]_beta',
            '[email protected]_beta',
            'mediocre',
            '[email protected]_pct',
            'precision@10_abs',
            '[email protected]_pct',
            'precision@5_abs',
            '[email protected]_pct',
            'recall@10_abs',
            '[email protected]_pct',
            'recall@5_abs',
            'roc_auc',
            'true [email protected]_pct',
            'true negatives@10_abs',
            'true [email protected]_pct',
            'true negatives@5_abs',
            'true [email protected]_pct',
            'true positives@10_abs',
            'true [email protected]_pct',
            'true positives@5_abs'
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_train_matrix_store,
            model_id,
        )
        records = [
            row[0] for row in db_engine.execute(
                '''select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1''',
                (model_id, fake_train_matrix_store.as_of_dates[0])
            )
        ]
        assert records == ['accuracy', 'roc_auc']
def test_model_scoring_inspections():
    """Check the bookkeeping columns written alongside each evaluation:
    labeled-example counts, positive-label counts, and the count of
    labeled rows above the threshold, for both testing and training
    matrices (the testing labels include a NaN, i.e. an unlabeled row)."""
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)

        testing_metric_groups = [
            {
                'metrics': ['precision@', 'recall@', 'fpr@'],
                'thresholds': {'percentiles': [50.0], 'top_n': [3]},
            },
            {
                # ensure we test a non-thresholded metric as well
                'metrics': ['accuracy'],
            },
        ]
        training_metric_groups = [
            {'metrics': ['accuracy'], 'thresholds': {'percentiles': [50.0]}}
        ]
        model_evaluator = ModelEvaluator(
            testing_metric_groups, training_metric_groups, engine
        )

        testing_labels = numpy.array([True, False, numpy.nan, True, False])
        testing_prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])

        training_labels = numpy.array(
            [False, False, True, True, True, False, True, True]
        )
        training_prediction_probas = numpy.array(
            [0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6]
        )

        fake_train_matrix_store = MockMatrixStore(
            'train', 'efgh', 5, engine, training_labels)
        fake_test_matrix_store = MockMatrixStore(
            'test', '1234', 5, engine, testing_labels)
        trained_model, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), engine
        )

        # Evaluate testing matrix and test the results
        model_evaluator.evaluate(
            testing_prediction_probas,
            fake_test_matrix_store,
            model_id,
        )
        for record in engine.execute(
            '''select * from test_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1''',
            (model_id, fake_test_matrix_store.as_of_dates[0]),
        ):
            assert record['num_labeled_examples'] == 4
            assert record['num_positive_labels'] == 2
            if record['parameter'] == '':
                # non-thresholded metric: every labeled row counts
                assert record['num_labeled_above_threshold'] == 4
            elif 'pct' in record['parameter']:
                assert record['num_labeled_above_threshold'] == 1
            else:
                assert record['num_labeled_above_threshold'] == 2

        # Evaluate the training matrix and test the results
        model_evaluator.evaluate(
            training_prediction_probas,
            fake_train_matrix_store,
            model_id,
        )
        for record in engine.execute(
            '''select * from train_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1''',
            (model_id, fake_train_matrix_store.as_of_dates[0]),
        ):
            assert record['num_labeled_examples'] == 8
            assert record['num_positive_labels'] == 5
            assert record['value'] == 0.625
class ModelTester(object):
    """Runs the post-training stages for a time split: individual feature
    importances, predictions, and metric evaluation for each trained model.

    Args:
        db_engine: SQLAlchemy engine used by all component objects
        project_path: storage path handed to the Predictor
        model_storage_engine: storage engine that holds trained models
        replace (bool): whether components should overwrite existing results
        evaluator_config (dict): requires 'metric_groups' and
            'training_metric_groups'; 'sort_seed' is optional
        individual_importance_config (dict): optional 'n_ranks' (default 5)
            and 'methods' (default ['uniform'])
    """

    def __init__(self, db_engine, project_path, model_storage_engine,
                 replace, evaluator_config, individual_importance_config):
        self.predictor = Predictor(
            db_engine=db_engine,
            model_storage_engine=model_storage_engine,
            project_path=project_path,
            replace=replace,
        )
        self.individual_importance_calculator = IndividualImportanceCalculator(
            db_engine=db_engine,
            n_ranks=individual_importance_config.get('n_ranks', 5),
            methods=individual_importance_config.get('methods', ['uniform']),
            replace=replace,
        )
        # ModelEvaluator's first metric-group parameter is named
        # 'testing_metric_groups' at every other call site in this file;
        # the previous 'metric_groups=' keyword would raise a TypeError.
        self.evaluator = ModelEvaluator(
            db_engine=db_engine,
            sort_seed=evaluator_config.get('sort_seed', None),
            testing_metric_groups=evaluator_config['metric_groups'],
            training_metric_groups=evaluator_config['training_metric_groups'],
        )

    def generate_model_test_tasks(self, split, train_store, model_ids,
                                  matrix_store_creator):
        """Build one task dict per non-empty test matrix in the split.

        Args:
            split (dict): needs parallel lists 'test_matrices'/'test_uuids'
            train_store: matrix store for the split's training matrix
            model_ids (iterable): model ids to test; falsy ids are dropped
            matrix_store_creator (callable): uuid -> matrix store

        Returns:
            list of dicts with 'test_store', 'train_store', 'model_ids'
        """
        test_tasks = []
        # zip keeps the two lists aligned; the matrix definition itself
        # is not needed here, only the uuid.
        for _matrix_def, test_uuid in zip(split['test_matrices'],
                                          split['test_uuids']):
            test_store = matrix_store_creator(test_uuid)
            if test_store.empty:
                logging.warning(
                    '''Test matrix for uuid %s
                    was empty, no point in generating predictions. Not creating test task.
                    ''', test_uuid)
                continue
            test_tasks.append({
                'test_store': test_store,
                'train_store': train_store,
                'model_ids': [model_id for model_id in model_ids if model_id],
            })
        return test_tasks

    def process_model_test_task(self, test_store, train_store, model_ids):
        """For each model id: save individual importances against the test
        matrix, then predict and evaluate on both the test and train
        matrices (train-matrix columns drive prediction alignment)."""
        as_of_times = test_store.metadata['as_of_times']
        logging.info(
            'Testing and scoring all model ids with test matrix %s. '
            'as_of_times min: %s max: %s num: %s',
            test_store.uuid,
            min(as_of_times),
            max(as_of_times),
            len(as_of_times),
        )
        for model_id in model_ids:
            logging.info('Testing model id %s', model_id)
            self.individual_importance_calculator\
                .calculate_and_save_all_methods_and_dates(
                    model_id,
                    test_store
                )

            # Generate predictions for the testing data then training data
            for store in (test_store, train_store):
                predictions_proba = self.predictor.predict(
                    model_id,
                    store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_store.columns(),
                )
                self.evaluator.evaluate(
                    predictions_proba=predictions_proba,
                    matrix_store=store,
                    model_id=model_id,
                )
def test_integration():
    """Wire ModelTrainer, Predictor and ModelEvaluator together end to end
    and verify that predictions and evaluations land in the database for
    every trained model and every test-matrix as-of date."""
    with rig_engines() as (db_engine, project_storage):
        train_store = get_matrix_store(
            project_storage,
            matrix_creator(),
            matrix_metadata_creator(matrix_type="train"),
        )
        as_of_dates = [datetime.date(2016, 12, 21), datetime.date(2017, 1, 21)]

        # one tiny single-entity test matrix per as-of date
        test_stores = []
        for as_of_date in as_of_dates:
            single_entity_frame = pandas.DataFrame.from_dict({
                "entity_id": [3],
                "feature_one": [8],
                "feature_two": [5],
                "label": [0],
            }).set_index("entity_id")
            test_stores.append(
                get_matrix_store(
                    project_storage,
                    single_entity_frame,
                    matrix_metadata_creator(
                        end_time=as_of_date, indices=["entity_id"]
                    ),
                )
            )

        model_storage_engine = ModelStorageEngine(project_storage)
        experiment_hash = save_experiment_and_get_hash({}, db_engine)

        # instantiate pipeline objects
        trainer = ModelTrainer(
            experiment_hash=experiment_hash,
            model_storage_engine=model_storage_engine,
            db_engine=db_engine,
        )
        predictor = Predictor(model_storage_engine, db_engine)
        model_evaluator = ModelEvaluator(
            [{"metrics": ["precision@"], "thresholds": {"top_n": [5]}}],
            [{}],
            db_engine,
        )

        # run the pipeline
        grid_config = {
            "sklearn.linear_model.LogisticRegression": {
                "C": [0.00001, 0.0001],
                "penalty": ["l1", "l2"],
                "random_state": [2193],
            }
        }
        model_ids = trainer.train_models(
            grid_config=grid_config,
            misc_db_parameters=dict(),
            matrix_store=train_store,
        )
        for model_id in model_ids:
            for test_store in test_stores:
                predictions_proba = predictor.predict(
                    model_id,
                    test_store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=["feature_one", "feature_two"],
                )
                model_evaluator.evaluate(
                    predictions_proba, test_store, model_id
                )

        # assert
        # 1. that the predictions table entries are present and
        # can be linked to the original models
        records = list(db_engine.execute(
            """select entity_id, model_id, as_of_date
            from test_results.predictions
            join model_metadata.models using (model_id)
            order by 3, 2"""))
        assert records == [
            (3, 1, datetime.datetime(2016, 12, 21)),
            (3, 2, datetime.datetime(2016, 12, 21)),
            (3, 3, datetime.datetime(2016, 12, 21)),
            (3, 4, datetime.datetime(2016, 12, 21)),
            (3, 1, datetime.datetime(2017, 1, 21)),
            (3, 2, datetime.datetime(2017, 1, 21)),
            (3, 3, datetime.datetime(2017, 1, 21)),
            (3, 4, datetime.datetime(2017, 1, 21)),
        ]

        # that evaluations are there
        records = list(db_engine.execute("""
            select model_id, evaluation_start_time, metric, parameter
            from test_results.evaluations order by 2, 1"""))
        assert records == [
            (1, datetime.datetime(2016, 12, 21), "precision@", "5_abs"),
            (2, datetime.datetime(2016, 12, 21), "precision@", "5_abs"),
            (3, datetime.datetime(2016, 12, 21), "precision@", "5_abs"),
            (4, datetime.datetime(2016, 12, 21), "precision@", "5_abs"),
            (1, datetime.datetime(2017, 1, 21), "precision@", "5_abs"),
            (2, datetime.datetime(2017, 1, 21), "precision@", "5_abs"),
            (3, datetime.datetime(2017, 1, 21), "precision@", "5_abs"),
            (4, datetime.datetime(2017, 1, 21), "precision@", "5_abs"),
        ]