Example #1
def test_uniform_distribution_entity_id_index():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        model = ModelFactory()
        feature_importances = [
            FeatureImportanceFactory(model_rel=model,
                                     feature='feature_{}'.format(i))
            for i in range(0, 10)
        ]
        data_dict = {'entity_id': [1, 2]}
        for imp in feature_importances:
            data_dict[imp.feature] = [0.5, 0.5]
        test_store = InMemoryMatrixStore(
            matrix=pandas.DataFrame.from_dict(data_dict),
            metadata=sample_metadata())
        session.commit()
        results = uniform_distribution(db_engine,
                                       model_id=model.model_id,
                                       as_of_date='2016-01-01',
                                       test_matrix_store=test_store,
                                       n_ranks=5)

        assert len(results) == 10  # 5 features x 2 entities
        for result in results:
            assert 'entity_id' in result
            assert 'feature_name' in result
            assert 'score' in result
            assert 'feature_value' in result
            assert result['feature_value'] == 0.5
            assert result['score'] >= 0
            assert result['score'] <= 1
            assert isinstance(result['feature_name'], str)
            assert result['entity_id'] in [1, 2]
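Judging from the assertions above, each element returned by uniform_distribution() is a dict carrying an entity_id, a feature_name, the feature_value, and a score between 0 and 1. An illustrative element consistent with those checks (the concrete feature name and score are made up, not from the source):

# Illustrative only: one result row shaped to satisfy the assertions above;
# the feature name and score values are assumptions.
example_result = {
    "entity_id": 1,
    "feature_name": "feature_3",
    "feature_value": 0.5,
    "score": 0.27,  # any float in [0, 1] would pass the range checks
}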
Example #2
def test_uniform_distribution():
    with rig_engines() as (db_engine, project_storage):
        model = ModelFactory()
        feature_importances = [
            FeatureImportanceFactory(model_rel=model, feature="feature_{}".format(i))
            for i in range(0, 10)
        ]
        data_dict = {"entity_id": [1, 1], "as_of_date": ["2016-01-01", "2017-01-01"], "label": [0, 1]}
        for imp in feature_importances:
            data_dict[imp.feature] = [0.5, 0.5]
        metadata = matrix_metadata_creator()
        test_store = get_matrix_store(
            project_storage,
            pandas.DataFrame.from_dict(data_dict),
            metadata,
        )
        results = uniform_distribution(
            db_engine,
            model_id=model.model_id,
            as_of_date=datetime.date(2016, 1, 1),
            test_matrix_store=test_store,
            n_ranks=5,
        )

        assert len(results) == 5  # 5 features x 1 entity for this as_of_date
        for result in results:
            assert "entity_id" in result
            assert "feature_name" in result
            assert "score" in result
            assert "feature_value" in result
            assert result["feature_value"] == 0.5
            assert result["score"] >= 0
            assert result["score"] <= 1
            assert isinstance(result["feature_name"], str)
            assert result["entity_id"] in [1, 2]
Example #3
def test_uniform_distribution_entity_id_index():
    with rig_engines() as (db_engine, project_storage):
        model = ModelFactory()
        feature_importances = [
            FeatureImportanceFactory(model_rel=model,
                                     feature='feature_{}'.format(i))
            for i in range(0, 10)
        ]
        data_dict = {'entity_id': [1, 2]}
        for imp in feature_importances:
            data_dict[imp.feature] = [0.5, 0.5]
        metadata = matrix_metadata_creator(indices='entity_id')
        test_store = get_matrix_store(
            project_storage,
            pandas.DataFrame.from_dict(data_dict).set_index(
                metadata['indices']), metadata)
        results = uniform_distribution(db_engine,
                                       model_id=model.model_id,
                                       as_of_date='2016-01-01',
                                       test_matrix_store=test_store,
                                       n_ranks=5)

        assert len(results) == 10  # 5 features x 2 entities
        for result in results:
            assert 'entity_id' in result
            assert 'feature_name' in result
            assert 'score' in result
            assert 'feature_value' in result
            assert result['feature_value'] == 0.5
            assert result['score'] >= 0
            assert result['score'] <= 1
            assert isinstance(result['feature_name'], str)
            assert result['entity_id'] in [1, 2]
Example #4
def update_ranks_test(predictor,
                      entities_scores_labels,
                      rank_col,
                      expected_result,
                      model_random_seed=12345,
                      need_seed_data=True):
    """Not a test in itself but rather a utility called by many of the ranking tests"""
    ensure_db(predictor.db_engine)
    init_engine(predictor.db_engine)
    model_id = 5
    matrix_uuid = "4567"
    matrix_type = "test"
    as_of_date = datetime.datetime(2012, 1, 1)
    if need_seed_data:
        matrix = MatrixFactory(matrix_uuid=matrix_uuid)
        model = ModelFactory(model_id=model_id, random_seed=model_random_seed)
        for entity_id, score, label in entities_scores_labels:
            PredictionFactory(model_rel=model,
                              matrix_rel=matrix,
                              as_of_date=as_of_date,
                              entity_id=entity_id,
                              score=score,
                              label_value=int(label))
        factory_session.commit()
    predictor.update_db_with_ranks(
        model_id=model_id,
        matrix_uuid=matrix_uuid,
        matrix_type=TestMatrixType,
    )
    ranks = tuple(row for row in predictor.db_engine.execute(
        f'''
select entity_id, {rank_col}::float
from {matrix_type}_results.predictions
where as_of_date = %s and model_id = %s and matrix_uuid = %s order by {rank_col} asc''',
        (as_of_date, model_id, matrix_uuid)))
    assert ranks == expected_result

    # Test that the predictions metadata table is populated
    metadata_records = [
        row for row in predictor.db_engine.execute(
            f"""select tiebreaker_ordering, prediction_metadata.random_seed, models.random_seed
        from {matrix_type}_results.prediction_metadata
        join triage_metadata.models using (model_id)
        join triage_metadata.matrices using (matrix_uuid)
        """)
    ]
    assert len(metadata_records) == 1
    tiebreaker_ordering, random_seed, received_model_random_seed = metadata_records[0]
    if tiebreaker_ordering == 'random':
        assert random_seed == model_random_seed
    else:
        assert not random_seed
    assert tiebreaker_ordering == predictor.rank_order
    assert received_model_random_seed == model_random_seed
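Since the docstring marks update_ranks_test as a shared utility rather than a test itself, a minimal sketch of a caller might look like the following; the predictor fixture and the concrete entities/scores are assumptions, not taken from the original suite.

# Hypothetical caller of the helper above. Distinct scores avoid ties, so the
# absolute no-ties rank should simply follow descending score (highest score = rank 1).
def test_rank_abs_no_ties_sketch(predictor):  # 'predictor' is an assumed fixture
    update_ranks_test(
        predictor,
        entities_scores_labels=[(1, 0.9, 0), (2, 0.8, 1), (3, 0.7, 0)],
        rank_col="rank_abs_no_ties",
        expected_result=((1, 1.0), (2, 2.0), (3, 3.0)),
    )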
Example #5
def test_Audition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type)
            for model_type in model_types
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2014, 1, 1),
            datetime(2015, 1, 1),
            datetime(2016, 1, 1),
        ]

        models = [
            ModelFactory(model_group_rel=model_group,
                         train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model,
                                     metric=metric,
                                     parameter=parameter)

        session.commit()

        with tempfile.TemporaryDirectory() as td:
            with mock.patch('os.getcwd') as mock_getcwd:
                mock_getcwd.return_value = td
                AuditionRunner(config_dict=config,
                               db_engine=db_engine,
                               directory=td).run()
                assert len(os.listdir(os.getcwd())) == 6
Example #6
def test_ModelEvaluator_needs_evaluation_with_bias_audit(
        db_engine_with_results_schema):
    # test that if a bias audit config is passed and there are no matching bias audits
    # in the database, needs_evaluations returns True.
    # this all assumes that evaluations are populated; those tests are in the 'no_bias_audit' test
    model_evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {
                    "top_n": [3]
                },
            },
        ],
        training_metric_groups=[],
        bias_config={'thresholds': {
            'top_n': [2]
        }},
        db_engine=db_engine_with_results_schema,
    )
    model_with_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    for subset_hash in [""]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="3_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        'as_of_date_frequency': as_of_date_frequency,
        'as_of_times': [eval_time],
    }
    test_matrix_store = MockMatrixStore("test",
                                        "1234",
                                        5,
                                        db_engine_with_results_schema,
                                        metadata_overrides=metadata_overrides)
    assert model_evaluator.needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_with_evaluations.model_id,
        subset_hash="",
    )
Example #7
def test_prediction_ranks_multiple_dates(project_storage, db_engine):
    """make sure that multiple as-of-dates in a single matrix are handled correctly.
    keep the other variables simple by making no within-date ties that would end up
    testing the tiebreaker logic, just data for two dates with data that could theoretically
    confound a bad ranking method:
    - a different order for entities in both dates
    - each date has some not in the other
    """
    ensure_db(db_engine)
    init_engine(db_engine)
    predictor = Predictor(project_storage.model_storage_engine(), db_engine,
                          'worst')
    model_id = 5
    matrix_uuid = "4567"
    matrix_type = "test"
    entities_dates_and_scores = (
        (23, datetime.datetime(2012, 1, 1), 0.95),
        (34, datetime.datetime(2012, 1, 1), 0.94),
        (45, datetime.datetime(2013, 1, 1), 0.92),
        (23, datetime.datetime(2013, 1, 1), 0.45),
    )
    expected_result = (
        (23, datetime.datetime(2012, 1, 1), 1),
        (34, datetime.datetime(2012, 1, 1), 2),
        (45, datetime.datetime(2013, 1, 1), 3),
        (23, datetime.datetime(2013, 1, 1), 4),
    )
    matrix = MatrixFactory(matrix_uuid=matrix_uuid)
    model = ModelFactory(model_id=model_id)
    for entity_id, as_of_date, score in entities_dates_and_scores:
        PredictionFactory(model_rel=model,
                          matrix_rel=matrix,
                          as_of_date=as_of_date,
                          entity_id=entity_id,
                          score=score)
    factory_session.commit()
    predictor.update_db_with_ranks(
        model_id=model_id,
        matrix_uuid=matrix_uuid,
        matrix_type=TestMatrixType,
    )
    ranks = tuple(row for row in predictor.db_engine.execute(
        f'''
select entity_id, as_of_date, rank_abs_no_ties
from {matrix_type}_results.predictions
where model_id = %s and matrix_uuid = %s order by rank_abs_no_ties''', (
            model_id, matrix_uuid)))
    assert ranks == expected_result
Example #8
def test_uniform_distribution_entity_id_index():
    with rig_engines() as (db_engine, project_storage):
        model = ModelFactory()
        feature_importances = [
            FeatureImportanceFactory(model_rel=model,
                                     feature="feature_{}".format(i))
            for i in range(0, 10)
        ]
        data_dict = {"entity_id": [1, 2]}
        for imp in feature_importances:
            data_dict[imp.feature] = [0.5, 0.5]
        metadata = matrix_metadata_creator(indices="entity_id")
        test_store = get_matrix_store(
            project_storage,
            pandas.DataFrame.from_dict(data_dict).set_index(
                metadata["indices"]),
            metadata,
        )
        results = uniform_distribution(
            db_engine,
            model_id=model.model_id,
            as_of_date="2016-01-01",
            test_matrix_store=test_store,
            n_ranks=5,
        )

        assert len(results) == 10  # 5 features x 2 entities
        for result in results:
            assert "entity_id" in result
            assert "feature_name" in result
            assert "score" in result
            assert "feature_value" in result
            assert result["feature_value"] == 0.5
            assert result["score"] >= 0
            assert result["score"] <= 1
            assert isinstance(result["feature_name"], str)
            assert result["entity_id"] in [1, 2]
Example #9
def test_ModelEvaluator_needs_evaluation_no_bias_audit(db_engine_with_results_schema):
    # TEST SETUP:

    # create two models: one that has zero evaluations,
    # one that has an evaluation for precision@100_abs
    # both overall and for each subset
    model_with_evaluations = ModelFactory()
    model_without_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    for subset_hash in [""] + [filename_friendly_hash(subset) for subset in SUBSETS]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="100_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        "as_of_date_frequency": as_of_date_frequency,
        "as_of_times": [eval_time],
    }
    test_matrix_store = MockMatrixStore(
        "test",
        "1234",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )
    train_matrix_store = MockMatrixStore(
        "train",
        "2345",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )

    # the evaluated model has test evaluations for precision, but not recall,
    # so this needs evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)

        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@", "recall@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the evaluated model has test evaluations for precision,
    # so this should not need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)

        assert not ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the non-evaluated model has no evaluations,
    # so this should need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)

        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_without_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the evaluated model has no *train* evaluations,
    # so the train matrix should need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)

        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=train_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )
    session.close()
    session.remove()
Example #10
def test_Auditioner():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        # set up data, randomly generated by the factories but conforming
        # generally to what we expect triage_metadata schema data to look like

        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type)
            for model_type in model_types
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2014, 1, 1),
            datetime(2015, 1, 1),
            datetime(2016, 1, 1),
        ]

        models = [
            ModelFactory(model_group_rel=model_group,
                         train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model,
                                     metric=metric,
                                     parameter=parameter)

        session.commit()

        # define a very loose filtering that should admit all model groups
        no_filtering = [
            {
                "metric": "precision@",
                "parameter": "100_abs",
                "max_from_best": 1.0,
                "threshold_value": 0.0,
            },
            {
                "metric": "recall@",
                "parameter": "100_abs",
                "max_from_best": 1.0,
                "threshold_value": 0.0,
            },
        ]
        model_group_ids = [mg.model_group_id for mg in model_groups]
        auditioner = Auditioner(db_engine, model_group_ids, train_end_times,
                                no_filtering)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups
        auditioner.plot_model_groups()

        # here, we pick thresholding rules that should definitely remove
        # all model groups from contention because they are too strict.
        remove_all = [
            {
                "metric": "precision@",
                "parameter": "100_abs",
                "max_from_best": 0.0,
                "threshold_value": 1.1,
            },
            {
                "metric": "recall@",
                "parameter": "100_abs",
                "max_from_best": 0.0,
                "threshold_value": 1.1,
            },
        ]

        auditioner.update_metric_filters(new_filters=remove_all)
        assert len(auditioner.thresholded_model_group_ids) == 0

        # pass the argument instead and remove all model groups
        auditioner.set_one_metric_filter(
            metric="precision@",
            parameter="100_abs",
            max_from_best=0.0,
            threshold_value=1.1,
        )
        assert len(auditioner.thresholded_model_group_ids) == 0

        # one potential place for bugs would be when we pull back the rules
        # for being too restrictive. we want to make sure that the original list is
        # always used for thresholding, or else such a move would be impossible
        auditioner.update_metric_filters(new_filters=no_filtering)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # pass the argument instead and let all model groups pass
        auditioner.set_one_metric_filter(
            metric="precision@",
            parameter="100_abs",
            max_from_best=1.0,
            threshold_value=0.0,
        )
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # now, we want to take this partially thresholded list and run it through
        # a grid of selection rules, meant to pick winners by a variety of user-defined
        # criteria
        rule_grid = [
            {
                "shared_parameters": [
                    {
                        "metric": "precision@",
                        "parameter": "100_abs"
                    },
                    {
                        "metric": "recall@",
                        "parameter": "100_abs"
                    },
                ],
                "selection_rules": [
                    {
                        "name": "most_frequent_best_dist",
                        "dist_from_best_case": [0.1, 0.2, 0.3],
                        "n": 1,
                    },
                    {
                        "name": "best_current_value",
                        "n": 1
                    },
                ],
            },
            {
                "shared_parameters": [{
                    "metric1": "precision@",
                    "parameter1": "100_abs"
                }],
                "selection_rules": [{
                    "name": "best_average_two_metrics",
                    "metric2": ["recall@"],
                    "parameter2": ["100_abs"],
                    "metric1_weight": [0.4, 0.5, 0.6],
                    "n": 1,
                }],
            },
        ]
        auditioner.register_selection_rule_grid(rule_grid, plot=False)
        final_model_group_ids = auditioner.selection_rule_model_group_ids

        # we expect the result to be a mapping of selection rule name to model group id
        assert isinstance(final_model_group_ids, dict)

        # we expect that there is one winner for each selection rule
        assert sorted(final_model_group_ids.keys()) == sorted(
            [rule.descriptive_name for rule in auditioner.selection_rules])
Example #11
def test_PreAudition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        # set up data, randomly generated by the factories but conforming
        # generally to what we expect triage_metadata schema data to look like
        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_configs = [
            {"label_definition": "label_1"}
            if i % 2 == 0
            else {"label_definition": "label_2"}
            for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type, model_config=model_config)
            for model_type, model_config in zip(model_types, model_configs)
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2013, 7, 1),
            datetime(2014, 1, 1),
            datetime(2014, 7, 1),
            datetime(2015, 1, 1),
            datetime(2015, 7, 1),
            datetime(2016, 7, 1),
            datetime(2016, 1, 1),
        ]

        models = [
            ModelFactory(model_group_rel=model_group, train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time
            )

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(
                    model_rel=model, metric=metric, parameter=parameter
                )

        session.commit()

        pre_aud = PreAudition(db_engine)

        # Expect the number of model groups with label_1
        assert len(pre_aud.get_model_groups_from_label("label_1")['model_groups']) == sum(
            [x["label_definition"] == "label_1" for x in model_configs]
        )

        # Expect no baseline model groups
        assert len(pre_aud.get_model_groups_from_label("label_1")['baseline_model_groups']) == 0

        # Expect the number of model groups with certain experiment_hash
        experiment_hash = list(
            pd.read_sql(
                """SELECT experiment_hash
                FROM triage_metadata.models
                JOIN triage_metadata.experiment_models using (model_hash)
                limit 1""",
                con=db_engine,
            )["experiment_hash"]
        )[0]
        assert len(pre_aud.get_model_groups_from_experiment(experiment_hash)['model_groups']) == 1

        # Expect the number of model groups for custom SQL
        query = """
            SELECT DISTINCT(model_group_id)
            FROM triage_metadata.models
            JOIN triage_metadata.experiment_models using (model_hash)
            WHERE train_end_time >= '2013-01-01'
            AND experiment_hash = '{}'
        """.format(
            experiment_hash
        )
        assert len(pre_aud.get_model_groups(query)) == 1
        # Expect the number of train_end_times after 2014-01-01
        assert len(pre_aud.get_train_end_times(after="2014-01-01")) == 6

        query = """
            SELECT DISTINCT train_end_time
            FROM triage_metadata.models
            WHERE model_group_id IN ({})
                AND train_end_time >= '2014-01-01'
            ORDER BY train_end_time
            """.format(
            ", ".join(map(str, pre_aud.model_groups))
        )

        assert len(pre_aud.get_train_end_times(query=query)) == 6
Example #12
def test_PreAudition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        # set up data, randomly generated by the factories but conforming
        # generally to what we expect results schema data to look like
        num_model_groups = 10
        model_types = [
            'classifier type {}'.format(i) for i in range(0, num_model_groups)
        ]
        model_configs = [{
            'label_definition': 'label_1'
        } if i % 2 == 0 else {
            'label_definition': 'label_2'
        } for i in range(0, num_model_groups)]
        model_groups = [
            ModelGroupFactory(model_type=model_type, model_config=model_config)
            for model_type, model_config in zip(model_types, model_configs)
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2013, 7, 1),
            datetime(2014, 1, 1),
            datetime(2014, 7, 1),
            datetime(2015, 1, 1),
            datetime(2015, 7, 1),
            datetime(2016, 7, 1),
            datetime(2016, 1, 1),
        ]
        models = [
            ModelFactory(model_group_rel=model_group,
                         train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ('precision@', '100_abs'),
            ('recall@', '100_abs'),
            ('precision@', '50_abs'),
            ('recall@', '50_abs'),
            ('fpr@', '10_pct'),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model,
                                     metric=metric,
                                     parameter=parameter)

        session.commit()

        pre_aud = PreAudition(db_engine)

        # Expect the number of model groups with label_1
        assert len(pre_aud.get_model_groups_from_label("label_1")) == \
            sum([x['label_definition'] == 'label_1' for x in model_configs])

        # Expect the number of model groups with certain experiment_hash
        experiment_hash = list(
            pd.read_sql("SELECT experiment_hash FROM results.models limit 1",
                        con=db_engine)['experiment_hash'])[0]
        assert len(
            pre_aud.get_model_groups_from_experiment(experiment_hash)) == 1

        # Expect the number of model groups for custom SQL
        query = """
            SELECT DISTINCT(model_group_id)
            FROM results.models
            WHERE train_end_time >= '2013-01-01'
            AND experiment_hash = '{}'
        """.format(experiment_hash)
        assert len(pre_aud.get_model_groups(query)) == 1

        # Expect the number of train_end_times after 2014-01-01
        assert len(pre_aud.get_train_end_times(after='2014-01-01')) == 6

        query = """
            SELECT DISTINCT train_end_time
            FROM results.models
            WHERE model_group_id IN ({})
                AND train_end_time >= '2014-01-01'
            ORDER BY train_end_time
            """.format(', '.join(map(str, pre_aud.model_groups)))

        assert len(pre_aud.get_train_end_times(query=query)) == 6
Example #13
    def filter_train_end_times(self, engine, train_end_times):
        ensure_db(engine)
        init_engine(engine)
        mg1 = ModelGroupFactory(model_group_id=1, model_type="modelType1")
        mg2 = ModelGroupFactory(model_group_id=2, model_type="modelType2")
        mg3 = ModelGroupFactory(model_group_id=3, model_type="modelType3")
        mg4 = ModelGroupFactory(model_group_id=4, model_type="modelType4")
        mg5 = ModelGroupFactory(model_group_id=5, model_type="modelType5")
        # model group 1
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2016, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2017, 1, 1))
        # model group 2 only has one timestamp
        ModelFactory(model_group_rel=mg2, train_end_time=datetime(2014, 1, 1))
        # model group 3
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2016, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2017, 1, 1))
        # model group 4 only has two timestamps
        ModelFactory(model_group_rel=mg4, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg4, train_end_time=datetime(2016, 1, 1))
        # model group 5 only has three timestamps
        ModelFactory(model_group_rel=mg5, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg5, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg5, train_end_time=datetime(2016, 1, 1))

        session.commit()
        model_groups = [1, 2, 3, 4, 5]
        model_group_ids = model_groups_filter(
            train_end_times=train_end_times,
            initial_model_group_ids=model_groups,
            models_table="models",
            db_engine=engine,
        )

        return model_group_ids
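A hypothetical sibling test built on this helper; the test name and the expected surviving model groups are assumptions, inferred from the seeded timestamps above and from the comments in Example #15 below, which indicate that groups missing a required end time should not pass the filter.

    # Sketch only: with all four end times required, model groups 1 and 3 are
    # the only ones seeded with a model at every timestamp.
    def test_filter_all_train_end_times_sketch(self, engine):
        passing_ids = self.filter_train_end_times(
            engine, ["2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"]
        )
        assert set(passing_ids) == {1, 3}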
Example #14
def test_Auditioner():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        # set up data, randomly generated by the factories but conforming
        # generally to what we expect model_metadata schema data to look like

        num_model_groups = 10
        model_types = [
            'classifier type {}'.format(i) for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type)
            for model_type in model_types
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2014, 1, 1),
            datetime(2015, 1, 1),
            datetime(2016, 1, 1),
        ]

        models = [
            ModelFactory(model_group_rel=model_group,
                         train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ('precision@', '100_abs'),
            ('recall@', '100_abs'),
            ('precision@', '50_abs'),
            ('recall@', '50_abs'),
            ('fpr@', '10_pct'),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model,
                                     metric=metric,
                                     parameter=parameter)

        session.commit()

        # define a very loose filtering that should admit all model groups
        no_filtering = [{
            'metric': 'precision@',
            'parameter': '100_abs',
            'max_from_best': 1.0,
            'threshold_value': 0.0
        }, {
            'metric': 'recall@',
            'parameter': '100_abs',
            'max_from_best': 1.0,
            'threshold_value': 0.0
        }]
        model_group_ids = [mg.model_group_id for mg in model_groups]
        auditioner = Auditioner(
            db_engine,
            model_group_ids,
            train_end_times,
            no_filtering,
        )
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups
        auditioner.plot_model_groups()

        # here, we pick thresholding rules that should definitely remove
        # all model groups from contention because they are too strict.
        remove_all = [{
            'metric': 'precision@',
            'parameter': '100_abs',
            'max_from_best': 0.0,
            'threshold_value': 1.1
        }, {
            'metric': 'recall@',
            'parameter': '100_abs',
            'max_from_best': 0.0,
            'threshold_value': 1.1
        }]

        auditioner.update_metric_filters(new_filters=remove_all)
        assert len(auditioner.thresholded_model_group_ids) == 0

        # pass the argument instead and remove all model groups
        auditioner.set_one_metric_filter(metric='precision@',
                                         parameter='100_abs',
                                         max_from_best=0.0,
                                         threshold_value=1.1)
        assert len(auditioner.thresholded_model_group_ids) == 0

        # one potential place for bugs would be when we pull back the rules
        # for being too restrictive. we want to make sure that the original list is
        # always used for thresholding, or else such a move would be impossible
        auditioner.update_metric_filters(new_filters=no_filtering)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # pass the argument instead and let all model groups pass
        auditioner.set_one_metric_filter(metric='precision@',
                                         parameter='100_abs',
                                         max_from_best=1.0,
                                         threshold_value=0.0)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # now, we want to take this partially thresholded list and run it through
        # a grid of selection rules, meant to pick winners by a variety of user-defined
        # criteria
        rule_grid = [{
            'shared_parameters': [
                {
                    'metric': 'precision@',
                    'parameter': '100_abs'
                },
                {
                    'metric': 'recall@',
                    'parameter': '100_abs'
                },
            ],
            'selection_rules': [{
                'name': 'most_frequent_best_dist',
                'dist_from_best_case': [0.1, 0.2, 0.3],
                'n': 1
            }, {
                'name': 'best_current_value',
                'n': 1
            }]
        }, {
            'shared_parameters': [
                {
                    'metric1': 'precision@',
                    'parameter1': '100_abs'
                },
            ],
            'selection_rules': [
                {
                    'name': 'best_average_two_metrics',
                    'metric2': ['recall@'],
                    'parameter2': ['100_abs'],
                    'metric1_weight': [0.4, 0.5, 0.6],
                    'n': 1
                },
            ]
        }]
        auditioner.register_selection_rule_grid(rule_grid, plot=False)
        final_model_group_ids = auditioner.selection_rule_model_group_ids

        # we expect the result to be a mapping of selection rule name to model group id
        assert isinstance(final_model_group_ids, dict)

        # we expect that there is one winner for each selection rule
        assert sorted(final_model_group_ids.keys()) == \
            sorted([rule.descriptive_name for rule in auditioner.selection_rules])

        # we expect that the results written to the yaml file are the
        # chosen model groups and their rules
        # however because the source data is randomly generated we could have a
        # different list on consecutive runs
        # and don't want to introduce non-determinism to the test
        with tempfile.NamedTemporaryFile() as tf:
            auditioner.write_tyra_config(tf.name)
            assert sorted(yaml.safe_load(tf)['selection_rule_model_groups'].keys()) == \
                sorted(final_model_group_ids.keys())
Example #15
    def filter_same_train_end_times(self, engine):
        ensure_db(engine)
        init_engine(engine)
        mg1 = ModelGroupFactory(model_group_id=1, model_type='modelType1')
        mg2 = ModelGroupFactory(model_group_id=2, model_type='modelType2')
        mg3 = ModelGroupFactory(model_group_id=3, model_type='modelType3')
        mg4 = ModelGroupFactory(model_group_id=4, model_type='modelType4')
        # model group 1
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2016, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2017, 1, 1))
        # model group 2 only has one timestamp, should not pass
        ModelFactory(model_group_rel=mg2, train_end_time=datetime(2014, 1, 1))
        # model group 3
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2016, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2017, 1, 1))
        # model group 4 only has two timestamps, should not pass
        ModelFactory(model_group_rel=mg4, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg4, train_end_time=datetime(2016, 1, 1))

        session.commit()
        train_end_times = [
            '2014-01-01', '2015-01-01', '2016-01-01', '2017-01-01'
        ]
        model_groups = [1, 2, 3, 4]
        model_group_ids = model_groups_filter(
            train_end_times=train_end_times,
            initial_model_group_ids=model_groups,
            models_table='models',
            db_engine=engine)

        return model_group_ids
Example #16
def test_ModelEvaluator_needs_evaluation(db_engine):
    ensure_db(db_engine)
    init_engine(db_engine)
    # TEST SETUP:

    # create two models: one that has zero evaluations,
    # one that has an evaluation for precision@100_abs
    model_with_evaluations = ModelFactory()
    model_without_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    EvaluationFactory(model_rel=model_with_evaluations,
                      evaluation_start_time=eval_time,
                      evaluation_end_time=eval_time,
                      as_of_date_frequency=as_of_date_frequency,
                      metric="precision@",
                      parameter="100_abs")
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        'as_of_date_frequency': as_of_date_frequency,
        'end_time': eval_time,
    }
    test_matrix_store = MockMatrixStore("test",
                                        "1234",
                                        5,
                                        db_engine,
                                        metadata_overrides=metadata_overrides)
    train_matrix_store = MockMatrixStore("train",
                                         "2345",
                                         5,
                                         db_engine,
                                         metadata_overrides=metadata_overrides)

    # the evaluated model has test evaluations for precision, but not recall,
    # so this needs evaluations
    assert ModelEvaluator(testing_metric_groups=[{
        "metrics": ["precision@", "recall@"],
        "thresholds": {
            "top_n": [100]
        },
    }],
                          training_metric_groups=[],
                          db_engine=db_engine).needs_evaluations(
                              matrix_store=test_matrix_store,
                              model_id=model_with_evaluations.model_id,
                          )

    # the evaluated model has test evaluations for precision,
    # so this should not need evaluations
    assert not ModelEvaluator(testing_metric_groups=[{
        "metrics": ["precision@"],
        "thresholds": {
            "top_n": [100]
        },
    }],
                              training_metric_groups=[],
                              db_engine=db_engine).needs_evaluations(
                                  matrix_store=test_matrix_store,
                                  model_id=model_with_evaluations.model_id,
                              )

    # the non-evaluated model has no evaluations,
    # so this should need evaluations
    assert ModelEvaluator(testing_metric_groups=[{
        "metrics": ["precision@"],
        "thresholds": {
            "top_n": [100]
        },
    }],
                          training_metric_groups=[],
                          db_engine=db_engine).needs_evaluations(
                              matrix_store=test_matrix_store,
                              model_id=model_without_evaluations.model_id,
                          )

    # the evaluated model has no *train* evaluations,
    # so the train matrix should need evaluations
    assert ModelEvaluator(testing_metric_groups=[{
        "metrics": ["precision@"],
        "thresholds": {
            "top_n": [100]
        },
    }],
                          training_metric_groups=[{
                              "metrics": ["precision@"],
                              "thresholds": {
                                  "top_n": [100]
                              },
                          }],
                          db_engine=db_engine).needs_evaluations(
                              matrix_store=train_matrix_store,
                              model_id=model_with_evaluations.model_id,
                          )
    session.close()
    session.remove()