Example 1
def test_baseline_exception_handling(sample_matrix_store):
    grid_config = {
        'triage.component.catwalk.baselines.rankers.PercentileRankOneFeature':
        {
            'feature': ['feature_one', 'feature_three']
        }
    }
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        project_path = 'econ-dev/inspections'
        model_storage_engine = S3ModelStorageEngine(project_path)
        ensure_db(db_engine)
        init_engine(db_engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            trainer = ModelTrainer(project_path='econ-dev/inspections',
                                   experiment_hash=None,
                                   model_storage_engine=model_storage_engine,
                                   db_engine=db_engine,
                                   model_grouper=ModelGrouper())

            train_tasks = trainer.generate_train_tasks(grid_config, dict(),
                                                       sample_matrix_store)
            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()

            model_ids = []
            for train_task in train_tasks:
                model_ids.append(trainer.process_train_task(**train_task))
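            # presumably 'feature_one' exists in the sample matrix while
            # 'feature_three' does not, so the first task trains (model id 1)
            # and the second raises and yields None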
            assert model_ids == [1, None]
Example 2
def test_initialize_tracking_and_get_run_id(db_engine_with_results_schema):
    experiment = ExperimentFactory()
    factory_session.commit()
    experiment_hash = experiment.experiment_hash
    run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path="mymodule.MyClassName",
        random_seed=1234,
        experiment_kwargs={"key": "value"},
        db_engine=db_engine_with_results_schema,
    )
    assert run_id
    with scoped_session(db_engine_with_results_schema) as session:
        experiment_run = session.query(TriageRun).get(run_id)
        assert experiment_run.run_hash == experiment_hash
        assert experiment_run.experiment_class_path == "mymodule.MyClassName"
        assert experiment_run.random_seed == 1234
        assert experiment_run.experiment_kwargs == {"key": "value"}
    new_run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path="mymodule.MyClassName",
        random_seed=5432,
        experiment_kwargs={"key": "value"},
        db_engine=db_engine_with_results_schema,
    )
    assert new_run_id > run_id
Example 3
def fake_trained_model(db_engine,
                       train_matrix_uuid="efgh",
                       train_end_time=datetime.datetime(2016, 1, 1)):
    """Creates and stores a trivial trained model and training matrix

    Args:
        db_engine (sqlalchemy.engine)
        train_matrix_uuid (str, optional): uuid to register for the training matrix
        train_end_time (datetime, optional): train_end_time to store on the model row

    Returns:
        (tuple) the trained model object and its model id for database retrieval
    """
    session = sessionmaker(db_engine)()
    session.merge(Matrix(matrix_uuid=train_matrix_uuid))

    # Create the fake trained model and store in db
    trained_model = MockTrainedModel()
    db_model = Model(
        model_hash="abcd",
        train_matrix_uuid=train_matrix_uuid,
        train_end_time=train_end_time,
    )
    session.add(db_model)
    session.commit()
    model_id = db_model.model_id
    session.close()
    return trained_model, model_id
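
# A minimal usage sketch for the helper above (hypothetical test body; assumes
# a db_engine fixture plus the Model and sessionmaker imports already used here):
def test_fake_trained_model_sketch(db_engine):
    trained_model, model_id = fake_trained_model(db_engine)
    session = sessionmaker(db_engine)()
    assert session.query(Model).get(model_id).model_hash == "abcd"
    session.close()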
Example 4
def test_custom_groups(sample_matrix_store, grid_config):
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        init_engine(engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')

            MatrixFactory(matrix_uuid="1234")
            session.commit()
            # create training set
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(project_path)
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                model_grouper=ModelGrouper(['class_path']),
                db_engine=engine,
            )
            model_ids = trainer.train_models(grid_config=grid_config,
                                             misc_db_parameters=dict(),
                                             matrix_store=sample_matrix_store)
            # expect only one model group now
            records = [
                row[0] for row in engine.execute(
                    'select distinct model_group_id from model_metadata.models'
                )
            ]
            assert len(records) == 1
            assert records[0] == model_ids[0]
Example 5
def get_matrix_store(project_storage,
                     matrix=None,
                     metadata=None,
                     write_to_db=True):
    """Return a matrix store associated with the given project storage.
    Also adds an entry in the matrices table if it doesn't exist already

    Args:
        project_storage (triage.component.catwalk.storage.ProjectStorage): A project's storage
        matrix (dataframe, optional): A matrix to store. Defaults to the output of matrix_creator()
        metadata (dict, optional): matrix metadata.
            defaults to the output of matrix_metadata_creator()
        write_to_db (bool, optional): whether to ensure the matrix has a row in
            the matrices table. Defaults to True
    """
    if matrix is None:
        matrix = matrix_creator()
    if not metadata:
        metadata = matrix_metadata_creator()
    matrix["as_of_date"] = matrix["as_of_date"].apply(pd.Timestamp)
    matrix.set_index(MatrixStore.indices, inplace=True)
    matrix_store = project_storage.matrix_storage_engine().get_store(
        filename_friendly_hash(metadata))
    matrix_store.metadata = metadata
    new_matrix = matrix.copy()
    labels = new_matrix.pop(matrix_store.label_column_name)
    matrix_store.matrix_label_tuple = new_matrix, labels
    matrix_store.save()
    matrix_store.clear_cache()
    if write_to_db:
        if (session.query(Matrix).filter(
                Matrix.matrix_uuid == matrix_store.uuid).count() == 0):
            MatrixFactory(matrix_uuid=matrix_store.uuid)
            session.commit()
    return matrix_store
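
# A minimal usage sketch (hypothetical; assumes a project_storage fixture and
# the module-level factory session used above):
def test_get_matrix_store_sketch(project_storage):
    matrix_store = get_matrix_store(project_storage)
    # the helper should have registered the uuid in the matrices table once
    assert session.query(Matrix).filter(
        Matrix.matrix_uuid == matrix_store.uuid).count() == 1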
Example 6
def test_uniform_distribution_entity_id_index():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        model = ModelFactory()
        feature_importances = [
            FeatureImportanceFactory(model_rel=model,
                                     feature='feature_{}'.format(i))
            for i in range(0, 10)
        ]
        data_dict = {'entity_id': [1, 2]}
        for imp in feature_importances:
            data_dict[imp.feature] = [0.5, 0.5]
        test_store = InMemoryMatrixStore(
            matrix=pandas.DataFrame.from_dict(data_dict),
            metadata=sample_metadata())
        session.commit()
        results = uniform_distribution(db_engine,
                                       model_id=model.model_id,
                                       as_of_date='2016-01-01',
                                       test_matrix_store=test_store,
                                       n_ranks=5)

        assert len(results) == 10  # top 5 ranked features x 2 entities
        for result in results:
            assert 'entity_id' in result
            assert 'feature_name' in result
            assert 'score' in result
            assert 'feature_value' in result
            assert result['feature_value'] == 0.5
            assert result['score'] >= 0
            assert result['score'] <= 1
            assert isinstance(result['feature_name'], str)
            assert result['entity_id'] in [1, 2]
Example 7
    def filter_same_train_end_times(self, engine):
        ensure_db(engine)
        init_engine(engine)
        mg1 = ModelGroupFactory(model_group_id=1, model_type='modelType1')
        mg2 = ModelGroupFactory(model_group_id=2, model_type='modelType2')
        mg3 = ModelGroupFactory(model_group_id=3, model_type='modelType3')
        mg4 = ModelGroupFactory(model_group_id=4, model_type='modelType4')
        # model group 1
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2016, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2017, 1, 1))
        # model group 2 only has one timestamp, should not pass
        ModelFactory(model_group_rel=mg2, train_end_time=datetime(2014, 1, 1))
        # model group 3
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2016, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2017, 1, 1))
        # model group 4 only has two timestamps, should not pass
        ModelFactory(model_group_rel=mg4, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg4, train_end_time=datetime(2016, 1, 1))

        session.commit()
        train_end_times = [
            '2014-01-01', '2015-01-01', '2016-01-01', '2017-01-01'
        ]
        model_groups = [1, 2, 3, 4]
        model_group_ids = model_groups_filter(
            train_end_times=train_end_times,
            initial_model_group_ids=model_groups,
            models_table='models',
            db_engine=engine)

        return model_group_ids
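
    # A hypothetical caller for the fixture above: only model groups 1 and 3
    # have models at all four train end times, so (assuming
    # model_groups_filter returns the surviving ids) we would expect:
    def test_filter_same_train_end_times_sketch(self, engine):
        assert set(self.filter_same_train_end_times(engine)) == {1, 3}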
Example 8
        def replace_db(arg):
            self.new_server = testing.postgresql.Postgresql(port=port)
            db_engine = create_engine(self.new_server.url())
            ensure_db(db_engine)
            init_engine(db_engine)

            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()
Example 9
def test_increment_field(db_engine_with_results_schema):
    experiment_run = ExperimentRunFactory()
    factory_session.commit()
    increment_field('matrices_made', experiment_run.run_id, db_engine_with_results_schema)
    increment_field('matrices_made', experiment_run.run_id, db_engine_with_results_schema)

    with scoped_session(db_engine_with_results_schema) as session:
        experiment_run_from_db = session.query(ExperimentRun).get(experiment_run.run_id)
        assert experiment_run_from_db.matrices_made == 2
Example 10
def update_ranks_test(predictor,
                      entities_scores_labels,
                      rank_col,
                      expected_result,
                      model_random_seed=12345,
                      need_seed_data=True):
    """Not a test in itself but rather a utility called by many of the ranking tests"""
    ensure_db(predictor.db_engine)
    init_engine(predictor.db_engine)
    model_id = 5
    matrix_uuid = "4567"
    matrix_type = "test"
    as_of_date = datetime.datetime(2012, 1, 1)
    if need_seed_data:
        matrix = MatrixFactory(matrix_uuid=matrix_uuid)
        model = ModelFactory(model_id=model_id, random_seed=model_random_seed)
        for entity_id, score, label in entities_scores_labels:
            PredictionFactory(model_rel=model,
                              matrix_rel=matrix,
                              as_of_date=as_of_date,
                              entity_id=entity_id,
                              score=score,
                              label_value=int(label))
        factory_session.commit()
    predictor.update_db_with_ranks(
        model_id=model_id,
        matrix_uuid=matrix_uuid,
        matrix_type=TestMatrixType,
    )
    ranks = tuple(row for row in predictor.db_engine.execute(
        f'''
select entity_id, {rank_col}::float
from {matrix_type}_results.predictions
where as_of_date = %s and model_id = %s and matrix_uuid = %s order by {rank_col} asc''',
        (as_of_date, model_id, matrix_uuid)))
    assert ranks == expected_result

    # Test that the predictions metadata table is populated
    metadata_records = [
        row for row in predictor.db_engine.execute(
            f"""select tiebreaker_ordering, prediction_metadata.random_seed, models.random_seed
        from {matrix_type}_results.prediction_metadata
        join triage_metadata.models using (model_id)
        join triage_metadata.matrices using (matrix_uuid)
        """)
    ]
    assert len(metadata_records) == 1
    tiebreaker_ordering, random_seed, received_model_random_seed = metadata_records[0]
    if tiebreaker_ordering == 'random':
        assert random_seed == model_random_seed
    else:
        assert not random_seed
    assert tiebreaker_ordering == predictor.rank_order
    assert received_model_random_seed == model_random_seed
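
# A hypothetical concrete test built on the utility above; the rank column and
# expected tuples are illustrative (rank_abs_no_ties is the column used by the
# multiple-dates test elsewhere in this suite):
def test_rank_abs_no_ties_sketch(predictor):
    update_ranks_test(
        predictor,
        entities_scores_labels=((1, 0.9, True), (2, 0.8, False)),
        rank_col='rank_abs_no_ties',
        expected_result=((1, 1.0), (2, 2.0)),
    )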
Example 11
def test_get_run_for_update(db_engine_with_results_schema):
    experiment_run = TriageRunFactory()
    factory_session.commit()
    with get_run_for_update(db_engine=db_engine_with_results_schema,
                            run_id=experiment_run.run_id) as run_obj:
        run_obj.stacktrace = "My stacktrace"

    with scoped_session(db_engine_with_results_schema) as session:
        experiment_run_from_db = session.query(TriageRun).get(
            experiment_run.run_id)
        assert experiment_run_from_db.stacktrace == "My stacktrace"
Example 12
def test_Audition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type)
            for model_type in model_types
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2014, 1, 1),
            datetime(2015, 1, 1),
            datetime(2016, 1, 1),
        ]

        models = [
            ModelFactory(model_group_rel=model_group,
                         train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model,
                                     metric=metric,
                                     parameter=parameter)

        session.commit()

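        # `config` is assumed to be a module-level audition config dict
        # defined elsewhere in this test module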
        with tempfile.TemporaryDirectory() as td:
            with mock.patch('os.getcwd') as mock_getcwd:
                mock_getcwd.return_value = td
                AuditionRunner(config_dict=config,
                               db_engine=db_engine,
                               directory=td).run()
                assert len(os.listdir(os.getcwd())) == 6
Example 13
def test_ModelEvaluator_needs_evaluation_with_bias_audit(
        db_engine_with_results_schema):
    # test that if a bias audit config is passed, and there are no matching bias audits
    # in the database, needs_evaluation returns true
    # this all assumes that evaluations are populated. those tests are in the 'no_bias_audit' test
    model_evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {
                    "top_n": [3]
                },
            },
        ],
        training_metric_groups=[],
        bias_config={'thresholds': {
            'top_n': [2]
        }},
        db_engine=db_engine_with_results_schema,
    )
    model_with_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    for subset_hash in [""]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="3_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        'as_of_date_frequency': as_of_date_frequency,
        'as_of_times': [eval_time],
    }
    test_matrix_store = MockMatrixStore("test",
                                        "1234",
                                        5,
                                        db_engine_with_results_schema,
                                        metadata_overrides=metadata_overrides)
    assert model_evaluator.needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_with_evaluations.model_id,
        subset_hash="",
    )
Example 14
def test_n_jobs_not_new_model(sample_matrix_store):
    grid_config = {
        'sklearn.ensemble.AdaBoostClassifier': {
            'n_estimators': [10, 100, 1000]
        },
        'sklearn.ensemble.RandomForestClassifier': {
            'n_estimators': [10, 100],
            'max_features': ['sqrt', 'log2'],
            'max_depth': [5, 10, 15, 20],
            'criterion': ['gini', 'entropy'],
            'n_jobs': [12, 24],
        }
    }

    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            trainer = ModelTrainer(project_path='econ-dev/inspections',
                                   experiment_hash=None,
                                   model_storage_engine=S3ModelStorageEngine(
                                       'econ-dev/inspections'),
                                   db_engine=db_engine,
                                   model_grouper=ModelGrouper())

            train_tasks = trainer.generate_train_tasks(
                grid_config,
                dict(),
                sample_matrix_store,
            )
            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()

            # 3 AdaBoost configs + 32 distinct RandomForest configs
            # (2 n_estimators x 2 max_features x 4 max_depth x 2 criterion).
            # The two n_jobs values would otherwise make it (32*2)+3, but
            # n_jobs doesn't define a new model, so duplicates are removed
            assert len(train_tasks) == 35
            assert len([
                task for task in train_tasks if 'n_jobs' in task['parameters']
            ]) == 32

            for train_task in train_tasks:
                trainer.process_train_task(**train_task)

            for row in db_engine.execute(
                    'select model_parameters from model_metadata.model_groups'
            ):
                assert 'n_jobs' not in row[0]
Example 15
    def __init__(
        self,
        matrix_type,
        matrix_uuid,
        label_count,
        db_engine,
        init_labels=None,
        metadata_overrides=None,
        matrix=None,
        init_as_of_dates=None,
    ):
        base_metadata = {
            "feature_start_time": datetime.date(2014, 1, 1),
            "end_time": datetime.date(2015, 1, 1),
            "as_of_date_frequency": "1y",
            "matrix_id": "some_matrix",
            "label_name": "label",
            "label_timespan": "3month",
            "indices": MatrixStore.indices,
            "matrix_type": matrix_type,
            "as_of_times":
            [datetime.date(2014, 10, 1),
             datetime.date(2014, 7, 1)],
        }
        metadata_overrides = metadata_overrides or {}
        base_metadata.update(metadata_overrides)
        if matrix is None:
            matrix = pd.DataFrame.from_dict({
                "entity_id": [1, 2],
                "as_of_date":
                [pd.Timestamp(2014, 10, 1),
                 pd.Timestamp(2014, 7, 1)],
                "feature_one": [3, 4],
                "feature_two": [5, 6],
                "label": [7, 8],
            }).set_index(MatrixStore.indices)
        if init_labels is None:
            init_labels = []
        labels = matrix.pop("label")
        self.matrix_label_tuple = matrix, labels
        self.metadata = base_metadata
        self.label_count = label_count
        self.init_labels = pd.Series(init_labels, dtype="float64")
        self.matrix_uuid = matrix_uuid
        self.init_as_of_dates = init_as_of_dates or []

        session = sessionmaker(db_engine)()
        session.add(Matrix(matrix_uuid=matrix_uuid))
        session.commit()
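
# A minimal instantiation sketch (hypothetical; assumes a db_engine fixture),
# mirroring how the mock store is built in the needs_evaluation tests:
mock_store = MockMatrixStore(
    matrix_type="test",
    matrix_uuid="1234",
    label_count=5,
    db_engine=db_engine,
    metadata_overrides={"as_of_date_frequency": "3d"},
)
assert mock_store.metadata["matrix_type"] == "test"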
Example 16
def test_prediction_ranks_multiple_dates(project_storage, db_engine):
    """make sure that multiple as-of-dates in a single matrix are handled correctly.
    keep the other variables simple by making no within-date ties that would end up
    testing the tiebreaker logic, just data for two dates with data that could theoretically
    confound a bad ranking method:
    - a different order for entities in both dates
    - each date has some not in the other
    """
    ensure_db(db_engine)
    init_engine(db_engine)
    predictor = Predictor(project_storage.model_storage_engine(), db_engine,
                          'worst')
    model_id = 5
    matrix_uuid = "4567"
    matrix_type = "test"
    entities_dates_and_scores = (
        (23, datetime.datetime(2012, 1, 1), 0.95),
        (34, datetime.datetime(2012, 1, 1), 0.94),
        (45, datetime.datetime(2013, 1, 1), 0.92),
        (23, datetime.datetime(2013, 1, 1), 0.45),
    )
    expected_result = (
        (23, datetime.datetime(2012, 1, 1), 1),
        (34, datetime.datetime(2012, 1, 1), 2),
        (45, datetime.datetime(2013, 1, 1), 3),
        (23, datetime.datetime(2013, 1, 1), 4),
    )
    matrix = MatrixFactory(matrix_uuid=matrix_uuid)
    model = ModelFactory(model_id=model_id)
    for entity_id, as_of_date, score in entities_dates_and_scores:
        PredictionFactory(model_rel=model,
                          matrix_rel=matrix,
                          as_of_date=as_of_date,
                          entity_id=entity_id,
                          score=score)
    factory_session.commit()
    predictor.update_db_with_ranks(
        model_id=model_id,
        matrix_uuid=matrix_uuid,
        matrix_type=TestMatrixType,
    )
    ranks = tuple(row for row in predictor.db_engine.execute(
        f'''
select entity_id, as_of_date, rank_abs_no_ties
from {matrix_type}_results.predictions
where model_id = %s and matrix_uuid = %s order by rank_abs_no_ties''', (
            model_id, matrix_uuid)))
    assert ranks == expected_result
Example 17
def test_predictor_get_train_columns():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _, model_id = fake_trained_model(
                project_path,
                model_storage_engine,
                db_engine,
                train_matrix_uuid=train_store.uuid,
            )
            predictor = Predictor(project_path, model_storage_engine,
                                  db_engine)

            # The train_store uuid is stored by fake_trained_model; store the
            # test_store uuid here as well
            MatrixFactory(matrix_uuid=test_store.uuid)
            session.commit()

            # Runs the same test for training and testing predictions
            for store, mat_type in zip((train_store, test_store),
                                       ("train", "test")):
                predict_proba = predictor.predict(
                    model_id,
                    store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_store.columns())
                # assert
                # 1. that we calculated predictions
                assert len(predict_proba) > 0

                # 2. that the predictions table entries are present and
                # can be linked to the original models
                records = [
                    row for row in db_engine.execute(
                        '''select entity_id, as_of_date
                    from {}_results.{}_predictions
                    join model_metadata.models using (model_id)'''.format(
                            mat_type, mat_type))
                ]
                assert len(records) > 0
Example 18
def fake_trained_model(db_engine, train_matrix_uuid='efgh'):
    """Creates and stores a trivial trained model and training matrix

    Args:
        db_engine (sqlalchemy.engine)
        train_matrix_uuid (str, optional): uuid to register for the training matrix

    Returns:
        (tuple) the trained model object and its model id for database retrieval
    """
    session = sessionmaker(db_engine)()
    session.merge(Matrix(matrix_uuid=train_matrix_uuid))

    # Create the fake trained model and store in db
    trained_model = MockTrainedModel()
    db_model = Model(model_hash='abcd', train_matrix_uuid=train_matrix_uuid)
    session.add(db_model)
    session.commit()
    return trained_model, db_model.model_id
Example 19
def get_matrix_store(project_storage, matrix=None, metadata=None):
    """Return a matrix store associated with the given project storage. Also adds an entry in the matrices table if it doesn't exist already

    Args:
        project_storage (triage.component.catwalk.storage.ProjectStorage) A project's storage
        matrix (dataframe, optional): A matrix to store. Defaults to the output of matrix_creator()
        metadata (dict, optional): matrix metadata. defaults to the output of matrix_metadata_creator()
    """
    if matrix is None:
        matrix = matrix_creator()
    if not metadata:
        metadata = matrix_metadata_creator()
    matrix_store = project_storage.matrix_storage_engine().get_store(metadata['metta-uuid'])
    matrix_store.matrix = matrix
    matrix_store.metadata = metadata
    matrix_store.save()
    if session.query(Matrix).filter(Matrix.matrix_uuid == matrix_store.uuid).count() == 0:
        MatrixFactory(matrix_uuid=matrix_store.uuid)
        session.commit()
    return matrix_store
Example 20
    def filter_train_end_times(self, engine, train_end_times):
        ensure_db(engine)
        init_engine(engine)
        mg1 = ModelGroupFactory(model_group_id=1, model_type="modelType1")
        mg2 = ModelGroupFactory(model_group_id=2, model_type="modelType2")
        mg3 = ModelGroupFactory(model_group_id=3, model_type="modelType3")
        mg4 = ModelGroupFactory(model_group_id=4, model_type="modelType4")
        mg5 = ModelGroupFactory(model_group_id=5, model_type="modelType5")
        # model group 1
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2016, 1, 1))
        ModelFactory(model_group_rel=mg1, train_end_time=datetime(2017, 1, 1))
        # model group 2 only has one timestamp
        ModelFactory(model_group_rel=mg2, train_end_time=datetime(2014, 1, 1))
        # model group 3
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2016, 1, 1))
        ModelFactory(model_group_rel=mg3, train_end_time=datetime(2017, 1, 1))
        # model group 4 only has two timestamps
        ModelFactory(model_group_rel=mg4, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg4, train_end_time=datetime(2016, 1, 1))
        # model group 5 only has three timestamps
        ModelFactory(model_group_rel=mg5, train_end_time=datetime(2014, 1, 1))
        ModelFactory(model_group_rel=mg5, train_end_time=datetime(2015, 1, 1))
        ModelFactory(model_group_rel=mg5, train_end_time=datetime(2016, 1, 1))

        session.commit()
        model_groups = [1, 2, 3, 4, 5]
        model_group_ids = model_groups_filter(
            train_end_times=train_end_times,
            initial_model_group_ids=model_groups,
            models_table="models",
            db_engine=engine,
        )

        return model_group_ids
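
    # A hypothetical caller for the fixture above: with train end times
    # 2015-01-01 and 2016-01-01, groups 1, 3, 4, and 5 each have models at
    # both times, while group 2 (2014 only) would be filtered out:
    def test_filter_train_end_times_sketch(self, engine):
        assert set(self.filter_train_end_times(
            engine, ["2015-01-01", "2016-01-01"])) == {1, 3, 4, 5}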
Example 21
def test_initialize_tracking_and_get_run_id(db_engine_with_results_schema):
    experiment = ExperimentFactory()
    factory_session.commit()
    experiment_hash = experiment.experiment_hash
    run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path='mymodule.MyClassName',
        experiment_kwargs={'key': 'value'},
        db_engine=db_engine_with_results_schema
    )
    assert run_id
    with scoped_session(db_engine_with_results_schema) as session:
        experiment_run = session.query(ExperimentRun).get(run_id)
        assert experiment_run.experiment_hash == experiment_hash
        assert experiment_run.experiment_class_path == 'mymodule.MyClassName'
        assert experiment_run.experiment_kwargs == {'key': 'value'}
    new_run_id = initialize_tracking_and_get_run_id(
        experiment_hash=experiment_hash,
        experiment_class_path='mymodule.MyClassName',
        experiment_kwargs={'key': 'value'},
        db_engine=db_engine_with_results_schema
    )
    assert new_run_id > run_id
Example 22
def test_model_trainer(sample_matrix_store, grid_config):
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')

            # Creates a matrix entry in the matrices table with uuid from metadata above
            MatrixFactory(matrix_uuid="1234")
            session.commit()
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(project_path)
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                model_grouper=ModelGrouper(),
                db_engine=db_engine,
            )
            model_ids = trainer.train_models(grid_config=grid_config,
                                             misc_db_parameters=dict(),
                                             matrix_store=sample_matrix_store)

            # assert
            # 1. that the models and feature importances table entries are present
            records = [
                row for row in db_engine.execute(
                    'select * from train_results.feature_importances')
            ]
            assert len(records) == 4 * 2  # 4 models x 2 features

            records = [
                row for row in db_engine.execute(
                    'select model_hash from model_metadata.models')
            ]
            assert len(records) == 4
            hashes = [row[0] for row in records]

            # 2. that the model groups are distinct
            records = [
                row for row in db_engine.execute(
                    'select distinct model_group_id from model_metadata.models'
                )
            ]
            assert len(records) == 4

            # 3. that the model sizes are saved in the table and all are < 1 kB
            records = [
                row for row in db_engine.execute(
                    'select model_size from model_metadata.models')
            ]
            assert len(records) == 4
            for i in records:
                size = i[0]
                assert size < 1

            # 4. that all four models are cached
            model_pickles = [
                model_storage_engine.get_store(model_hash).load()
                for model_hash in hashes
            ]
            assert len(model_pickles) == 4
            assert len([x for x in model_pickles if x is not None]) == 4

            # 5. that their results can have predictions made on it
            test_matrix = pandas.DataFrame.from_dict({
                'entity_id': [3, 4],
                'feature_one': [4, 4],
                'feature_two': [6, 5],
            })

            test_matrix = InMemoryMatrixStore(
                matrix=test_matrix, metadata=sample_metadata()).matrix

            for model_pickle in model_pickles:
                predictions = model_pickle.predict(test_matrix)
                assert len(predictions) == 2

            # 6. when run again, same models are returned
            new_model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store)
            assert len([
                row for row in db_engine.execute(
                    'select model_hash from model_metadata.models')
            ]) == 4
            assert model_ids == new_model_ids

            # 7. if replace is set, update non-unique attributes and feature importances
            max_batch_run_time = [
                row[0] for row in db_engine.execute(
                    'select max(batch_run_time) from model_metadata.models')
            ][0]
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                model_grouper=ModelGrouper(
                    model_group_keys=['label_name', 'label_timespan']),
                db_engine=db_engine,
                replace=True)
            new_model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store,
            )
            assert model_ids == new_model_ids
            assert [
                row['model_id'] for row in db_engine.execute(
                    'select model_id from model_metadata.models order by 1 asc'
                )
            ] == model_ids
            new_max_batch_run_time = [
                row[0] for row in db_engine.execute(
                    'select max(batch_run_time) from model_metadata.models')
            ][0]
            assert new_max_batch_run_time > max_batch_run_time

            records = [
                row for row in db_engine.execute(
                    'select * from train_results.feature_importances')
            ]
            assert len(records) == 4 * 2  # 4 models x 2 features

            # 8. if the cache is missing but the metadata is still there, reuse the metadata
            for row in db_engine.execute(
                    'select model_hash from model_metadata.models'):
                model_storage_engine.get_store(row[0]).delete()
            new_model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store)
            assert model_ids == sorted(new_model_ids)

            # 9. that the generator interface works the same way
            new_model_ids = trainer.generate_trained_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=sample_matrix_store)
            assert model_ids == sorted(new_model_ids)
Example 23
def test_ModelEvaluator_needs_evaluation_no_bias_audit(db_engine_with_results_schema):
    # TEST SETUP:

    # create two models: one that has zero evaluations,
    # one that has an evaluation for precision@100_abs
    # both overall and for each subset
    model_with_evaluations = ModelFactory()
    model_without_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    for subset_hash in [""] + [filename_friendly_hash(subset) for subset in SUBSETS]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="100_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        "as_of_date_frequency": as_of_date_frequency,
        "as_of_times": [eval_time],
    }
    test_matrix_store = MockMatrixStore(
        "test",
        "1234",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )
    train_matrix_store = MockMatrixStore(
        "train",
        "2345",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )

    # the evaluated model has test evaluations for precision, but not recall,
    # so this needs evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)

        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@", "recall@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the evaluated model has test evaluations for precision,
    # so this should not need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)

        assert not ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the non-evaluated model has no evaluations,
    # so this should need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)

        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_without_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the evaluated model has no *train* evaluations,
    # so the train matrix should need evaluations
    for subset in SUBSETS:
        if not subset:
            subset_hash = ""
        else:
            subset_hash = filename_friendly_hash(subset)

        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=train_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )
    session.close()
    session.remove()
Example 24
def test_PreAudition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        # set up data, randomly generated by the factories but conforming
        # generally to what we expect triage_metadata schema data to look like
        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_configs = [
            {"label_definition": "label_1"}
            if i % 2 == 0
            else {"label_definition": "label_2"}
            for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type, model_config=model_config)
            for model_type, model_config in zip(model_types, model_configs)
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2013, 7, 1),
            datetime(2014, 1, 1),
            datetime(2014, 7, 1),
            datetime(2015, 1, 1),
            datetime(2015, 7, 1),
            datetime(2016, 7, 1),
            datetime(2016, 1, 1),
        ]

        models = [
            ModelFactory(model_group_rel=model_group, train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time
            )

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(
                    model_rel=model, metric=metric, parameter=parameter
                )

        session.commit()

        pre_aud = PreAudition(db_engine)

        # Expect the number of model groups with label_1
        assert len(pre_aud.get_model_groups_from_label("label_1")['model_groups']) == sum(
            [x["label_definition"] == "label_1" for x in model_configs]
        )

        # Expect no baseline model groups
        assert len(pre_aud.get_model_groups_from_label("label_1")['baseline_model_groups']) == 0

        # Expect the number of model groups with a certain experiment_hash
        experiment_hash = list(
            pd.read_sql(
                """SELECT experiment_hash
                FROM triage_metadata.models
                JOIN triage_metadata.experiment_models using (model_hash)
                limit 1""",
                con=db_engine,
            )["experiment_hash"]
        )[0]
        assert len(pre_aud.get_model_groups_from_experiment(experiment_hash)['model_groups']) == 1

        # Expect the number of model groups for custom SQL
        query = """
            SELECT DISTINCT(model_group_id)
            FROM triage_metadata.models
            JOIN triage_metadata.experiment_models using (model_hash)
            WHERE train_end_time >= '2013-01-01'
            AND experiment_hash = '{}'
        """.format(
            experiment_hash
        )
        assert len(pre_aud.get_model_groups(query)) == 1
        # Expect the number of train_end_times after 2014-01-01
        assert len(pre_aud.get_train_end_times(after="2014-01-01")) == 6

        query = """
            SELECT DISTINCT train_end_time
            FROM triage_metadata.models
            WHERE model_group_id IN ({})
                AND train_end_time >= '2014-01-01'
            ORDER BY train_end_time
            """.format(
            ", ".join(map(str, pre_aud.model_groups))
        )

        assert len(pre_aud.get_train_end_times(query=query)) == 6
Example 25
def create_sample_distance_table(engine):
    ensure_db(engine)
    init_engine(engine)
    model_groups = {
        'stable': ModelGroupFactory(model_type='myStableClassifier'),
        'spiky': ModelGroupFactory(model_type='mySpikeClassifier'),
    }

    class StableModelFactory(ModelFactory):
        model_group_rel = model_groups['stable']

    class SpikyModelFactory(ModelFactory):
        model_group_rel = model_groups['spiky']

    models = {
        'stable_3y_ago': StableModelFactory(train_end_time='2014-01-01'),
        'stable_2y_ago': StableModelFactory(train_end_time='2015-01-01'),
        'stable_1y_ago': StableModelFactory(train_end_time='2016-01-01'),
        'spiky_3y_ago': SpikyModelFactory(train_end_time='2014-01-01'),
        'spiky_2y_ago': SpikyModelFactory(train_end_time='2015-01-01'),
        'spiky_1y_ago': SpikyModelFactory(train_end_time='2016-01-01'),
    }
    session.commit()
    distance_table = DistanceFromBestTable(db_engine=engine,
                                           models_table='models',
                                           distance_table='dist_table')
    distance_table._create()
    stable_grp = model_groups['stable'].model_group_id
    spiky_grp = model_groups['spiky'].model_group_id
    stable_3y_id = models['stable_3y_ago'].model_id
    stable_3y_end = models['stable_3y_ago'].train_end_time
    stable_2y_id = models['stable_2y_ago'].model_id
    stable_2y_end = models['stable_2y_ago'].train_end_time
    stable_1y_id = models['stable_1y_ago'].model_id
    stable_1y_end = models['stable_1y_ago'].train_end_time
    spiky_3y_id = models['spiky_3y_ago'].model_id
    spiky_3y_end = models['spiky_3y_ago'].train_end_time
    spiky_2y_id = models['spiky_2y_ago'].model_id
    spiky_2y_end = models['spiky_2y_ago'].train_end_time
    spiky_1y_id = models['spiky_1y_ago'].model_id
    spiky_1y_end = models['spiky_1y_ago'].train_end_time
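    # Each 10-tuple below matches the insert statement's 10 placeholders; from
    # the columns queried in the tests this is plausibly (model_group_id,
    # model_id, train_end_time, metric, parameter, raw_value, best_case,
    # dist_from_best_case, raw_value_next_time, dist_from_best_case_next_time)
    # -- the unqueried column names are assumptions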
    distance_rows = [
        (stable_grp, stable_3y_id, stable_3y_end, 'precision@', '100_abs', 0.5,
         0.6, 0.1, 0.5, 0.15),
        (stable_grp, stable_2y_id, stable_2y_end, 'precision@', '100_abs', 0.5,
         0.84, 0.34, 0.5, 0.18),
        (stable_grp, stable_1y_id, stable_1y_end, 'precision@', '100_abs',
         0.46, 0.67, 0.21, 0.5, 0.11),
        (spiky_grp, spiky_3y_id, spiky_3y_end, 'precision@', '100_abs', 0.45,
         0.6, 0.15, 0.5, 0.19),
        (spiky_grp, spiky_2y_id, spiky_2y_end, 'precision@', '100_abs', 0.84,
         0.84, 0.0, 0.5, 0.3),
        (spiky_grp, spiky_1y_id, spiky_1y_end, 'precision@', '100_abs', 0.45,
         0.67, 0.22, 0.5, 0.12),
        (stable_grp, stable_3y_id, stable_3y_end, 'recall@', '100_abs', 0.4,
         0.4, 0.0, 0.4, 0.0),
        (stable_grp, stable_2y_id, stable_2y_end, 'recall@', '100_abs', 0.5,
         0.5, 0.0, 0.5, 0.0),
        (stable_grp, stable_1y_id, stable_1y_end, 'recall@', '100_abs', 0.6,
         0.6, 0.0, 0.6, 0.0),
        (spiky_grp, spiky_3y_id, spiky_3y_end, 'recall@', '100_abs', 0.65,
         0.65, 0.0, 0.65, 0.0),
        (spiky_grp, spiky_2y_id, spiky_2y_end, 'recall@', '100_abs', 0.55,
         0.55, 0.0, 0.55, 0.0),
        (spiky_grp, spiky_1y_id, spiky_1y_end, 'recall@', '100_abs', 0.45,
         0.45, 0.0, 0.45, 0.0),
    ]
    for dist_row in distance_rows:
        engine.execute(
            'insert into dist_table values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
            dist_row)
    return distance_table, model_groups
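
# A minimal caller sketch (hypothetical) for the fixture above:
def test_create_sample_distance_table_sketch():
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        distance_table, model_groups = create_sample_distance_table(engine)
        row_count = [
            row[0] for row in engine.execute('select count(*) from dist_table')
        ][0]
        assert row_count == 12  # 6 precision@ rows + 6 recall@ rows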
Example 26
def test_DistanceFromBestTable():
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        init_engine(engine)
        model_groups = {
            "stable": ModelGroupFactory(model_type="myStableClassifier"),
            "bad": ModelGroupFactory(model_type="myBadClassifier"),
            "spiky": ModelGroupFactory(model_type="mySpikeClassifier"),
        }

        class StableModelFactory(ModelFactory):
            model_group_rel = model_groups["stable"]

        class BadModelFactory(ModelFactory):
            model_group_rel = model_groups["bad"]

        class SpikyModelFactory(ModelFactory):
            model_group_rel = model_groups["spiky"]

        models = {
            "stable_3y_ago": StableModelFactory(train_end_time="2014-01-01"),
            "stable_2y_ago": StableModelFactory(train_end_time="2015-01-01"),
            "stable_1y_ago": StableModelFactory(train_end_time="2016-01-01"),
            "bad_3y_ago": BadModelFactory(train_end_time="2014-01-01"),
            "bad_2y_ago": BadModelFactory(train_end_time="2015-01-01"),
            "bad_1y_ago": BadModelFactory(train_end_time="2016-01-01"),
            "spiky_3y_ago": SpikyModelFactory(train_end_time="2014-01-01"),
            "spiky_2y_ago": SpikyModelFactory(train_end_time="2015-01-01"),
            "spiky_1y_ago": SpikyModelFactory(train_end_time="2016-01-01"),
        }

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)
            evaluation_end_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 1))

        class MonthOutEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 31))
            evaluation_end_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 32))

        class Precision100Factory(ImmediateEvalFactory):
            metric = "precision@"
            parameter = "100_abs"

        class Precision100FactoryMonthOut(MonthOutEvalFactory):
            metric = "precision@"
            parameter = "100_abs"

        class Recall100Factory(ImmediateEvalFactory):
            metric = "recall@"
            parameter = "100_abs"

        class Recall100FactoryMonthOut(MonthOutEvalFactory):
            metric = "recall@"
            parameter = "100_abs"

        for (add_val, PrecFac, RecFac) in (
            (0, Precision100Factory, Recall100Factory),
            (-0.15, Precision100FactoryMonthOut, Recall100FactoryMonthOut),
        ):
            PrecFac(model_rel=models["stable_3y_ago"], value=0.6 + add_val)
            PrecFac(model_rel=models["stable_2y_ago"], value=0.57 + add_val)
            PrecFac(model_rel=models["stable_1y_ago"], value=0.59 + add_val)
            PrecFac(model_rel=models["bad_3y_ago"], value=0.4 + add_val)
            PrecFac(model_rel=models["bad_2y_ago"], value=0.39 + add_val)
            PrecFac(model_rel=models["bad_1y_ago"], value=0.43 + add_val)
            PrecFac(model_rel=models["spiky_3y_ago"], value=0.8 + add_val)
            PrecFac(model_rel=models["spiky_2y_ago"], value=0.4 + add_val)
            PrecFac(model_rel=models["spiky_1y_ago"], value=0.4 + add_val)
            RecFac(model_rel=models["stable_3y_ago"], value=0.55 + add_val)
            RecFac(model_rel=models["stable_2y_ago"], value=0.56 + add_val)
            RecFac(model_rel=models["stable_1y_ago"], value=0.55 + add_val)
            RecFac(model_rel=models["bad_3y_ago"], value=0.35 + add_val)
            RecFac(model_rel=models["bad_2y_ago"], value=0.34 + add_val)
            RecFac(model_rel=models["bad_1y_ago"], value=0.36 + add_val)
            RecFac(model_rel=models["spiky_3y_ago"], value=0.35 + add_val)
            RecFac(model_rel=models["spiky_2y_ago"], value=0.8 + add_val)
            RecFac(model_rel=models["spiky_1y_ago"], value=0.36 + add_val)
        session.commit()
        distance_table = DistanceFromBestTable(db_engine=engine,
                                               models_table="models",
                                               distance_table="dist_table")
        metrics = [
            {
                "metric": "precision@",
                "parameter": "100_abs"
            },
            {
                "metric": "recall@",
                "parameter": "100_abs"
            },
        ]
        model_group_ids = [mg.model_group_id for mg in model_groups.values()]
        distance_table.create_and_populate(
            model_group_ids, ["2014-01-01", "2015-01-01", "2016-01-01"],
            metrics)

        # get an ordered list of the models/groups for a particular metric/time
        query = """
            select model_id, raw_value, dist_from_best_case, dist_from_best_case_next_time
            from dist_table where metric = %s and parameter = %s and train_end_time = %s
            order by dist_from_best_case
        """

        prec_3y_ago = engine.execute(query,
                                     ("precision@", "100_abs", "2014-01-01"))
        assert [row for row in prec_3y_ago] == [
            (models["spiky_3y_ago"].model_id, 0.8, 0, 0.17),
            (models["stable_3y_ago"].model_id, 0.6, 0.2, 0),
            (models["bad_3y_ago"].model_id, 0.4, 0.4, 0.18),
        ]

        recall_2y_ago = engine.execute(query,
                                       ("recall@", "100_abs", "2015-01-01"))
        assert [row for row in recall_2y_ago] == [
            (models["spiky_2y_ago"].model_id, 0.8, 0, 0.19),
            (models["stable_2y_ago"].model_id, 0.56, 0.24, 0),
            (models["bad_2y_ago"].model_id, 0.34, 0.46, 0.19),
        ]

        assert distance_table.observed_bounds == {
            ("precision@", "100_abs"): (0.39, 0.8),
            ("recall@", "100_abs"): (0.34, 0.8),
        }
Example 27
def test_Auditioner():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        # set up data, randomly generated by the factories but conforming
        # generally to what we expect triage_metadata schema data to look like

        num_model_groups = 10
        model_types = [
            "classifier type {}".format(i) for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type)
            for model_type in model_types
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2014, 1, 1),
            datetime(2015, 1, 1),
            datetime(2016, 1, 1),
        ]

        models = [
            ModelFactory(model_group_rel=model_group,
                         train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ("precision@", "100_abs"),
            ("recall@", "100_abs"),
            ("precision@", "50_abs"),
            ("recall@", "50_abs"),
            ("fpr@", "10_pct"),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model,
                                     metric=metric,
                                     parameter=parameter)

        session.commit()

        # define a very loose filtering that should admit all model groups
        no_filtering = [
            {
                "metric": "precision@",
                "parameter": "100_abs",
                "max_from_best": 1.0,
                "threshold_value": 0.0,
            },
            {
                "metric": "recall@",
                "parameter": "100_abs",
                "max_from_best": 1.0,
                "threshold_value": 0.0,
            },
        ]
        model_group_ids = [mg.model_group_id for mg in model_groups]
        auditioner = Auditioner(db_engine, model_group_ids, train_end_times,
                                no_filtering)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups
        auditioner.plot_model_groups()

        # here, we pick thresholding rules that should definitely remove
        # all model groups from contention because they are too strict.
        remove_all = [
            {
                "metric": "precision@",
                "parameter": "100_abs",
                "max_from_best": 0.0,
                "threshold_value": 1.1,
            },
            {
                "metric": "recall@",
                "parameter": "100_abs",
                "max_from_best": 0.0,
                "threshold_value": 1.1,
            },
        ]

        auditioner.update_metric_filters(new_filters=remove_all)
        assert len(auditioner.thresholded_model_group_ids) == 0

        # call set_one_metric_filter with explicit arguments instead; it
        # should likewise remove all model groups
        auditioner.set_one_metric_filter(
            metric="precision@",
            parameter="100_abs",
            max_from_best=0.0,
            threshold_value=1.1,
        )
        assert len(auditioner.thresholded_model_group_ids) == 0

        # one potential source of bugs is relaxing rules that proved too
        # restrictive. we want the original model group list to always be used
        # for thresholding, or else loosening the filters could never
        # re-admit a model group
        auditioner.update_metric_filters(new_filters=no_filtering)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # call set_one_metric_filter with explicit arguments instead; it
        # should likewise let all model groups pass
        auditioner.set_one_metric_filter(
            metric="precision@",
            parameter="100_abs",
            max_from_best=1.0,
            threshold_value=0.0,
        )
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # now, we want to take this partially thresholded list and run it through
        # a grid of selection rules, meant to pick winners by a variety of user-defined
        # criteria
        rule_grid = [
            {
                "shared_parameters": [
                    {
                        "metric": "precision@",
                        "parameter": "100_abs"
                    },
                    {
                        "metric": "recall@",
                        "parameter": "100_abs"
                    },
                ],
                "selection_rules": [
                    {
                        "name": "most_frequent_best_dist",
                        "dist_from_best_case": [0.1, 0.2, 0.3],
                        "n": 1,
                    },
                    {
                        "name": "best_current_value",
                        "n": 1
                    },
                ],
            },
            {
                "shared_parameters": [{
                    "metric1": "precision@",
                    "parameter1": "100_abs"
                }],
                "selection_rules": [{
                    "name": "best_average_two_metrics",
                    "metric2": ["recall@"],
                    "parameter2": ["100_abs"],
                    "metric1_weight": [0.4, 0.5, 0.6],
                    "n": 1,
                }],
            },
        ]
        auditioner.register_selection_rule_grid(rule_grid, plot=False)
        final_model_group_ids = auditioner.selection_rule_model_group_ids

        # we expect the result to be a mapping of selection rule name to model group id
        assert isinstance(final_model_group_ids, dict)

        # we expect that there is one winner for each selection rule
        assert sorted(final_model_group_ids.keys()) == sorted(
            [rule.descriptive_name for rule in auditioner.selection_rules])
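
Each entry in rule_grid describes a family of rules: every shared_parameters dict is paired with every selection rule, and list-valued arguments are swept. Under that reading, the first entry above yields 2 x (3 + 1) = 8 concrete rules and the second yields 3. A rough standalone sketch of the expansion logic, not Audition's actual implementation:

from itertools import product

def expand_rule_grid(rule_grid):
    """Enumerate concrete (shared_params, rule_name, rule_args) tuples.

    A simplified re-implementation for illustration only; it is not
    Audition's own expansion code.
    """
    concrete = []
    for group in rule_grid:
        for shared in group["shared_parameters"]:
            for rule in group["selection_rules"]:
                # treat list-valued arguments as axes to sweep over
                axes = {k: (v if isinstance(v, list) else [v])
                        for k, v in rule.items() if k != "name"}
                for combo in product(*axes.values()):
                    concrete.append(
                        (shared, rule["name"], dict(zip(axes, combo))))
    return concrete

Applied to the first entry of rule_grid above, this yields eight tuples: two shared-parameter dicts times (three dist_from_best_case values plus one best_current_value rule).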
Example 28
def test_integration():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'

            # create train and test matrices
            train_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            train_metadata = {
                'feature_start_time': datetime.date(2012, 12, 20),
                'end_time': datetime.date(2016, 12, 20),
                'label_name': 'label',
                'label_timespan': '1y',
                'feature_names': ['ft1', 'ft2'],
                'metta-uuid': '1234',
                'indices': ['entity_id'],
                'matrix_type': 'train'
            }
            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()

            train_store = InMemoryMatrixStore(train_matrix, train_metadata)

            as_of_dates = [
                datetime.date(2016, 12, 21),
                datetime.date(2017, 1, 21)
            ]

            test_stores = [
                InMemoryMatrixStore(
                    pandas.DataFrame.from_dict({
                        'entity_id': [3],
                        'feature_one': [8],
                        'feature_two': [5],
                        'label': [5]
                    }), {
                        'label_name': 'label',
                        'label_timespan': '1y',
                        'end_time': as_of_date,
                        'metta-uuid': '1234',
                        'indices': ['entity_id'],
                        'matrix_type': 'test',
                        'as_of_date_frequency': '1month'
                    }) for as_of_date in as_of_dates
            ]

            model_storage_engine = S3ModelStorageEngine(project_path)

            experiment_hash = save_experiment_and_get_hash({}, db_engine)
            # instantiate pipeline objects
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=experiment_hash,
                model_storage_engine=model_storage_engine,
                db_engine=db_engine,
            )
            predictor = Predictor(project_path, model_storage_engine,
                                  db_engine)
            model_evaluator = ModelEvaluator([{
                'metrics': ['precision@'],
                'thresholds': {
                    'top_n': [5]
                }
            }], [{}], db_engine)

            # run the pipeline
            grid_config = {
                'sklearn.linear_model.LogisticRegression': {
                    'C': [0.00001, 0.0001],
                    'penalty': ['l1', 'l2'],
                    'random_state': [2193]
                }
            }
            model_ids = trainer.train_models(grid_config=grid_config,
                                             misc_db_parameters=dict(),
                                             matrix_store=train_store)

            for model_id in model_ids:
                for as_of_date, test_store in zip(as_of_dates, test_stores):
                    predictions_proba = predictor.predict(
                        model_id,
                        test_store,
                        misc_db_parameters=dict(),
                        train_matrix_columns=['feature_one', 'feature_two'])

                    model_evaluator.evaluate(
                        predictions_proba,
                        test_store,
                        model_id,
                    )

            # assert
            # 1. that the predictions table entries are present and
            # can be linked to the original models
            records = [
                row for row in db_engine.execute(
                    '''select entity_id, model_id, as_of_date
                from test_results.test_predictions
                join model_metadata.models using (model_id)
                order by 3, 2''')
            ]
            assert records == [
                (3, 1, datetime.datetime(2016, 12, 21)),
                (3, 2, datetime.datetime(2016, 12, 21)),
                (3, 3, datetime.datetime(2016, 12, 21)),
                (3, 4, datetime.datetime(2016, 12, 21)),
                (3, 1, datetime.datetime(2017, 1, 21)),
                (3, 2, datetime.datetime(2017, 1, 21)),
                (3, 3, datetime.datetime(2017, 1, 21)),
                (3, 4, datetime.datetime(2017, 1, 21)),
            ]

            # 2. that the evaluations are present
            records = [
                row for row in db_engine.execute('''
                    select model_id, evaluation_start_time, metric, parameter
                    from test_results.test_evaluations order by 2, 1''')
            ]
            assert records == [
                (1, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (2, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (3, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (4, datetime.datetime(2016, 12, 21), 'precision@', '5_abs'),
                (1, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (2, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (3, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
                (4, datetime.datetime(2017, 1, 21), 'precision@', '5_abs'),
            ]
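
The grid_config above expands to four parameter combinations (two values of C times two penalties, with a single random_state), which is why the assertions see model_ids 1 through 4. Whether ModelTrainer enumerates the grid this way internally is an implementation detail, but sklearn's ParameterGrid reproduces the count:

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({
    'C': [0.00001, 0.0001],
    'penalty': ['l1', 'l2'],
    'random_state': [2193],
})
# 2 * 2 * 1 = 4 combinations, one per trained model in the test
assert len(grid) == 4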
Example 29
def test_evaluating_early_warning(db_engine_with_results_schema):
    num_entities = 10
    labels = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

    # Set up testing configuration parameters
    testing_metric_groups = [
        {
            "metrics": [
                "precision@",
                "recall@",
                "true positives@",
                "true negatives@",
                "false positives@",
                "false negatives@",
            ],
            "thresholds": {"percentiles": [5.0, 10.0], "top_n": [5, 10]},
        },
        {
            "metrics": [
                "f1",
                "mediocre",
                "accuracy",
                "roc_auc",
                "average precision score",
            ]
        },
        {"metrics": ["fbeta@"], "parameters": [{"beta": 0.75}, {"beta": 1.25}]},
    ]

    training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]

    custom_metrics = {"mediocre": always_half}

    # Acquire fake data and objects to be used in the tests
    model_evaluator = ModelEvaluator(
        testing_metric_groups,
        training_metric_groups,
        db_engine_with_results_schema,
        custom_metrics=custom_metrics,
    )

    fake_test_matrix_store = MockMatrixStore(
        matrix_type="test",
        matrix_uuid="efgh",
        label_count=num_entities,
        db_engine=db_engine_with_results_schema,
        init_labels=pd.DataFrame(
            {
                "label_value": labels,
                "entity_id": list(range(num_entities)),
                "as_of_date": [TRAIN_END_TIME] * num_entities,
            }
        )
        .set_index(["entity_id", "as_of_date"])
        .label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )
    fake_train_matrix_store = MockMatrixStore(
        matrix_type="train",
        matrix_uuid="1234",
        label_count=num_entities,
        db_engine=db_engine_with_results_schema,
        init_labels=pd.DataFrame(
            {
                "label_value": labels,
                "entity_id": list(range(num_entities)),
                "as_of_date": [TRAIN_END_TIME] * num_entities,
            }
        )
        .set_index(["entity_id", "as_of_date"])
        .label_value,
        init_as_of_dates=[TRAIN_END_TIME],
    )

    trained_model, model_id = fake_trained_model(
        db_engine_with_results_schema,
        train_end_time=TRAIN_END_TIME,
    )

    # ensure that the matrix uuid is present; no test evaluations have been
    # written yet, so this passes vacuously on an empty result set
    matrix_uuids = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            "select matrix_uuid from test_results.evaluations"
        )
    ]
    assert all(matrix_uuid == "efgh" for matrix_uuid in matrix_uuids)

    # Evaluate the training metrics and test
    model_evaluator.evaluate(
        trained_model.predict_proba(labels)[:, 1], fake_train_matrix_store, model_id
    )
    records = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            """select distinct(metric || parameter)
            from train_results.evaluations
            where model_id = %s and
            evaluation_start_time = %s
            order by 1""",
            (model_id, fake_train_matrix_store.as_of_dates[0]),
        )
    ]
    assert records == ["accuracy", "roc_auc"]

    # Run tests for overall and subset evaluations
    for subset in SUBSETS:
        if subset is None:
            where_hash = ""
        else:
            populate_subset_data(
                db_engine_with_results_schema, subset, list(range(num_entities))
            )
            SubsetFactory(subset_hash=filename_friendly_hash(subset))
            session.commit()
            where_hash = f"and subset_hash = '{filename_friendly_hash(subset)}'"
        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_test_matrix_store,
            model_id,
            subset=subset,
        )

        records = [
            row[0]
            for row in db_engine_with_results_schema.execute(
                f"""\
                select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                {where_hash}
                order by 1
                """,
                (model_id, fake_test_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == [
            "accuracy",
            "average precision score",
            "f1",
            "false negatives@10.0_pct",
            "false negatives@10_abs",
            "false negatives@5.0_pct",
            "false negatives@5_abs",
            "false positives@10.0_pct",
            "false positives@10_abs",
            "false positives@5.0_pct",
            "false positives@5_abs",
            "fbeta@0.75_beta",
            "fbeta@1.25_beta",
            "mediocre",
            "precision@10.0_pct",
            "precision@10_abs",
            "precision@5.0_pct",
            "precision@5_abs",
            "recall@10.0_pct",
            "recall@10_abs",
            "recall@5.0_pct",
            "recall@5_abs",
            "roc_auc",
            "true negatives@10.0_pct",
            "true negatives@10_abs",
            "true negatives@5.0_pct",
            "true negatives@5_abs",
            "true positives@10.0_pct",
            "true positives@10_abs",
            "true positives@5.0_pct",
            "true positives@5_abs",
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_train_matrix_store,
            model_id,
            subset=subset,
        )

        records = [
            row[0]
            for row in db_engine_with_results_schema.execute(
                f"""select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                {where_hash}
                order by 1""",
                (model_id, fake_train_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == ["accuracy", "roc_auc"]

    # ensure that the matrix uuid is present
    matrix_uuids = [
        row[0]
        for row in db_engine_with_results_schema.execute(
            "select matrix_uuid from train_results.evaluations"
        )
    ]
    assert all(matrix_uuid == "1234" for matrix_uuid in matrix_uuids)
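
The long expected-records list is just the cross product of the six threshold metrics with their four parameter strings, plus the parameterless metrics and the two fbeta variants, concatenated as metric || parameter and sorted. A small sketch that rebuilds the same 31 keys (the '5.0_pct' / '5_abs' string conventions are read off the asserts above):

threshold_metrics = [
    "precision@", "recall@", "true positives@", "true negatives@",
    "false positives@", "false negatives@",
]
parameters = ["5.0_pct", "10.0_pct", "5_abs", "10_abs"]
flat_metrics = ["f1", "mediocre", "accuracy", "roc_auc",
                "average precision score"]
fbetas = ["fbeta@0.75_beta", "fbeta@1.25_beta"]

keys = sorted(
    [m + p for m in threshold_metrics for p in parameters]
    + flat_metrics + fbetas
)
assert len(keys) == 6 * 4 + 5 + 2  # 31 distinct metric/parameter keys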
Example 30
def test_PreAudition():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        # set up data, randomly generated by the factories but conforming
        # generally to what we expect results schema data to look like
        num_model_groups = 10
        model_types = [
            'classifier type {}'.format(i) for i in range(0, num_model_groups)
        ]
        model_configs = [{
            'label_definition': 'label_1'
        } if i % 2 == 0 else {
            'label_definition': 'label_2'
        } for i in range(0, num_model_groups)]
        model_groups = [
            ModelGroupFactory(model_type=model_type, model_config=model_config)
            for model_type, model_config in zip(model_types, model_configs)
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2013, 7, 1),
            datetime(2014, 1, 1),
            datetime(2014, 7, 1),
            datetime(2015, 1, 1),
            datetime(2015, 7, 1),
            datetime(2016, 1, 1),
            datetime(2016, 7, 1),
        ]
        models = [
            ModelFactory(model_group_rel=model_group,
                         train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ('precision@', '100_abs'),
            ('recall@', '100_abs'),
            ('precision@', '50_abs'),
            ('recall@', '50_abs'),
            ('fpr@', '10_pct'),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        for model in models:
            for (metric, parameter) in metrics:
                ImmediateEvalFactory(model_rel=model,
                                     metric=metric,
                                     parameter=parameter)

        session.commit()

        pre_aud = PreAudition(db_engine)

        # Expect the number of model groups with label_1
        assert len(pre_aud.get_model_groups_from_label("label_1")) == \
            sum(x['label_definition'] == 'label_1' for x in model_configs)

        # Expect the number of model groups with a given experiment_hash
        experiment_hash = list(
            pd.read_sql("SELECT experiment_hash FROM results.models limit 1",
                        con=db_engine)['experiment_hash'])[0]
        assert len(
            pre_aud.get_model_groups_from_experiment(experiment_hash)) == 1

        # Expect the number of model groups for a custom SQL query
        query = """
            SELECT DISTINCT(model_group_id)
            FROM results.models
            WHERE train_end_time >= '2013-01-01'
            AND experiment_hash = '{}'
        """.format(experiment_hash)
        assert len(pre_aud.get_model_groups(query)) == 1

        # Expect the number of train_end_times on or after 2014-01-01
        assert len(pre_aud.get_train_end_times(after='2014-01-01')) == 6

        query = """
            SELECT DISTINCT train_end_time
            FROM results.models
            WHERE model_group_id IN ({})
                AND train_end_time >= '2014-01-01'
            ORDER BY train_end_time
            """.format(', '.join(map(str, pre_aud.model_groups)))

        assert len(pre_aud.get_train_end_times(query=query)) == 6
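
A final note on the raw SQL: the queries above splice experiment_hash and the model group ids in with .format, which is fine against factory-generated test data but unsafe with untrusted input. A hedged sketch of the bound-parameter alternative in SQLAlchemy 1.x (connection string hypothetical, table names as in the test):

from sqlalchemy import create_engine, text

engine = create_engine("postgresql://localhost/example")  # hypothetical DSN
query = text("""
    SELECT DISTINCT model_group_id
    FROM results.models
    WHERE train_end_time >= :start
      AND experiment_hash = :hash
""")
# named parameters are bound by the driver rather than pasted into the SQL
rows = engine.execute(query, start='2013-01-01', hash='abcd1234')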