Example #1
    def test_retry_max(self):
        db_engine = None
        trainer = None
        # set up a basic model training run
        # TODO: abstract the setup of a basic model training run so that
        # tests like this don't need to spell out specific values; it would
        # make them less noisy to read
        with testing.postgresql.Postgresql() as postgresql:
            db_engine = create_engine(postgresql.url())
            ensure_db(db_engine)
            init_engine(db_engine)
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=InMemoryModelStorageEngine(
                    project_path=''),
                db_engine=db_engine,
                model_grouper=ModelGrouper())

        # the postgres server goes out of scope here and thus no longer exists
        with patch('time.sleep') as time_mock:
            with self.assertRaises(sqlalchemy.exc.OperationalError):
                trainer.train_models(grid_config(), dict(),
                                     sample_matrix_store())
            # we want to make sure we are using the retrying module sanely,
            # not to match the exact number of calls the code specifies
            assert len(time_mock.mock_calls) > 5
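
The assertion counts time.sleep calls because retry backoff sleeps between attempts. A minimal sketch, assuming the trainer guards its database writes with something like the retrying library's decorator (the function and query below are hypothetical, not the trainer's actual code):

import sqlalchemy
from retrying import retry

def _is_operational_error(exception):
    # Retry only on transient connection failures, not on real bugs.
    return isinstance(exception, sqlalchemy.exc.OperationalError)

@retry(retry_on_exception=_is_operational_error,
       wait_exponential_multiplier=1000,  # back off 1s, 2s, 4s, ... between tries
       stop_max_attempt_number=10)
def write_results(db_engine):
    # Each failed attempt triggers a time.sleep, which is what the
    # patched mock counts.
    db_engine.execute('select 1')
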
Example #2
    def test_retry_recovery(self):
        grid_config = {
            'sklearn.ensemble.AdaBoostClassifier': {
                'n_estimators': [10]
            },
        }

        engine = None
        trainer = None
        port = None
        with testing.postgresql.Postgresql() as postgresql:
            port = postgresql.settings['port']
            engine = create_engine(postgresql.url())
            ensure_db(engine)
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=InMemoryModelStorageEngine(project_path=''),
                db_engine=engine,
                model_group_keys=['label_name', 'label_timespan']
            )

            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': ['good', 'bad']
            })
            matrix_store = InMemoryMatrixStore(matrix, {
                'label_timespan': '1d',
                'end_time': datetime.datetime.now(),
                'feature_start_time': datetime.date(2012, 12, 20),
                'label_name': 'label',
                'metta-uuid': '1234',
                'feature_names': ['ft1', 'ft2'],
                'indices': ['entity_id'],
            })

        # Start without a database server, then bring it back up after the
        # first sleep. Use self so the new server doesn't go out of scope
        # too early and shut down.
        self.new_server = None

        def replace_db(arg):
            self.new_server = testing.postgresql.Postgresql(port=port)
            engine = create_engine(self.new_server.url())
            ensure_db(engine)
        with patch('time.sleep') as time_mock:
            time_mock.side_effect = replace_db
            try:
                trainer.train_models(grid_config, dict(), matrix_store)
            finally:
                if self.new_server is not None:
                    self.new_server.stop()
            assert len(time_mock.mock_calls) == 1
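
The recovery trick hinges on Mock.side_effect: the patched time.sleep invokes the side effect before returning, so the first retry pause becomes the hook that restarts the server. A minimal, self-contained illustration of that mechanism:

import time
from unittest.mock import patch

calls = []

def on_sleep(seconds):
    # In the test above, this is where the replacement server is started.
    calls.append(seconds)

with patch('time.sleep') as time_mock:
    time_mock.side_effect = on_sleep
    time.sleep(3)  # dispatches to on_sleep(3) instead of sleeping

assert calls == [3]
assert len(time_mock.mock_calls) == 1
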
Example #3
def test_predictor_get_train_columns():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _, model_id = \
                fake_trained_model(
                    project_path,
                    model_storage_engine,
                    db_engine,
                    train_matrix_uuid=train_store.uuid
                )
            predictor = Predictor(project_path, model_storage_engine,
                                  db_engine)

            # The train_store uuid is recorded by fake_trained_model;
            # store the test matrix's uuid here so its predictions can be linked
            MatrixFactory(matrix_uuid=test_store.uuid)
            session.commit()

            # Run the same checks for training and testing predictions
            for store, mat_type in zip((train_store, test_store),
                                       ("train", "test")):
                predict_proba = predictor.predict(
                    model_id,
                    store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_store.columns())
                # assert
                # 1. that we calculated predictions
                assert len(predict_proba) > 0

                # 2. that the predictions table entries are present and
                # can be linked to the original models
                records = [
                    row for row in db_engine.execute(
                        '''select entity_id, as_of_date
                        from {}_results.{}_predictions
                        join model_metadata.models using (model_id)'''.format(
                            mat_type, mat_type))
                ]
                assert len(records) > 0
Example #4
def test_predictor_composite_index():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)
        _, model_id = \
            fake_trained_model(project_path, model_storage_engine, db_engine)
        predictor = Predictor(project_path, model_storage_engine, db_engine)
        dayone = datetime.datetime(2011, 1, 1)
        daytwo = datetime.datetime(2011, 1, 2)
        # create prediction set
        matrix = pandas.DataFrame.from_dict({
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [dayone, dayone, daytwo, daytwo],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7]
        }).set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_timespan': '3month',
            'metta-uuid': '1234',
            'indices': ['entity_id', 'as_of_date'],
        }
        matrix_store = InMemoryMatrixStore(matrix, metadata)
        predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two']
        )

        # assert
        # 1. that the returned predictions are of the desired length
        assert len(predict_proba) == 4

        # 2. that the predictions table entries are present and
        # can be linked to the original models
        records = [
            row for row in
            db_engine.execute('''select entity_id, as_of_date
            from results.predictions
            join results.models using (model_id)''')
        ]
        assert len(records) == 4
Example #5
    def test_retry_max(self):
        grid_config = {
            'sklearn.ensemble.AdaBoostClassifier': {
                'n_estimators': [10]
            },
        }

        engine = None
        trainer = None
        # set up a basic model training run
        # TODO: abstract the setup of a basic model training run so that
        # tests like this don't need to spell out specific values; it would
        # make them less noisy to read
        with testing.postgresql.Postgresql() as postgresql:
            engine = create_engine(postgresql.url())
            ensure_db(engine)
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=InMemoryModelStorageEngine(project_path=''),
                db_engine=engine,
                model_group_keys=['label_name', 'label_timespan']
            )

            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': ['good', 'bad']
            })
            matrix_store = InMemoryMatrixStore(matrix, {
                'label_timespan': '1d',
                'end_time': datetime.datetime.now(),
                'feature_start_time': datetime.date(2012, 12, 20),
                'label_name': 'label',
                'metta-uuid': '1234',
                'feature_names': ['ft1', 'ft2'],
                'indices': ['entity_id'],
            })
        # the postgres server goes out of scope here and thus no longer exists
        with patch('time.sleep') as time_mock:
            with self.assertRaises(sqlalchemy.exc.OperationalError):
                trainer.train_models(grid_config, dict(), matrix_store)
            # we want to make sure we are using the retrying module sanely,
            # not to match the exact number of calls the code specifies
            assert len(time_mock.mock_calls) > 5
Example #6
def test_model_scoring_inspections():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [
            {
                'metrics': ['precision@', 'recall@', 'fpr@'],
                'thresholds': {
                    'percentiles': [50.0],
                    'top_n': [3]
                }
            },
            {
                # ensure we test a non-thresholded metric as well
                'metrics': ['accuracy'],
            }
        ]

        model_evaluator = ModelEvaluator(metric_groups, db_engine)

        _, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)

        labels = numpy.array([True, False, numpy.nan, True, False])
        prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])
        evaluation_start = datetime.datetime(2016, 4, 1)
        evaluation_end = datetime.datetime(2016, 7, 1)
        example_as_of_date_frequency = '1d'
        model_evaluator.evaluate(prediction_probas, labels, model_id,
                                 evaluation_start, evaluation_end,
                                 example_as_of_date_frequency)

        for record in db_engine.execute(
                '''select * from results.evaluations
            where model_id = %s and evaluation_start_time = %s order by 1''',
            (model_id, evaluation_start)):
            assert record['num_labeled_examples'] == 4
            assert record['num_positive_labels'] == 2
            if record['parameter'] == '':
                assert record['num_labeled_above_threshold'] == 4
            elif 'pct' in record['parameter']:
                assert record['num_labeled_above_threshold'] == 1
            else:
                assert record['num_labeled_above_threshold'] == 2
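
To make the threshold assertions concrete, here is a hand-worked check, assuming top_n thresholding takes the n highest scores and that NaN labels are excluded from the labeled counts:

import numpy

labels = numpy.array([True, False, numpy.nan, True, False])
probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])

top3 = numpy.argsort(-probas)[:3]           # rows scoring 0.56, 0.55, 0.5
labeled = top3[~numpy.isnan(labels[top3])]  # the NaN-labeled row drops out
assert len(labeled) == 2                    # num_labeled_above_threshold
assert labels[labeled].sum() == 2           # both remaining labels are positive

The percentile branch presumably asserts 1 because the top half of five scores keeps the 0.56 and 0.55 rows, and the 0.55 row's NaN label is excluded.
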
Example #7
def test_calculate_and_save():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)
            model_storage_engine = InMemoryModelStorageEngine(project_path)
            calculator = IndividualImportanceCalculator(db_engine,
                                                        methods=['sample'],
                                                        replace=False)
            # given a trained model
            # and a test matrix
            _, model_id = \
                fake_trained_model(
                    project_path,
                    model_storage_engine,
                    db_engine,
                    train_matrix_uuid=train_store.uuid
                )
            # I expect to be able to call calculate and save
            calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store)
            # and find individual importances in the results schema afterwards
            records = [
                row
                for row in db_engine.execute('''select entity_id, as_of_date
                from test_results.individual_importances
                join model_metadata.models using (model_id)''')
            ]
            assert len(records) > 0
            # and that, when run again, it produces the same result
            calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store)
            new_records = [
                row
                for row in db_engine.execute('''select entity_id, as_of_date
                from test_results.individual_importances
                join model_metadata.models using (model_id)''')
            ]
            assert len(records) == len(new_records)
            assert records == new_records
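
The matching record lists verify an idempotency contract: with replace=False, a second run must reuse what the first run saved rather than recompute. A minimal, self-contained sketch of that contract (the class below is hypothetical, not the project's calculator):

class Calculator:
    def __init__(self, replace=False):
        self.replace = replace
        self.saved = {}

    def calculate_and_save(self, model_id, compute):
        # Skip recomputation when results exist and replace is off.
        if model_id in self.saved and not self.replace:
            return self.saved[model_id]
        self.saved[model_id] = compute()
        return self.saved[model_id]

calc = Calculator(replace=False)
first = calc.calculate_and_save(1, lambda: [('entity', 'date')])
second = calc.calculate_and_save(1, lambda: [('other', 'rows')])
assert first == second  # the second run did not recompute
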
Example #8
    def test_retry_recovery(self):
        db_engine = None
        trainer = None
        port = None
        with testing.postgresql.Postgresql() as postgresql:
            port = postgresql.settings['port']
            db_engine = create_engine(postgresql.url())
            ensure_db(db_engine)
            init_engine(db_engine)
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=InMemoryModelStorageEngine(
                    project_path=''),
                db_engine=db_engine,
                model_grouper=ModelGrouper())

        # start without a database server
        # then bring it back up after the first sleep
        # use self so it doesn't go out of scope too early and shut down
        self.new_server = None

        def replace_db(arg):
            self.new_server = testing.postgresql.Postgresql(port=port)
            db_engine = create_engine(self.new_server.url())
            ensure_db(db_engine)
            init_engine(db_engine)

            # Creates a matrix entry in the matrices table with uuid from train_metadata
            MatrixFactory(matrix_uuid="1234")
            session.commit()

        with patch('time.sleep') as time_mock:
            time_mock.side_effect = replace_db
            try:
                trainer.train_models(grid_config(), dict(),
                                     sample_matrix_store())
            finally:
                if self.new_server is not None:
                    self.new_server.stop()
            assert len(time_mock.mock_calls) == 1
Example #9
def test_predictor_get_train_columns():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _, model_id = \
                fake_trained_model(
                    project_path,
                    model_storage_engine,
                    db_engine,
                    train_matrix_uuid=train_store.uuid
                )
            predictor = Predictor(project_path, model_storage_engine, db_engine)

            predict_proba = predictor.predict(
                model_id,
                test_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_store.columns()
            )
            # assert
            # 1. that we calculated predictions
            assert len(predict_proba) > 0

            # 2. that the predictions table entries are present and
            # can be linked to the original models
            records = [
                row for row in
                db_engine.execute('''select entity_id, as_of_date
                from results.predictions
                join results.models using (model_id)''')
            ]
            assert len(records) > 0
Example #10
def test_predictor_retrieve():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)

        _, model_id = \
            fake_trained_model(project_path, model_storage_engine, db_engine, train_matrix_uuid='1234')

        predictor = Predictor(project_path,
                              model_storage_engine,
                              db_engine,
                              replace=False)

        dayone = datetime.date(2011, 1, 1)\
            .strftime(predictor.expected_matrix_ts_format)
        daytwo = datetime.date(2011, 1, 2)\
            .strftime(predictor.expected_matrix_ts_format)

        # create prediction set
        matrix_data = {
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [dayone, dayone, daytwo, daytwo],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7]
        }
        matrix = pandas.DataFrame.from_dict(matrix_data)\
            .set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_timespan': '3month',
            'metta-uuid': '1234',
            'indices': ['entity_id', 'as_of_date'],
            'matrix_type': 'test'
        }

        matrix_store = InMemoryMatrixStore(matrix, metadata)

        predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two'])

        # When run again, the predictions retrieved from the database
        # should match.
        #
        # Some trickiness here. Let's explain:
        #
        # If we are not careful, retrieving predictions from the database and
        # presenting them as a numpy array can result in a bad ordering,
        # since the given matrix may not be 'ordered' by some criteria
        # that can be easily represented by an ORDER BY clause.
        #
        # It will sometimes work: without ORDER BY, rows come back in the
        # table's physical order, which (unless something has happened to
        # the table) is insertion order, and that may well match the matrix.
        # So it's not a bug that necessarily shows itself immediately,
        # but when it does go wrong your scores will be garbage.
        #
        # So we simulate a table order mutation that can happen over time:
        # Remove the first row and put it at the end.
        # If the Predictor doesn't explicitly reorder the results, this will fail.
        # We only run this on TestPrediction because TrainPrediction behaves
        # exactly the same way.
        reorder_session = sessionmaker(bind=db_engine)()
        obj = reorder_session.query(TestPrediction).first()
        reorder_session.delete(obj)
        reorder_session.commit()

        make_transient(obj)
        reorder_session = sessionmaker(bind=db_engine)()
        reorder_session.add(obj)
        reorder_session.commit()

        predictor.load_model = Mock()
        new_predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two'])
        assert_array_equal(new_predict_proba, predict_proba)
        assert not predictor.load_model.called
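
The row shuffling above only trips up a Predictor that trusts the table's physical order. A minimal sketch of the reorder-safe retrieval the comment calls for, assuming a predictions table keyed by entity_id and as_of_date with a score column (the function itself is hypothetical):

import pandas

def cached_scores(db_engine, model_id, matrix):
    rows = db_engine.execute(
        '''select entity_id, as_of_date, score
        from test_results.test_predictions
        where model_id = %s''', (model_id,)).fetchall()
    scores = pandas.DataFrame(
        rows, columns=['entity_id', 'as_of_date', 'score']
    ).set_index(['entity_id', 'as_of_date'])['score']
    # Reindexing pins the output to the matrix's own row order,
    # whatever order the database happened to return.
    return scores.reindex(matrix.index).values
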
Example #11
def test_evaluating_early_warning():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [{
            'metrics': ['precision@',
                        'recall@',
                        'true positives@',
                        'true negatives@',
                        'false positives@',
                        'false negatives@'],
            'thresholds': {
                'percentiles': [5.0, 10.0],
                'top_n': [5, 10]
            }
        }, {
            'metrics': ['f1',
                        'mediocre',
                        'accuracy',
                        'roc_auc',
                        'average precision score'],
        }, {
            'metrics': ['fbeta@'],
            'parameters': [{'beta': 0.75}, {'beta': 1.25}]
        }]

        training_metric_groups = [{'metrics': ['accuracy', 'roc_auc']}]

        custom_metrics = {'mediocre': always_half}

        model_evaluator = ModelEvaluator(
            testing_metric_groups,
            training_metric_groups,
            db_engine,
            custom_metrics=custom_metrics,
        )

        labels = fake_labels(5)
        fake_train_matrix_store = MockMatrixStore('train', 'efgh', 5, db_engine, labels)
        fake_test_matrix_store = MockMatrixStore('test', '1234', 5, db_engine, labels)

        trained_model, model_id = fake_trained_model(
            'myproject',
            InMemoryModelStorageEngine('myproject'),
            db_engine
        )

        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_test_matrix_store,
            model_id,
        )
        records = [
            row[0] for row in
            db_engine.execute(
                '''select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1''',
                (model_id, fake_test_matrix_store.as_of_dates[0])
            )
        ]
        assert records == [
            'accuracy',
            'average precision score',
            'f1',
            'false [email protected]_pct',
            'false negatives@10_abs',
            'false [email protected]_pct',
            'false negatives@5_abs',
            'false [email protected]_pct',
            'false positives@10_abs',
            'false [email protected]_pct',
            'false positives@5_abs',
            '[email protected]_beta',
            '[email protected]_beta',
            'mediocre',
            '[email protected]_pct',
            'precision@10_abs',
            '[email protected]_pct',
            'precision@5_abs',
            '[email protected]_pct',
            'recall@10_abs',
            '[email protected]_pct',
            'recall@5_abs',
            'roc_auc',
            'true [email protected]_pct',
            'true negatives@10_abs',
            'true [email protected]_pct',
            'true negatives@5_abs',
            'true [email protected]_pct',
            'true positives@10_abs',
            'true [email protected]_pct',
            'true positives@5_abs'
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_train_matrix_store,
            model_id,
        )
        records = [
            row[0] for row in
            db_engine.execute(
                '''select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1''',
                (model_id, fake_train_matrix_store.as_of_dates[0])
            )
        ]
        assert records == ['accuracy', 'roc_auc']
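
The expected list encodes a naming rule: each record is metric || parameter, where top_n thresholds render as '<n>_abs', percentiles as '<p>_pct', and unthresholded metrics carry an empty parameter. A small illustration of that rule (the helper is hypothetical, inferred from the asserted strings):

def metric_key(metric, threshold=None, unit=None):
    if threshold is None:
        return metric  # unthresholded: parameter is the empty string
    return '{}{}_{}'.format(metric, threshold, unit)

assert metric_key('roc_auc') == 'roc_auc'
assert metric_key('precision@', 10, 'abs') == 'precision@10_abs'
assert metric_key('recall@', 5.0, 'pct') == '[email protected]_pct'
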
Example #12
def test_model_scoring_inspections():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [{
            'metrics': ['precision@', 'recall@', 'fpr@'],
            'thresholds': {'percentiles': [50.0], 'top_n': [3]}
        }, {
            # ensure we test a non-thresholded metric as well
            'metrics': ['accuracy'],
        }]
        training_metric_groups = [{'metrics': ['accuracy'], 'thresholds': {'percentiles': [50.0]}}]

        model_evaluator = ModelEvaluator(testing_metric_groups, training_metric_groups, db_engine)

        testing_labels = numpy.array([True, False, numpy.nan, True, False])
        testing_prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])

        training_labels = numpy.array([False, False, True, True, True, False, True, True])
        training_prediction_probas = numpy.array([0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6])

        fake_train_matrix_store = MockMatrixStore('train', 'efgh', 5, db_engine, training_labels)
        fake_test_matrix_store = MockMatrixStore('test', '1234', 5, db_engine, testing_labels)

        trained_model, model_id = fake_trained_model(
            'myproject',
            InMemoryModelStorageEngine('myproject'),
            db_engine
        )

        # Evaluate testing matrix and test the results
        model_evaluator.evaluate(
            testing_prediction_probas,
            fake_test_matrix_store,
            model_id,
        )
        for record in db_engine.execute(
            '''select * from test_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1''',
            (model_id, fake_test_matrix_store.as_of_dates[0])
        ):
            assert record['num_labeled_examples'] == 4
            assert record['num_positive_labels'] == 2
            if record['parameter'] == '':
                assert record['num_labeled_above_threshold'] == 4
            elif 'pct' in record['parameter']:
                assert record['num_labeled_above_threshold'] == 1
            else:
                assert record['num_labeled_above_threshold'] == 2

        # Evaluate the training matrix and test the results
        model_evaluator.evaluate(
            training_prediction_probas,
            fake_train_matrix_store,
            model_id,
        )
        for record in db_engine.execute(
            '''select * from train_results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1''',
            (model_id, fake_train_matrix_store.as_of_dates[0])
        ):
            assert record['num_labeled_examples'] == 8
            assert record['num_positive_labels'] == 5
            assert record['value'] == 0.625
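
The final assertion's 0.625 can be checked by hand, assuming the 50th-percentile threshold labels the top half of scores positive (a minimal sketch, not the evaluator's code):

import numpy

labels = numpy.array([False, False, True, True, True, False, True, True])
probas = numpy.array([0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6])

k = len(probas) // 2                # top 50% of 8 rows -> 4 rows
top = numpy.argsort(-probas, kind='stable')[:k]
preds = numpy.zeros(len(probas), dtype=bool)
preds[top] = True                   # scores 0.8, 0.70, 0.6, 0.6 predicted positive
assert (preds == labels).mean() == 0.625  # 5 of 8 rows classified correctly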