def test_retry_max(self):
    db_engine = None
    trainer = None
    # set up a basic model training run
    # TODO abstract the setup of a basic model training run where
    # we don't worry about the specific values used? it would make
    # tests like this require a bit less noise to read past
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        trainer = ModelTrainer(
            project_path='econ-dev/inspections',
            experiment_hash=None,
            model_storage_engine=InMemoryModelStorageEngine(project_path=''),
            db_engine=db_engine,
            model_grouper=ModelGrouper())

    # the postgres server goes out of scope here and thus no longer exists
    with patch('time.sleep') as time_mock:
        with self.assertRaises(sqlalchemy.exc.OperationalError):
            trainer.train_models(grid_config(), dict(), sample_matrix_store())
        # we want to make sure that we are using the retrying module sanely
        # as opposed to matching the exact # of calls specified by the code
        assert len(time_mock.mock_calls) > 5
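
# For context: the retry behavior exercised above comes from wrapping database
# writes with the `retrying` module, which sleeps between attempts via
# time.sleep; that is why patching time.sleep lets the test count retries.
# A minimal sketch of that pattern (illustrative only; the decorator arguments
# and the function names below are assumptions, not catwalk's actual values):
from retrying import retry
import sqlalchemy.exc


def _retry_on_operational_error(exception):
    # only retry connection-level failures, not query bugs
    return isinstance(exception, sqlalchemy.exc.OperationalError)


@retry(retry_on_exception=_retry_on_operational_error,
       wait_fixed=2000,             # sleep 2 seconds between attempts
       stop_max_attempt_number=10)  # then re-raise to the caller
def _write_with_retries(db_engine, statement):
    db_engine.execute(statement)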
def test_retry_recovery(self):
    grid_config = {
        'sklearn.ensemble.AdaBoostClassifier': {
            'n_estimators': [10]
        },
    }

    engine = None
    trainer = None
    port = None
    with testing.postgresql.Postgresql() as postgresql:
        port = postgresql.settings['port']
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        trainer = ModelTrainer(
            project_path='econ-dev/inspections',
            experiment_hash=None,
            model_storage_engine=InMemoryModelStorageEngine(project_path=''),
            db_engine=engine,
            model_group_keys=['label_name', 'label_timespan']
        )

        matrix = pandas.DataFrame.from_dict({
            'entity_id': [1, 2],
            'feature_one': [3, 4],
            'feature_two': [5, 6],
            'label': ['good', 'bad']
        })
        matrix_store = InMemoryMatrixStore(matrix, {
            'label_timespan': '1d',
            'end_time': datetime.datetime.now(),
            'feature_start_time': datetime.date(2012, 12, 20),
            'label_name': 'label',
            'metta-uuid': '1234',
            'feature_names': ['ft1', 'ft2'],
            'indices': ['entity_id'],
        })

    # start without a database server
    # then bring it back up after the first sleep
    # use self so it doesn't go out of scope too early and shut down
    self.new_server = None

    def replace_db(arg):
        self.new_server = testing.postgresql.Postgresql(port=port)
        engine = create_engine(self.new_server.url())
        ensure_db(engine)

    with patch('time.sleep') as time_mock:
        time_mock.side_effect = replace_db
        try:
            trainer.train_models(grid_config, dict(), matrix_store)
        finally:
            if self.new_server is not None:
                self.new_server.stop()
        assert len(time_mock.mock_calls) == 1
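
# Note on the recovery test above: the trainer's engine still points at the
# original server's URL, so the replacement server is deliberately started on
# the same port (captured before the first server shut down); otherwise the
# retried connection could never succeed.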
def test_predictor_get_train_columns():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _, model_id = \
                fake_trained_model(
                    project_path,
                    model_storage_engine,
                    db_engine,
                    train_matrix_uuid=train_store.uuid
                )
            predictor = Predictor(project_path, model_storage_engine, db_engine)

            # fake_trained_model already stores the train_store uuid;
            # store the test matrix uuid as well
            MatrixFactory(matrix_uuid=test_store.uuid)
            session.commit()

            # Runs the same test for training and testing predictions
            for store, mat_type in zip((train_store, test_store), ("train", "test")):
                predict_proba = predictor.predict(
                    model_id,
                    store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_store.columns())
                # assert
                # 1. that we calculated predictions
                assert len(predict_proba) > 0

                # 2. that the predictions table entries are present and
                # can be linked to the original models
                records = [
                    row for row in db_engine.execute(
                        '''select entity_id, as_of_date
                        from {}_results.{}_predictions
                        join model_metadata.models using (model_id)'''.format(
                            mat_type, mat_type))
                ]
                assert len(records) > 0
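
# Why train_matrix_columns matters: sample_metta_csv_diff_order builds a test
# matrix whose columns appear in a different order than the train matrix, and
# sklearn estimators assign meaning to feature positions rather than names.
# A minimal sketch of the alignment the predictor is expected to perform,
# assuming plain pandas frames (illustrative; not the real Predictor internals):
import pandas


def align_to_train_columns(test_df, train_columns):
    # selecting by the train-time column list both subsets and reorders
    return test_df[train_columns]


_example = pandas.DataFrame({'feature_two': [5, 6], 'feature_one': [3, 4]})
assert list(align_to_train_columns(
    _example, ['feature_one', 'feature_two']).columns) == ['feature_one', 'feature_two']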
def test_predictor_composite_index():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)
        _, model_id = \
            fake_trained_model(project_path, model_storage_engine, db_engine)
        predictor = Predictor(project_path, model_storage_engine, db_engine)
        dayone = datetime.datetime(2011, 1, 1)
        daytwo = datetime.datetime(2011, 1, 2)
        # create prediction set
        matrix = pandas.DataFrame.from_dict({
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [dayone, dayone, daytwo, daytwo],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7]
        }).set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_timespan': '3month',
            'metta-uuid': '1234',
            'indices': ['entity_id', 'as_of_date'],
        }
        matrix_store = InMemoryMatrixStore(matrix, metadata)
        predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two']
        )

        # assert
        # 1. that the returned predictions are of the desired length
        assert len(predict_proba) == 4

        # 2. that the predictions table entries are present and
        # can be linked to the original models
        records = [
            row for row in db_engine.execute(
                '''select entity_id, as_of_date
                from results.predictions
                join results.models using (model_id)''')
        ]
        assert len(records) == 4
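
# The composite-index test above exists because the same entity can appear once
# per as_of_date, so predictions must be keyed by (entity_id, as_of_date) rather
# than entity_id alone. A quick standalone illustration of the index shape assumed:
import datetime
import pandas

_frame = pandas.DataFrame.from_dict({
    'entity_id': [1, 2, 1, 2],
    'as_of_date': [datetime.datetime(2011, 1, 1)] * 2 + [datetime.datetime(2011, 1, 2)] * 2,
    'score': [0.1, 0.2, 0.3, 0.4],
}).set_index(['entity_id', 'as_of_date'])

# four rows from only two entities: no (entity_id, as_of_date) pair repeats
assert not _frame.index.duplicated().any()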
def test_retry_max(self):
    grid_config = {
        'sklearn.ensemble.AdaBoostClassifier': {
            'n_estimators': [10]
        },
    }

    engine = None
    trainer = None
    # set up a basic model training run
    # TODO abstract the setup of a basic model training run where
    # we don't worry about the specific values used? it would make
    # tests like this require a bit less noise to read past
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        trainer = ModelTrainer(
            project_path='econ-dev/inspections',
            experiment_hash=None,
            model_storage_engine=InMemoryModelStorageEngine(project_path=''),
            db_engine=engine,
            model_group_keys=['label_name', 'label_timespan']
        )

        matrix = pandas.DataFrame.from_dict({
            'entity_id': [1, 2],
            'feature_one': [3, 4],
            'feature_two': [5, 6],
            'label': ['good', 'bad']
        })
        matrix_store = InMemoryMatrixStore(matrix, {
            'label_timespan': '1d',
            'end_time': datetime.datetime.now(),
            'feature_start_time': datetime.date(2012, 12, 20),
            'label_name': 'label',
            'metta-uuid': '1234',
            'feature_names': ['ft1', 'ft2'],
            'indices': ['entity_id'],
        })

    # the postgres server goes out of scope here and thus no longer exists
    with patch('time.sleep') as time_mock:
        with self.assertRaises(sqlalchemy.exc.OperationalError):
            trainer.train_models(grid_config, dict(), matrix_store)
        # we want to make sure that we are using the retrying module sanely
        # as opposed to matching the exact # of calls specified by the code
        assert len(time_mock.mock_calls) > 5
def test_model_scoring_inspections():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [
            {
                'metrics': ['precision@', 'recall@', 'fpr@'],
                'thresholds': {
                    'percentiles': [50.0],
                    'top_n': [3]
                }
            },
            {
                # ensure we test a non-thresholded metric as well
                'metrics': ['accuracy'],
            }
        ]

        model_evaluator = ModelEvaluator(metric_groups, db_engine)

        _, model_id = fake_trained_model(
            'myproject',
            InMemoryModelStorageEngine('myproject'),
            db_engine)

        labels = numpy.array([True, False, numpy.nan, True, False])
        prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])
        evaluation_start = datetime.datetime(2016, 4, 1)
        evaluation_end = datetime.datetime(2016, 7, 1)
        example_as_of_date_frequency = '1d'
        model_evaluator.evaluate(
            prediction_probas,
            labels,
            model_id,
            evaluation_start,
            evaluation_end,
            example_as_of_date_frequency
        )

        for record in db_engine.execute(
                '''select * from results.evaluations
                where model_id = %s and evaluation_start_time = %s
                order by 1''',
                (model_id, evaluation_start)):
            assert record['num_labeled_examples'] == 4
            assert record['num_positive_labels'] == 2
            if record['parameter'] == '':
                assert record['num_labeled_above_threshold'] == 4
            elif 'pct' in record['parameter']:
                assert record['num_labeled_above_threshold'] == 1
            else:
                assert record['num_labeled_above_threshold'] == 2
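
# Worked arithmetic behind the assertions above: of the five predictions, one
# label is NaN, so 4 examples are labeled and 2 of those are positive. Sorted
# by score, the rows are 0.56 (True), 0.55 (NaN), 0.5 (True), 0.4 (False),
# 0.3 (False). The 50th-percentile threshold covers the top two scores, only
# one of which is labeled; top_n of 3 covers the top three scores, two of
# which are labeled; and the non-thresholded accuracy metric (empty parameter
# string) counts all 4 labeled examples.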
def test_calculate_and_save():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            calculator = IndividualImportanceCalculator(db_engine,
                                                        methods=['sample'],
                                                        replace=False)
            # given a trained model
            # and a test matrix
            _, model_id = \
                fake_trained_model(
                    project_path,
                    model_storage_engine,
                    db_engine,
                    train_matrix_uuid=train_store.uuid
                )

            # I expect to be able to call calculate and save
            calculator.calculate_and_save_all_methods_and_dates(model_id, test_store)

            # and find individual importances in the results schema afterwards
            records = [
                row for row in db_engine.execute(
                    '''select entity_id, as_of_date
                    from test_results.individual_importances
                    join model_metadata.models using (model_id)''')
            ]
            assert len(records) > 0

            # and that, when run again, the results are unchanged
            calculator.calculate_and_save_all_methods_and_dates(model_id, test_store)

            new_records = [
                row for row in db_engine.execute(
                    '''select entity_id, as_of_date
                    from test_results.individual_importances
                    join model_metadata.models using (model_id)''')
            ]
            assert len(records) == len(new_records)
            assert records == new_records
def test_retry_recovery(self):
    db_engine = None
    trainer = None
    port = None
    with testing.postgresql.Postgresql() as postgresql:
        port = postgresql.settings['port']
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        trainer = ModelTrainer(
            project_path='econ-dev/inspections',
            experiment_hash=None,
            model_storage_engine=InMemoryModelStorageEngine(project_path=''),
            db_engine=db_engine,
            model_grouper=ModelGrouper())

    # start without a database server
    # then bring it back up after the first sleep
    # use self so it doesn't go out of scope too early and shut down
    self.new_server = None

    def replace_db(arg):
        self.new_server = testing.postgresql.Postgresql(port=port)
        db_engine = create_engine(self.new_server.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        # Creates a matrix entry in the matrices table with uuid from train_metadata
        MatrixFactory(matrix_uuid="1234")
        session.commit()

    with patch('time.sleep') as time_mock:
        time_mock.side_effect = replace_db
        try:
            trainer.train_models(grid_config(), dict(), sample_matrix_store())
        finally:
            if self.new_server is not None:
                self.new_server.stop()
        assert len(time_mock.mock_calls) == 1
def test_predictor_get_train_columns():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _, model_id = \
                fake_trained_model(
                    project_path,
                    model_storage_engine,
                    db_engine,
                    train_matrix_uuid=train_store.uuid
                )
            predictor = Predictor(project_path, model_storage_engine, db_engine)

            predict_proba = predictor.predict(
                model_id,
                test_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_store.columns()
            )
            # assert
            # 1. that we calculated predictions
            assert len(predict_proba) > 0

            # 2. that the predictions table entries are present and
            # can be linked to the original models
            records = [
                row for row in db_engine.execute(
                    '''select entity_id, as_of_date
                    from results.predictions
                    join results.models using (model_id)''')
            ]
            assert len(records) > 0
def test_predictor_retrieve():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)
        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)
        _, model_id = \
            fake_trained_model(project_path,
                               model_storage_engine,
                               db_engine,
                               train_matrix_uuid='1234')
        predictor = Predictor(project_path, model_storage_engine, db_engine,
                              replace=False)
        dayone = datetime.date(2011, 1, 1).strftime(
            predictor.expected_matrix_ts_format)
        daytwo = datetime.date(2011, 1, 2).strftime(
            predictor.expected_matrix_ts_format)
        # create prediction set
        matrix_data = {
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [dayone, dayone, daytwo, daytwo],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7]
        }
        matrix = pandas.DataFrame.from_dict(matrix_data)\
            .set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_timespan': '3month',
            'metta-uuid': '1234',
            'indices': ['entity_id', 'as_of_date'],
            'matrix_type': 'test'
        }
        matrix_store = InMemoryMatrixStore(matrix, metadata)
        predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two'])

        # When run again, the predictions retrieved from the database
        # should match.
        #
        # Some trickiness here. Let's explain:
        #
        # If we are not careful, retrieving predictions from the database and
        # presenting them as a numpy array can result in a bad ordering,
        # since the given matrix may not be 'ordered' by criteria
        # that can be easily represented in an ORDER BY clause.
        #
        # It will sometimes work anyway, because without ORDER BY you get
        # rows back in the table's physical order, which, unless something has
        # happened to the table, will be the order you inserted them in,
        # which could very well be the order in the matrix.
        # So it's not a bug that would necessarily show itself immediately,
        # but when it does go wrong your scores will be garbage.
        #
        # So we simulate a table order mutation that can happen over time:
        # remove the first row and put it at the end.
        # If the Predictor doesn't explicitly reorder the results, this will fail.
        # Only running on TestPrediction because TrainPrediction behaves
        # exactly the same way.
        reorder_session = sessionmaker(bind=db_engine)()
        obj = reorder_session.query(TestPrediction).first()
        reorder_session.delete(obj)
        reorder_session.commit()

        make_transient(obj)
        reorder_session = sessionmaker(bind=db_engine)()
        reorder_session.add(obj)
        reorder_session.commit()

        predictor.load_model = Mock()
        new_predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two'])
        assert_array_equal(new_predict_proba, predict_proba)
        assert not predictor.load_model.called
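
# A sketch of the deterministic retrieval the long comment above argues for
# (illustrative; the real Predictor query may differ): cached predictions are
# reordered by the same composite key the matrix is indexed on, instead of
# trusting the table's physical row order, e.g.:
#
#     select entity_id, as_of_date, score
#     from test_results.test_predictions
#     where model_id = %s
#     order by entity_id, as_of_date
#
# and then realigned to the matrix index before being returned as an array.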
def test_evaluating_early_warning():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [{
            'metrics': ['precision@',
                        'recall@',
                        'true positives@',
                        'true negatives@',
                        'false positives@',
                        'false negatives@'],
            'thresholds': {
                'percentiles': [5.0, 10.0],
                'top_n': [5, 10]
            }
        }, {
            'metrics': ['f1',
                        'mediocre',
                        'accuracy',
                        'roc_auc',
                        'average precision score'],
        }, {
            'metrics': ['fbeta@'],
            'parameters': [{'beta': 0.75}, {'beta': 1.25}]
        }]

        training_metric_groups = [{'metrics': ['accuracy', 'roc_auc']}]

        custom_metrics = {'mediocre': always_half}

        model_evaluator = ModelEvaluator(testing_metric_groups,
                                         training_metric_groups,
                                         db_engine,
                                         custom_metrics=custom_metrics)

        labels = fake_labels(5)
        fake_train_matrix_store = MockMatrixStore('train', 'efgh', 5,
                                                  db_engine, labels)
        fake_test_matrix_store = MockMatrixStore('test', '1234', 5,
                                                 db_engine, labels)

        trained_model, model_id = fake_trained_model(
            'myproject',
            InMemoryModelStorageEngine('myproject'),
            db_engine
        )

        # Evaluate the testing metrics and test for all of them.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_test_matrix_store,
            model_id,
        )
        records = [
            row[0] for row in db_engine.execute(
                '''select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1''',
                (model_id, fake_test_matrix_store.as_of_dates[0])
            )
        ]
        assert records == [
            'accuracy',
            'average precision score',
            'f1',
            'false [email protected]_pct',
            'false negatives@10_abs',
            'false [email protected]_pct',
            'false negatives@5_abs',
            'false [email protected]_pct',
            'false positives@10_abs',
            'false [email protected]_pct',
            'false positives@5_abs',
            '[email protected]_beta',
            '[email protected]_beta',
            'mediocre',
            '[email protected]_pct',
            'precision@10_abs',
            '[email protected]_pct',
            'precision@5_abs',
            '[email protected]_pct',
            'recall@10_abs',
            '[email protected]_pct',
            'recall@5_abs',
            'roc_auc',
            'true [email protected]_pct',
            'true negatives@10_abs',
            'true [email protected]_pct',
            'true negatives@5_abs',
            'true [email protected]_pct',
            'true positives@10_abs',
            'true [email protected]_pct',
            'true positives@5_abs'
        ]

        # Evaluate the training metrics and test
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            fake_train_matrix_store,
            model_id,
        )
        records = [
            row[0] for row in db_engine.execute(
                '''select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and
                evaluation_start_time = %s
                order by 1''',
                (model_id, fake_train_matrix_store.as_of_dates[0])
            )
        ]
        assert records == ['accuracy', 'roc_auc']
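
# Reading the expected list above: each entry is metric || parameter, where the
# threshold parameter encodes its kind: '10.0_pct' means the top 10 percent of
# the list, '10_abs' the top 10 entities, and '0.75_beta' the beta value passed
# to fbeta@. Non-thresholded metrics like 'accuracy' and 'roc_auc' carry an
# empty parameter string, which is why they appear bare.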
def test_model_scoring_inspections():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [{
            'metrics': ['precision@', 'recall@', 'fpr@'],
            'thresholds': {'percentiles': [50.0], 'top_n': [3]}
        }, {
            # ensure we test a non-thresholded metric as well
            'metrics': ['accuracy'],
        }]
        training_metric_groups = [{'metrics': ['accuracy'],
                                   'thresholds': {'percentiles': [50.0]}}]

        model_evaluator = ModelEvaluator(testing_metric_groups,
                                         training_metric_groups,
                                         db_engine)

        testing_labels = numpy.array([True, False, numpy.nan, True, False])
        testing_prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])

        training_labels = numpy.array(
            [False, False, True, True, True, False, True, True])
        training_prediction_probas = numpy.array(
            [0.6, 0.4, 0.55, 0.70, 0.3, 0.2, 0.8, 0.6])

        fake_train_matrix_store = MockMatrixStore('train', 'efgh', 5, db_engine,
                                                  training_labels)
        fake_test_matrix_store = MockMatrixStore('test', '1234', 5, db_engine,
                                                 testing_labels)

        trained_model, model_id = fake_trained_model(
            'myproject',
            InMemoryModelStorageEngine('myproject'),
            db_engine
        )

        # Evaluate testing matrix and test the results
        model_evaluator.evaluate(
            testing_prediction_probas,
            fake_test_matrix_store,
            model_id,
        )
        for record in db_engine.execute(
                '''select * from test_results.evaluations
                where model_id = %s and evaluation_start_time = %s
                order by 1''',
                (model_id, fake_test_matrix_store.as_of_dates[0])):
            assert record['num_labeled_examples'] == 4
            assert record['num_positive_labels'] == 2
            if record['parameter'] == '':
                assert record['num_labeled_above_threshold'] == 4
            elif 'pct' in record['parameter']:
                assert record['num_labeled_above_threshold'] == 1
            else:
                assert record['num_labeled_above_threshold'] == 2

        # Evaluate the training matrix and test the results
        model_evaluator.evaluate(
            training_prediction_probas,
            fake_train_matrix_store,
            model_id,
        )
        for record in db_engine.execute(
                '''select * from train_results.evaluations
                where model_id = %s and evaluation_start_time = %s
                order by 1''',
                (model_id, fake_train_matrix_store.as_of_dates[0])):
            assert record['num_labeled_examples'] == 8
            assert record['num_positive_labels'] == 5
            assert record['value'] == 0.625
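
# Worked arithmetic for the training assertions above: all 8 training examples
# are labeled, 5 of them positive. accuracy@50_pct predicts positive for the
# top half (scores 0.8, 0.7, 0.6, 0.6 with labels True, True, False, True) and
# negative for the rest (0.55 True, 0.4 False, 0.3 True, 0.2 False), getting
# 3 + 2 = 5 of 8 right, hence the expected value of 0.625.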