def test_n_jobs_not_new_model():
    """Check that ``n_jobs`` never ends up in stored model group parameters.

    Train tasks are generated from a grid in which only the random forest
    varies ``n_jobs``, every task is processed, and each
    ``results.model_groups.model_parameters`` value is then checked for the
    absence of an ``n_jobs`` key.
    """
    forest_grid = {
        'n_estimators': [10, 100],
        'max_features': ['sqrt', 'log2'],
        'max_depth': [5, 10, 15, 20],
        'criterion': ['gini', 'entropy'],
        'n_jobs': [12, 24],
    }
    grid_config = {
        'sklearn.ensemble.AdaBoostClassifier': {
            'n_estimators': [10, 100, 1000]
        },
        'sklearn.ensemble.RandomForestClassifier': forest_grid,
    }

    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            storage = S3ModelStorageEngine(s3_conn, 'econ-dev/inspections')
            trainer = ModelTrainer(
                project_path='econ-dev/inspections',
                experiment_hash=None,
                model_storage_engine=storage,
                db_engine=engine,
                model_group_keys=['label_name', 'label_timespan']
            )

            frame = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': ['good', 'bad']
            })
            matrix_metadata = {
                'label_timespan': '1d',
                'end_time': datetime.datetime.now(),
                'feature_start_time': datetime.date(2012, 12, 20),
                'label_name': 'label',
                'metta-uuid': '1234',
                'feature_names': ['ft1', 'ft2'],
                'indices': ['entity_id'],
            }
            train_tasks = trainer.generate_train_tasks(
                grid_config,
                dict(),
                InMemoryMatrixStore(frame, matrix_metadata)
            )

            # 32+3, would be (32*2)+3 if we didn't remove
            assert len(train_tasks) == 35
            tasks_with_n_jobs = sum(
                1 for task in train_tasks if 'n_jobs' in task['parameters']
            )
            assert tasks_with_n_jobs == 32

            for train_task in train_tasks:
                trainer.process_train_task(**train_task)

            stored_parameters = engine.execute(
                'select model_parameters from results.model_groups'
            )
            for row in stored_parameters:
                assert 'n_jobs' not in row[0]
def test_retry_recovery(self):
    """Check that training succeeds once the database comes back.

    The trainer is built against a temporary Postgres server that is shut
    down before training starts. ``time.sleep`` is patched so that the first
    sleep starts a replacement server on the same port; training must then
    complete with exactly one sleep call recorded.
    """
    grid_config = {
        'sklearn.ensemble.AdaBoostClassifier': {'n_estimators': [10]},
    }
    engine = trainer = port = None

    with testing.postgresql.Postgresql() as postgresql:
        port = postgresql.settings['port']
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        storage = InMemoryModelStorageEngine(project_path='')
        trainer = ModelTrainer(
            project_path='econ-dev/inspections',
            experiment_hash=None,
            model_storage_engine=storage,
            db_engine=engine,
            model_group_keys=['label_name', 'label_window'])

        frame = pandas.DataFrame.from_dict({
            'entity_id': [1, 2],
            'feature_one': [3, 4],
            'feature_two': [5, 6],
            'label': ['good', 'bad']
        })
        matrix_metadata = {
            'label_window': '1d',
            'end_time': datetime.datetime.now(),
            'beginning_of_time': datetime.date(2012, 12, 20),
            'label_name': 'label',
            'metta-uuid': '1234',
            'feature_names': ['ft1', 'ft2']
        }
        matrix_store = InMemoryMatrixStore(frame, matrix_metadata)

    # start without a database server
    # then bring it back up after the first sleep
    # use self so it doesn't go out of scope too early and shut down
    self.new_server = None

    def replace_db(arg):
        # bring a server back on the port the trainer's engine points at
        self.new_server = testing.postgresql.Postgresql(port=port)
        fresh_engine = create_engine(self.new_server.url())
        ensure_db(fresh_engine)

    with patch('time.sleep', side_effect=replace_db) as time_mock:
        try:
            trainer.train_models(grid_config, dict(), matrix_store)
        finally:
            # always stop the replacement server, even if training failed
            if self.new_server is not None:
                self.new_server.stop()
        assert len(time_mock.mock_calls) == 1
def test_save_experiment_and_get_hash():
    """Check that saving an experiment config returns a stable string hash."""
    # no reason to make assertions on the config itself, use a basic dict
    config = {'one': 'two'}
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)

        first_hash = save_experiment_and_get_hash(config, engine)
        assert isinstance(first_hash, str)

        # saving the same config again must hand back the same hash
        second_hash = save_experiment_and_get_hash(config, engine)
        assert second_hash == first_hash
def test_retry_max(self):
    """Check that training gives up with ``OperationalError`` after retrying.

    The trainer's database server is gone by the time training runs, so
    ``train_models`` is expected to raise after sleeping (patched) more than
    five times.
    """
    grid_config = {
        'sklearn.ensemble.AdaBoostClassifier': {'n_estimators': [10]},
    }
    engine = trainer = None

    # set up a basic model training run
    # TODO abstract the setup of a basic model training run where
    # we don't worry about the specific values used? it would make
    # tests like this require a bit less noise to read past
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        storage = InMemoryModelStorageEngine(project_path='')
        trainer = ModelTrainer(
            project_path='econ-dev/inspections',
            experiment_hash=None,
            model_storage_engine=storage,
            db_engine=engine,
            model_group_keys=['label_name', 'label_timespan']
        )

        frame = pandas.DataFrame.from_dict({
            'entity_id': [1, 2],
            'feature_one': [3, 4],
            'feature_two': [5, 6],
            'label': ['good', 'bad']
        })
        matrix_metadata = {
            'label_timespan': '1d',
            'end_time': datetime.datetime.now(),
            'feature_start_time': datetime.date(2012, 12, 20),
            'label_name': 'label',
            'metta-uuid': '1234',
            'feature_names': ['ft1', 'ft2'],
            'indices': ['entity_id'],
        }
        matrix_store = InMemoryMatrixStore(frame, matrix_metadata)

    # the postgres server goes out of scope here and thus no longer exists
    with patch('time.sleep') as time_mock:
        with self.assertRaises(sqlalchemy.exc.OperationalError):
            trainer.train_models(grid_config, dict(), matrix_store)
        # we want to make sure that we are using the retrying module sanely
        # as opposed to matching the exact # of calls specified by the code
        sleep_calls = len(time_mock.mock_calls)
        assert sleep_calls > 5
def test_predictor_composite_index():
    """Check prediction on a matrix indexed by (entity_id, as_of_date).

    Two entities on two dates give four rows; the test expects four returned
    predictions and four rows in ``results.predictions`` joinable to
    ``results.models``.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)
        _, model_id = fake_trained_model(
            project_path, model_storage_engine, db_engine)
        predictor = Predictor(project_path, model_storage_engine, db_engine)

        first_day = datetime.datetime(2011, 1, 1)
        second_day = datetime.datetime(2011, 1, 2)

        # create prediction set
        frame = pandas.DataFrame.from_dict({
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [first_day, first_day, second_day, second_day],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7]
        })
        matrix = frame.set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_timespan': '3month',
            'metta-uuid': '1234',
            'indices': ['entity_id'],
        }
        matrix_store = InMemoryMatrixStore(matrix, metadata)

        predict_proba = predictor.predict(
            model_id,
            matrix_store,
            misc_db_parameters=dict(),
            train_matrix_columns=['feature_one', 'feature_two'])

        # assert
        # 1. that the returned predictions are of the desired length
        assert len(predict_proba) == 4

        # 2. that the predictions table entries are present and
        # can be linked to the original models
        records = list(db_engine.execute(
            'select entity_id, as_of_date '
            'from results.predictions '
            'join results.models using (model_id)'
        ))
        assert len(records) == 4
def test_calculate_and_save():
    """Check that individual importances are saved and re-running is stable.

    After the first calculation there must be rows in
    ``results.individual_importances``; a second calculation for the same
    model and test matrix must leave an identical result set.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'

        def importance_rows():
            # rows are only returned when they link back to a model record
            return list(db_engine.execute(
                'select entity_id, as_of_date '
                'from results.individual_importances '
                'join results.models using (model_id)'
            ))

        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)
            model_storage_engine = InMemoryModelStorageEngine(project_path)
            calculator = IndividualImportanceCalculator(
                db_engine, methods=['sample'], replace=False)

            # given a trained model
            # and a test matrix
            _, model_id = fake_trained_model(
                project_path,
                model_storage_engine,
                db_engine,
                train_matrix_uuid=train_store.uuid
            )

            # i expect to be able to call calculate and save
            calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store)

            # and find individual importances in the results schema afterwards
            records = importance_rows()
            assert len(records) > 0

            # and that when run again, has the same result
            calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store)
            new_records = importance_rows()
            assert len(records) == len(new_records)
            assert records == new_records
def test_uniform_distribution_entity_date_index():
    """Check ``uniform_distribution`` on an (entity_id, as_of_date) index.

    Ten feature importances are created for one model and a two-row matrix
    (same entity, two dates) is built with every feature set to 0.5. Asking
    for five ranks on one date must return five well-formed result dicts.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        model = ModelFactory()
        feature_importances = [
            FeatureImportanceFactory(
                model_rel=model, feature='feature_{}'.format(number))
            for number in range(10)
        ]

        data_dict = {
            'entity_id': [1, 1],
            'as_of_date': ['2016-01-01', '2017-01-01']
        }
        data_dict.update(
            {imp.feature: [0.5, 0.5] for imp in feature_importances}
        )
        indexed_matrix = pandas.DataFrame.from_dict(data_dict).set_index(
            ['entity_id', 'as_of_date'])
        test_store = InMemoryMatrixStore(
            matrix=indexed_matrix, metadata=sample_metadata())
        session.commit()

        results = uniform_distribution(
            db_engine,
            model_id=model.model_id,
            as_of_date='2016-01-01',
            test_matrix_store=test_store,
            n_ranks=5)

        # 5 features x 1 entity for this as_of_date
        assert len(results) == 5
        expected_keys = ('entity_id', 'feature_name', 'score', 'feature_value')
        for result in results:
            for key in expected_keys:
                assert key in result
            assert result['feature_value'] == 0.5
            assert result['score'] >= 0
            assert result['score'] <= 1
            assert isinstance(result['feature_name'], str)
            assert result['entity_id'] in [1, 2]
def test_predictor_get_train_columns():
    """Check prediction when the train matrix columns are passed explicitly.

    The train and test stores come from ``sample_metta_csv_diff_order``; the
    test matrix is predicted using ``train_store.columns()`` and the test
    expects both returned predictions and stored prediction rows.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'

        with tempfile.TemporaryDirectory() as temp_dir:
            train_store, test_store = sample_metta_csv_diff_order(temp_dir)

            model_storage_engine = InMemoryModelStorageEngine(project_path)
            _, model_id = fake_trained_model(
                project_path,
                model_storage_engine,
                db_engine,
                train_matrix_uuid=train_store.uuid
            )
            predictor = Predictor(
                project_path, model_storage_engine, db_engine)

            train_columns = train_store.columns()
            predict_proba = predictor.predict(
                model_id,
                test_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_columns)

            # assert
            # 1. that we calculated predictions
            assert len(predict_proba) > 0

            # 2. that the predictions table entries are present and
            # can be linked to the original models
            records = list(db_engine.execute(
                'select entity_id, as_of_date '
                'from results.predictions '
                'join results.models using (model_id)'
            ))
            assert len(records) > 0
def test_model_scoring_inspections():
    """Check the label counts stored alongside thresholded evaluations.

    Five predictions are evaluated, one of which has a NaN label. Every
    stored evaluation row must report four labeled examples and two positive
    labels; rows whose parameter contains ``'pct'`` must report one labeled
    example above the threshold, all other rows two.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)

        thresholds = {'percentiles': [50.0], 'top_n': [3]}
        metric_groups = [{
            'metrics': ['precision@', 'recall@', 'fpr@'],
            'thresholds': thresholds,
        }]
        model_evaluator = ModelEvaluator(metric_groups, db_engine)

        _, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)

        labels = numpy.array([True, False, numpy.nan, True, False])
        prediction_probas = numpy.array([0.56, 0.4, 0.55, 0.5, 0.3])
        evaluation_start = datetime.datetime(2016, 4, 1)
        evaluation_end = datetime.datetime(2016, 7, 1)
        example_frequency = '1d'
        model_evaluator.evaluate(
            prediction_probas, labels, model_id,
            evaluation_start, evaluation_end, example_frequency)

        evaluations = db_engine.execute(
            'select * from results.evaluations '
            'where model_id = %s and evaluation_start_time = %s order by 1',
            (model_id, evaluation_start))
        for record in evaluations:
            assert record['num_labeled_examples'] == 4
            assert record['num_positive_labels'] == 2
            expected_above = 1 if 'pct' in record['parameter'] else 2
            assert record['num_labeled_above_threshold'] == expected_above
def test_evaluating_early_warning():
    """Check that every configured metric/parameter pair is stored.

    Three metric groups are evaluated: thresholded metrics (percentiles 5.0
    and 10.0, top-n 5 and 10), unparameterized metrics including the custom
    ``mediocre`` metric, and ``fbeta@`` with two beta values. The distinct
    ``metric || parameter`` strings found in ``results.evaluations`` must
    match the expected sorted list exactly.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [{
            'metrics': [
                'precision@', 'recall@', 'true positives@',
                'true negatives@', 'false positives@', 'false negatives@'
            ],
            'thresholds': {
                'percentiles': [5.0, 10.0],
                'top_n': [5, 10]
            }
        }, {
            'metrics': [
                'f1', 'mediocre', 'accuracy', 'roc_auc',
                'average precision score'
            ],
        }, {
            'metrics': ['fbeta@'],
            'parameters': [{
                'beta': 0.75
            }, {
                'beta': 1.25
            }]
        }]

        custom_metrics = {'mediocre': always_half}

        model_evaluator = ModelEvaluator(
            metric_groups, db_engine, custom_metrics=custom_metrics)

        trained_model, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)

        labels = fake_labels(5)
        as_of_date = datetime.date(2016, 5, 5)
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            labels, model_id, as_of_date, as_of_date, '1y')

        # assert
        # that all of the records are there
        records = [
            row[0] for row in db_engine.execute(
                '''select distinct(metric || parameter)
                from results.evaluations
                where model_id = %s and
                evaluation_start_time = %s order by 1''',
                (model_id, as_of_date))
        ]
        # The thresholded names below had been mangled into
        # '[email protected]' by an e-mail obfuscation filter; they are
        # restored from the configured thresholds and betas above and kept
        # in the order the query's "order by 1" yields.
        assert records == [
            'accuracy',
            'average precision score',
            'f1',
            'false negatives@10.0_pct',
            'false negatives@10_abs',
            'false negatives@5.0_pct',
            'false negatives@5_abs',
            'false positives@10.0_pct',
            'false positives@10_abs',
            'false positives@5.0_pct',
            'false positives@5_abs',
            'fbeta@0.75_beta',
            'fbeta@1.25_beta',
            'mediocre',
            'precision@10.0_pct',
            'precision@10_abs',
            'precision@5.0_pct',
            'precision@5_abs',
            'recall@10.0_pct',
            'recall@10_abs',
            'recall@5.0_pct',
            'recall@5_abs',
            'roc_auc',
            'true negatives@10.0_pct',
            'true negatives@10_abs',
            'true negatives@5.0_pct',
            'true negatives@5_abs',
            'true positives@10.0_pct',
            'true positives@10_abs',
            'true positives@5.0_pct',
            'true positives@5_abs'
        ]
def test_DistanceFromBestTable():
    """Check the rows and observed bounds produced by DistanceFromBestTable.

    Three model groups ('stable', 'bad', 'spiky') each get a model at three
    train end times. Precision and recall evaluations are created twice per
    model: once starting at the train end time, and once starting 31 days
    later with every value lowered by 0.15. The populated distance table is
    then checked for two metric/date slices and for its observed bounds.
    """
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        init_engine(engine)

        model_groups = {
            'stable': ModelGroupFactory(model_type='myStableClassifier'),
            'bad': ModelGroupFactory(model_type='myBadClassifier'),
            'spiky': ModelGroupFactory(model_type='mySpikeClassifier'),
        }

        class StableModelFactory(ModelFactory):
            model_group_rel = model_groups['stable']

        class BadModelFactory(ModelFactory):
            model_group_rel = model_groups['bad']

        class SpikyModelFactory(ModelFactory):
            model_group_rel = model_groups['spiky']

        # models are created group by group, oldest train end time first
        group_factories = (
            ('stable', StableModelFactory),
            ('bad', BadModelFactory),
            ('spiky', SpikyModelFactory),
        )
        train_end_times = (
            ('3y_ago', '2014-01-01'),
            ('2y_ago', '2015-01-01'),
            ('1y_ago', '2016-01-01'),
        )
        models = {
            '{}_{}'.format(group_name, age): model_factory(
                train_end_time=train_end_time)
            for group_name, model_factory in group_factories
            for age, train_end_time in train_end_times
        }

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)
            evaluation_end_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 1))

        class MonthOutEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 31))
            evaluation_end_time = factory.LazyAttribute(
                lambda o: _sql_add_days(o.model_rel.train_end_time, 32))

        class Precision100Factory(ImmediateEvalFactory):
            metric = 'precision@'
            parameter = '100_abs'

        class Precision100FactoryMonthOut(MonthOutEvalFactory):
            metric = 'precision@'
            parameter = '100_abs'

        class Recall100Factory(ImmediateEvalFactory):
            metric = 'recall@'
            parameter = '100_abs'

        class Recall100FactoryMonthOut(MonthOutEvalFactory):
            metric = 'recall@'
            parameter = '100_abs'

        # base metric value per model; the month-out pass shifts each one
        precision_values = (
            ('stable_3y_ago', 0.6),
            ('stable_2y_ago', 0.57),
            ('stable_1y_ago', 0.59),
            ('bad_3y_ago', 0.4),
            ('bad_2y_ago', 0.39),
            ('bad_1y_ago', 0.43),
            ('spiky_3y_ago', 0.8),
            ('spiky_2y_ago', 0.4),
            ('spiky_1y_ago', 0.4),
        )
        recall_values = (
            ('stable_3y_ago', 0.55),
            ('stable_2y_ago', 0.56),
            ('stable_1y_ago', 0.55),
            ('bad_3y_ago', 0.35),
            ('bad_2y_ago', 0.34),
            ('bad_1y_ago', 0.36),
            ('spiky_3y_ago', 0.35),
            ('spiky_2y_ago', 0.8),
            ('spiky_1y_ago', 0.36),
        )
        evaluation_passes = (
            (0, Precision100Factory, Recall100Factory),
            (-0.15, Precision100FactoryMonthOut, Recall100FactoryMonthOut),
        )
        for add_val, precision_factory, recall_factory in evaluation_passes:
            for model_key, base_value in precision_values:
                precision_factory(
                    model_rel=models[model_key], value=base_value + add_val)
            for model_key, base_value in recall_values:
                recall_factory(
                    model_rel=models[model_key], value=base_value + add_val)
        session.commit()

        distance_table = DistanceFromBestTable(
            db_engine=engine,
            models_table='models',
            distance_table='dist_table')
        metrics = [
            {'metric': 'precision@', 'parameter': '100_abs'},
            {'metric': 'recall@', 'parameter': '100_abs'},
        ]
        model_group_ids = [
            group.model_group_id for group in model_groups.values()
        ]
        distance_table.create_and_populate(
            model_group_ids,
            ['2014-01-01', '2015-01-01', '2016-01-01'],
            metrics)

        # get an ordered list of the models/groups for a particular metric/time
        query = '''
            select model_id, raw_value, dist_from_best_case, dist_from_best_case_next_time
            from dist_table where metric = %s and parameter = %s and train_end_time = %s
            order by dist_from_best_case
        '''

        prec_3y_ago = list(
            engine.execute(query, ('precision@', '100_abs', '2014-01-01')))
        assert prec_3y_ago == [
            (models['spiky_3y_ago'].model_id, 0.8, 0, 0.17),
            (models['stable_3y_ago'].model_id, 0.6, 0.2, 0),
            (models['bad_3y_ago'].model_id, 0.4, 0.4, 0.18),
        ]

        recall_2y_ago = list(
            engine.execute(query, ('recall@', '100_abs', '2015-01-01')))
        assert recall_2y_ago == [
            (models['spiky_2y_ago'].model_id, 0.8, 0, 0.19),
            (models['stable_2y_ago'].model_id, 0.56, 0.24, 0),
            (models['bad_2y_ago'].model_id, 0.34, 0.46, 0.19),
        ]

        assert distance_table.observed_bounds == {
            ('precision@', '100_abs'): (0.39, 0.8),
            ('recall@', '100_abs'): (0.34, 0.8),
        }
def test_integration():
    """Run train -> predict -> evaluate end to end and check stored rows.

    A four-point logistic regression grid is trained on a two-row matrix;
    each resulting model is used to predict and evaluate a one-row test
    matrix for two as-of dates. The test expects one prediction row and one
    ``precision@``/``5_abs`` evaluation row per (model, date) pair.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'

            # create train and test matrices
            train_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            train_metadata = {
                'beginning_of_time': datetime.date(2012, 12, 20),
                'end_time': datetime.date(2016, 12, 20),
                'label_name': 'label',
                'label_window': '1y',
                'feature_names': ['ft1', 'ft2'],
                'metta-uuid': '1234',
            }
            train_store = InMemoryMatrixStore(train_matrix, train_metadata)

            as_of_dates = [
                datetime.date(2016, 12, 21),
                datetime.date(2017, 1, 21)
            ]
            test_stores = []
            for as_of_date in as_of_dates:
                test_matrix = pandas.DataFrame.from_dict({
                    'entity_id': [3],
                    'feature_one': [8],
                    'feature_two': [5],
                    'label': [5]
                }).set_index('entity_id')
                test_metadata = {
                    'label_name': 'label',
                    'label_window': '1y',
                    'end_time': as_of_date,
                    'metta-uuid': '1234',
                }
                test_stores.append(
                    InMemoryMatrixStore(test_matrix, test_metadata))

            model_storage_engine = S3ModelStorageEngine(s3_conn, project_path)
            experiment_hash = save_experiment_and_get_hash({}, db_engine)

            # instantiate pipeline objects
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=experiment_hash,
                model_storage_engine=model_storage_engine,
                db_engine=db_engine,
                model_group_keys=['label_name', 'label_window'])
            predictor = Predictor(
                project_path, model_storage_engine, db_engine)
            evaluator_config = [{
                'metrics': ['precision@'],
                'thresholds': {'top_n': [5]}
            }]
            model_evaluator = ModelEvaluator(evaluator_config, db_engine)

            # run the pipeline
            grid_config = {
                'sklearn.linear_model.LogisticRegression': {
                    'C': [0.00001, 0.0001],
                    'penalty': ['l1', 'l2'],
                    'random_state': [2193]
                }
            }
            model_ids = trainer.train_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=train_store)

            for model_id in model_ids:
                for as_of_date, test_store in zip(as_of_dates, test_stores):
                    predictions_proba = predictor.predict(
                        model_id,
                        test_store,
                        misc_db_parameters=dict(),
                        train_matrix_columns=['feature_one', 'feature_two'])
                    model_evaluator.evaluate(
                        predictions_proba, test_store.labels(), model_id,
                        as_of_date, as_of_date, '6month')

            # rows are expected date-major, with model ids 1-4 within a date
            expected_times = (
                datetime.datetime(2016, 12, 21),
                datetime.datetime(2017, 1, 21),
            )
            expected_model_ids = (1, 2, 3, 4)

            # assert
            # 1. that the predictions table entries are present and
            # can be linked to the original models
            records = list(db_engine.execute(
                'select entity_id, model_id, as_of_date '
                'from results.predictions '
                'join results.models using (model_id) order by 3, 2'
            ))
            assert records == [
                (3, expected_id, expected_time)
                for expected_time in expected_times
                for expected_id in expected_model_ids
            ]

            # that evaluations are there
            records = list(db_engine.execute(
                'select model_id, evaluation_start_time, metric, parameter '
                'from results.evaluations order by 2, 1'
            ))
            assert records == [
                (expected_id, expected_time, 'precision@', '5_abs')
                for expected_time in expected_times
                for expected_id in expected_model_ids
            ]
def create_sample_distance_table(engine):
    """Create and hand-populate a sample ``dist_table`` for tests.

    Two model groups ('stable' and 'spiky') get one model per train end time
    (2014, 2015, 2016). The distance table is created empty and then filled
    with one literal row per model for ``precision@``/``100_abs`` and one for
    ``recall@``/``100_abs``.

    Args:
        engine: database engine; it is passed to ``ensure_db`` and
            ``init_engine`` and used for the inserts.

    Returns:
        Tuple of (the ``DistanceFromBestTable`` instance, the dict of model
        groups keyed by 'stable' / 'spiky').
    """
    ensure_db(engine)
    init_engine(engine)

    model_groups = {
        'stable': ModelGroupFactory(model_type='myStableClassifier'),
        'spiky': ModelGroupFactory(model_type='mySpikeClassifier'),
    }

    class StableModelFactory(ModelFactory):
        model_group_rel = model_groups['stable']

    class SpikyModelFactory(ModelFactory):
        model_group_rel = model_groups['spiky']

    models = {
        'stable_3y_ago': StableModelFactory(train_end_time='2014-01-01'),
        'stable_2y_ago': StableModelFactory(train_end_time='2015-01-01'),
        'stable_1y_ago': StableModelFactory(train_end_time='2016-01-01'),
        'spiky_3y_ago': SpikyModelFactory(train_end_time='2014-01-01'),
        'spiky_2y_ago': SpikyModelFactory(train_end_time='2015-01-01'),
        'spiky_1y_ago': SpikyModelFactory(train_end_time='2016-01-01'),
    }
    session.commit()

    distance_table = DistanceFromBestTable(
        db_engine=engine,
        models_table='models',
        distance_table='dist_table'
    )
    distance_table._create()

    # leading columns of every row: (group id,) + (model id, train end time)
    stable = (model_groups['stable'].model_group_id,)
    spiky = (model_groups['spiky'].model_group_id,)
    ident = {
        key: (model.model_id, model.train_end_time)
        for key, model in models.items()
    }
    precision = ('precision@', '100_abs')
    recall = ('recall@', '100_abs')

    # the five trailing numbers are inserted positionally; their column
    # names are defined by DistanceFromBestTable._create, not visible here
    distance_rows = [
        stable + ident['stable_3y_ago'] + precision + (0.5, 0.6, 0.1, 0.5, 0.15),
        stable + ident['stable_2y_ago'] + precision + (0.5, 0.84, 0.34, 0.5, 0.18),
        stable + ident['stable_1y_ago'] + precision + (0.46, 0.67, 0.21, 0.5, 0.11),
        spiky + ident['spiky_3y_ago'] + precision + (0.45, 0.6, 0.15, 0.5, 0.19),
        spiky + ident['spiky_2y_ago'] + precision + (0.84, 0.84, 0.0, 0.5, 0.3),
        spiky + ident['spiky_1y_ago'] + precision + (0.45, 0.67, 0.22, 0.5, 0.12),
        stable + ident['stable_3y_ago'] + recall + (0.4, 0.4, 0.0, 0.4, 0.0),
        stable + ident['stable_2y_ago'] + recall + (0.5, 0.5, 0.0, 0.5, 0.0),
        stable + ident['stable_1y_ago'] + recall + (0.6, 0.6, 0.0, 0.6, 0.0),
        spiky + ident['spiky_3y_ago'] + recall + (0.65, 0.65, 0.0, 0.65, 0.0),
        spiky + ident['spiky_2y_ago'] + recall + (0.55, 0.55, 0.0, 0.55, 0.0),
        spiky + ident['spiky_1y_ago'] + recall + (0.45, 0.45, 0.0, 0.45, 0.0),
    ]
    insert_statement = \
        'insert into dist_table values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
    for dist_row in distance_rows:
        engine.execute(insert_statement, dist_row)

    return distance_table, model_groups
def test_predictor():
    """Exercise ``Predictor`` against an S3-backed model store.

    Checks, in order: the number of returned predictions, the stored
    prediction rows and their as-of dates and entity ids, that re-predicting
    for an already-stored date replaces rather than duplicates rows, and that
    a deleted model can no longer be loaded.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(s3_conn, project_path)
            _, model_id = \
                fake_trained_model(project_path, model_storage_engine, db_engine)
            predictor = Predictor(project_path, model_storage_engine, db_engine)

            # create prediction set
            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE,
                'label_window': '3month',
                'metta-uuid': '1234',
            }
            matrix_store = InMemoryMatrixStore(matrix, metadata)
            train_matrix_columns = ['feature_one', 'feature_two']

            predict_proba = predictor.predict(
                model_id,
                matrix_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_matrix_columns)

            # assert
            # 1. that the returned predictions are of the desired length
            assert len(predict_proba) == 2

            # 2. that the predictions table entries are present and
            # can be linked to the original models
            records = [
                row for row in db_engine.execute(
                    '''select entity_id, as_of_date
                    from results.predictions
                    join results.models using (model_id)''')
            ]
            assert len(records) == 2

            # 3. that the contained as_of_dates match what we sent in
            for record in records:
                assert record[1].date() == AS_OF_DATE

            # 4. that the entity ids match the given dataset
            assert sorted([record[0] for record in records]) == [1, 2]

            # 5. running with same model_id, different as of date
            # then with same as of date only replaces the records
            # with the same date
            new_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            new_metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE + datetime.timedelta(days=1),
                'label_window': '3month',
                'metta-uuid': '1234',
            }
            new_matrix_store = InMemoryMatrixStore(new_matrix, new_metadata)
            predictor.predict(
                model_id,
                new_matrix_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_matrix_columns)
            predictor.predict(
                model_id,
                matrix_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_matrix_columns)
            records = [
                row for row in db_engine.execute(
                    '''select entity_id, as_of_date
                    from results.predictions
                    join results.models using (model_id)''')
            ]
            # two dates x two entities; the repeated date must not add rows
            assert len(records) == 4

            # 6. That we can delete the model when done prediction on it
            predictor.delete_model(model_id)
            # identity check: `== None` would defer to the loaded object's
            # __eq__ instead of testing for the None singleton
            assert predictor.load_model(model_id) is None
def test_Auditioner():
    """Exercise ``Auditioner`` thresholding, selection rules and config output.

    Ten model groups with four models each (one per train end time) receive
    factory-generated evaluations for five metrics. The test checks that a
    loose filter keeps every group, a strict filter removes every group,
    loosening the filter again restores them, a selection rule grid yields
    one winner per rule, and the written tyra config lists the same rules.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        # set up data, randomly generated by the factories but conforming
        # generally to what we expect results schema data to look like
        num_model_groups = 10
        model_types = [
            'classifier type {}'.format(i)
            for i in range(0, num_model_groups)
        ]
        model_groups = [
            ModelGroupFactory(model_type=model_type)
            for model_type in model_types
        ]
        train_end_times = [
            datetime(2013, 1, 1),
            datetime(2014, 1, 1),
            datetime(2015, 1, 1),
            datetime(2016, 1, 1),
        ]
        models = [
            ModelFactory(model_group_rel=model_group,
                         train_end_time=train_end_time)
            for model_group in model_groups
            for train_end_time in train_end_times
        ]
        metrics = [
            ('precision@', '100_abs'),
            ('recall@', '100_abs'),
            ('precision@', '50_abs'),
            ('recall@', '50_abs'),
            ('fpr@', '10_pct'),
        ]

        class ImmediateEvalFactory(EvaluationFactory):
            evaluation_start_time = factory.LazyAttribute(
                lambda o: o.model_rel.train_end_time)

        # one evaluation per (metric, model); only the side effect of
        # creating the records matters, the list itself is discarded
        _ = [
            ImmediateEvalFactory(model_rel=model,
                                 metric=metric,
                                 parameter=parameter)
            for metric, parameter in metrics
            for model in models
        ]
        session.commit()

        # define a very loose filtering that should admit all model groups
        no_filtering = [{
            'metric': 'precision@',
            'parameter': '100_abs',
            'max_from_best': 1.0,
            'threshold_value': 0.0
        }, {
            'metric': 'recall@',
            'parameter': '100_abs',
            'max_from_best': 1.0,
            'threshold_value': 0.0
        }]
        model_group_ids = [mg.model_group_id for mg in model_groups]
        auditioner = Auditioner(
            db_engine,
            model_group_ids,
            train_end_times,
            no_filtering,
        )
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups
        auditioner.plot_model_groups()

        # here, we pick thresholding rules that should definitely remove
        # all model groups from contention because they are too strict.
        remove_all = [{
            'metric': 'precision@',
            'parameter': '100_abs',
            'max_from_best': 0.0,
            'threshold_value': 1.1
        }, {
            'metric': 'recall@',
            'parameter': '100_abs',
            'max_from_best': 0.0,
            'threshold_value': 1.1
        }]

        auditioner.update_metric_filters(remove_all)
        assert len(auditioner.thresholded_model_group_ids) == 0

        # one potential place for bugs would be when we pull back the rules
        # for being too restrictive. we want to make sure that the original
        # list is always used for thresholding, or else such a move would be
        # impossible
        auditioner.update_metric_filters(no_filtering)
        assert len(auditioner.thresholded_model_group_ids) == num_model_groups

        # now, we want to take this partially thresholded list and run it
        # through a grid of selection rules, meant to pick winners by a
        # variety of user-defined criteria
        rule_grid = [{
            'shared_parameters': [
                {
                    'metric': 'precision@',
                    'parameter': '100_abs'
                },
                {
                    'metric': 'recall@',
                    'parameter': '100_abs'
                },
            ],
            'selection_rules': [{
                'name': 'most_frequent_best_dist',
                'dist_from_best_case': [0.1, 0.2, 0.3]
            }, {
                'name': 'best_current_value'
            }]
        }, {
            'shared_parameters': [
                {
                    'metric1': 'precision@',
                    'parameter1': '100_abs'
                },
            ],
            'selection_rules': [
                {
                    'name': 'best_average_two_metrics',
                    'metric2': ['recall@'],
                    'parameter2': ['100_abs'],
                    'metric1_weight': [0.4, 0.5, 0.6]
                },
            ]
        }]
        auditioner.register_selection_rule_grid(rule_grid, plot=False)
        final_model_group_ids = auditioner.selection_rule_model_group_ids

        # we expect the result to be a mapping of selection rule name to
        # model group id
        assert isinstance(final_model_group_ids, dict)

        # we expect that there is one winner for each selection rule
        assert sorted(final_model_group_ids.keys()) == \
            sorted([rule.descriptive_name for rule in auditioner.selection_rules])

        # we expect that the results written to the yaml file are the
        # chosen model groups and their rules
        # however because the source data is randomly generated we could
        # have a different list on consecutive runs
        # and don't want to introduce non-determinism to the test
        with tempfile.NamedTemporaryFile() as tf:
            auditioner.write_tyra_config(tf.name)
            # yaml.load() without a Loader warns on PyYAML 5.1+ and raises
            # TypeError on PyYAML 6+. yaml.Loader was the implicit default,
            # so naming it keeps the old behaviour. It can construct
            # arbitrary Python objects, which is acceptable only because the
            # file was written by this test a moment ago.
            written_config = yaml.load(tf, Loader=yaml.Loader)
            assert sorted(written_config['selection_rule_model_groups'].keys()) == \
                sorted(final_model_group_ids.keys())
def replace_db(arg):
    """Start a replacement Postgres server and prepare its schema.

    ``arg`` is accepted but never used. ``self`` and ``port`` are not
    parameters; presumably both are closed over from an enclosing test
    method (TODO confirm against the surrounding scope).
    """
    replacement_server = testing.postgresql.Postgresql(port=port)
    # keep a reference on self so the server outlives this call
    self.new_server = replacement_server
    ensure_db(create_engine(replacement_server.url()))
def test_model_trainer():
    """Exercise ModelTrainer end to end against a temporary Postgres and mocked S3.

    The grid below expands to four LogisticRegression configurations. The test
    checks the rows written to the results schema, the cached model pickles,
    that the pickles can predict, and that retraining (plain, with
    ``replace=True``, after deleting the cache, and through the generator
    interface) yields the same model ids.
    """
    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)

        def fetch_rows(query):
            # materialize every row returned by a query on the test database
            return list(engine.execute(query))

        grid_config = {
            'sklearn.linear_model.LogisticRegression': {
                'C': [0.00001, 0.0001],
                'penalty': ['l1', 'l2'],
                'random_state': [2193],
            }
        }

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')

            # training set: two entities, two features, one label column
            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': ['good', 'bad'],
            })
            metadata = {
                'feature_start_time': datetime.date(2012, 12, 20),
                'end_time': datetime.date(2016, 12, 20),
                'label_name': 'label',
                'label_timespan': '1y',
                'metta-uuid': '1234',
                'feature_names': ['ft1', 'ft2'],
                'indices': ['entity_id'],
            }
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(s3_conn, project_path)
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                db_engine=engine,
                model_group_keys=['label_name', 'label_timespan'],
            )
            matrix_store = InMemoryMatrixStore(matrix, metadata)

            def train_all(model_trainer):
                # every call gets its own empty misc_db_parameters dict
                return model_trainer.train_models(
                    grid_config=grid_config,
                    misc_db_parameters=dict(),
                    matrix_store=matrix_store,
                )

            model_ids = train_all(trainer)

            # 1. the models and feature importances table entries are present
            importance_rows = fetch_rows(
                'select * from results.feature_importances'
            )
            # original note: "maybe exclude entity_id? yes"
            assert len(importance_rows) == 4 * 2

            hash_rows = fetch_rows('select model_hash from results.models')
            assert len(hash_rows) == 4

            cache_keys = [
                model_cache_key(project_path, hash_row[0], s3_conn)
                for hash_row in hash_rows
            ]

            # 2. the model groups are distinct
            group_rows = fetch_rows(
                'select distinct model_group_id from results.models'
            )
            assert len(group_rows) == 4

            # 3. all four models are cached
            model_pickles = []
            for cache_key in cache_keys:
                model_pickles.append(
                    pickle.loads(cache_key.get()['Body'].read())
                )
            assert len(model_pickles) == 4
            assert len([x for x in model_pickles if x is not None]) == 4

            # 4. the cached models can make predictions
            test_frame = pandas.DataFrame.from_dict({
                'entity_id': [3, 4],
                'feature_one': [4, 4],
                'feature_two': [6, 5],
            })
            test_matrix = InMemoryMatrixStore(
                matrix=test_frame,
                metadata=metadata
            ).matrix

            for model_pickle in model_pickles:
                predictions = model_pickle.predict(test_matrix)
                assert len(predictions) == 2

            # 5. when run again, same models are returned
            new_model_ids = train_all(trainer)
            assert len(
                fetch_rows('select model_hash from results.models')
            ) == 4
            assert model_ids == new_model_ids

            # 6. if replace is set, update non-unique attributes and
            # feature importances
            max_batch_run_time = fetch_rows(
                'select max(batch_run_time) from results.models'
            )[0][0]
            trainer = ModelTrainer(
                project_path=project_path,
                experiment_hash=None,
                model_storage_engine=model_storage_engine,
                db_engine=engine,
                model_group_keys=['label_name', 'label_timespan'],
                replace=True,
            )
            new_model_ids = train_all(trainer)
            assert model_ids == new_model_ids
            stored_model_ids = [
                row['model_id']
                for row in engine.execute(
                    'select model_id from results.models order by 1 asc'
                )
            ]
            assert stored_model_ids == model_ids
            new_max_batch_run_time = fetch_rows(
                'select max(batch_run_time) from results.models'
            )[0][0]
            assert new_max_batch_run_time > max_batch_run_time

            importance_rows = fetch_rows(
                'select * from results.feature_importances'
            )
            # original note: "maybe exclude entity_id? yes"
            assert len(importance_rows) == 4 * 2

            # 7. if the cache is missing but the metadata is still there,
            # reuse the metadata
            for row in engine.execute('select model_hash from results.models'):
                model_storage_engine.get_store(row[0]).delete()
            new_model_ids = train_all(trainer)
            assert model_ids == sorted(new_model_ids)

            # 8. the generator interface works the same way
            new_model_ids = trainer.generate_trained_models(
                grid_config=grid_config,
                misc_db_parameters=dict(),
                matrix_store=matrix_store
            )
            assert model_ids == sorted(new_model_ids)
def setup_data(self, engine):
    """Populate a test database and return a ModelGroupThresholder over it.

    Creates model groups 1-5, creates the ``dist_table`` distance table,
    inserts the fixture rows below, and builds the thresholder from
    ``self.metric_filters``.

    NOTE(review): the nine positional values per row presumably line up with
    the columns created by ``DistanceFromBestTable._create()`` (model group id,
    model id, train end time, metric, parameter, then four numeric values) --
    confirm against that class.
    """
    ensure_db(engine)
    init_engine(engine)

    model_types = (
        'modelType1',
        'modelType2',
        'modelType3',
        'modelType4',
        'modelType5',
    )
    for group_id, model_type in enumerate(model_types, start=1):
        ModelGroupFactory(model_group_id=group_id, model_type=model_type)
    session.commit()

    distance_table = DistanceFromBestTable(
        db_engine=engine,
        models_table='models',
        distance_table='dist_table'
    )
    distance_table._create()

    # Rows dated 2016 are inserted, but 2016 is not among the train_end_times
    # handed to the thresholder at the end of this method.
    distance_rows = [
        # 2014: model group 1 should pass both close and min checks
        (1, 1, '2014-01-01', 'precision@', '100_abs', 0.5, 0.5, 0.0, 0.38),
        (1, 1, '2014-01-01', 'recall@', '100_abs', 0.5, 0.5, 0.0, 0.38),
        (1, 1, '2014-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # 2015: model group 1 should not pass close check
        (1, 2, '2015-01-01', 'precision@', '100_abs', 0.5, 0.88, 0.38, 0.0),
        (1, 2, '2015-01-01', 'recall@', '100_abs', 0.5, 0.88, 0.38, 0.0),
        (1, 2, '2015-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        (1, 3, '2016-01-01', 'precision@', '100_abs', 0.46, 0.46, 0.0, 0.11),
        (1, 3, '2016-01-01', 'recall@', '100_abs', 0.46, 0.46, 0.0, 0.11),
        (1, 3, '2016-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # 2014: model group 2 should not pass min check
        (2, 4, '2014-01-01', 'precision@', '100_abs', 0.39, 0.5, 0.11, 0.5),
        (2, 4, '2014-01-01', 'recall@', '100_abs', 0.5, 0.5, 0.0, 0.38),
        (2, 4, '2014-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # 2015: model group 2 should pass both checks
        (2, 5, '2015-01-01', 'precision@', '100_abs', 0.69, 0.88, 0.19, 0.12),
        (2, 5, '2015-01-01', 'recall@', '100_abs', 0.69, 0.88, 0.19, 0.0),
        (2, 5, '2015-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        (2, 6, '2016-01-01', 'precision@', '100_abs', 0.34, 0.46, 0.12, 0.11),
        (2, 6, '2016-01-01', 'recall@', '100_abs', 0.46, 0.46, 0.0, 0.11),
        (2, 6, '2016-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # model group 3 not included in this round
        (3, 7, '2014-01-01', 'precision@', '100_abs', 0.28, 0.5, 0.22, 0.0),
        (3, 7, '2014-01-01', 'recall@', '100_abs', 0.5, 0.5, 0.0, 0.38),
        (3, 7, '2014-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        (3, 8, '2015-01-01', 'precision@', '100_abs', 0.88, 0.88, 0.0, 0.02),
        (3, 8, '2015-01-01', 'recall@', '100_abs', 0.5, 0.88, 0.38, 0.0),
        (3, 8, '2015-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        (3, 9, '2016-01-01', 'precision@', '100_abs', 0.44, 0.46, 0.02, 0.11),
        (3, 9, '2016-01-01', 'recall@', '100_abs', 0.46, 0.46, 0.0, 0.11),
        (3, 9, '2016-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # 2014: model group 4 should not pass any checks
        (4, 10, '2014-01-01', 'precision@', '100_abs', 0.29, 0.5, 0.21, 0.21),
        (4, 10, '2014-01-01', 'recall@', '100_abs', 0.5, 0.5, 0.0, 0.38),
        (4, 10, '2014-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # 2015: model group 4 should not pass close check
        (4, 11, '2015-01-01', 'precision@', '100_abs', 0.67, 0.88, 0.21, 0.21),
        (4, 11, '2015-01-01', 'recall@', '100_abs', 0.5, 0.88, 0.38, 0.0),
        (4, 11, '2015-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        (4, 12, '2016-01-01', 'precision@', '100_abs', 0.25, 0.46, 0.21, 0.21),
        (4, 12, '2016-01-01', 'recall@', '100_abs', 0.46, 0.46, 0.0, 0.11),
        (4, 12, '2016-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # 2014: model group 5 should not pass because precision is good
        # but not recall
        (5, 13, '2014-01-01', 'precision@', '100_abs', 0.5, 0.38, 0.0, 0.38),
        (5, 13, '2014-01-01', 'recall@', '100_abs', 0.3, 0.5, 0.2, 0.38),
        (5, 13, '2014-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # 2015: model group 5 should not pass because precision is good
        # but not recall
        (5, 14, '2015-01-01', 'precision@', '100_abs', 0.5, 0.88, 0.38, 0.0),
        (5, 14, '2015-01-01', 'recall@', '100_abs', 0.3, 0.88, 0.58, 0.0),
        (5, 14, '2015-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        (5, 15, '2016-01-01', 'precision@', '100_abs', 0.46, 0.46, 0.0, 0.11),
        (5, 15, '2016-01-01', 'recall@', '100_abs', 0.3, 0.46, 0.16, 0.11),
        (5, 15, '2016-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
        # 2014: model group 6 is failed by false positives
        (6, 16, '2014-01-01', 'precision@', '100_abs', 0.5, 0.5, 0.0, 0.38),
        (6, 16, '2014-01-01', 'recall@', '100_abs', 0.5, 0.5, 0.0, 0.38),
        (6, 16, '2014-01-01', 'false positives@', '100_abs', 60, 30, 30, 10),
        # 2015: model group 6 is failed by false positives
        (6, 17, '2015-01-01', 'precision@', '100_abs', 0.5, 0.88, 0.38, 0.0),
        (6, 17, '2015-01-01', 'recall@', '100_abs', 0.5, 0.38, 0.0, 0.38),
        (6, 17, '2015-01-01', 'false positives@', '100_abs', 60, 30, 30, 10),
        (6, 18, '2016-01-01', 'precision@', '100_abs', 0.46, 0.46, 0.0, 0.11),
        (6, 18, '2016-01-01', 'recall@', '100_abs', 0.5, 0.5, 0.0, 0.38),
        (6, 18, '2016-01-01', 'false positives@', '100_abs', 40, 30, 10, 10),
    ]

    # one insert statement per fixture row, in list order
    insert_sql = \
        'insert into dist_table values (%s, %s, %s, %s, %s, %s, %s, %s, %s)'
    for fixture_row in distance_rows:
        engine.execute(insert_sql, fixture_row)

    return ModelGroupThresholder(
        distance_from_best_table=distance_table,
        train_end_times=['2014-01-01', '2015-01-01'],
        initial_model_group_ids=[1, 2, 4, 5, 6],
        initial_metric_filters=self.metric_filters
    )
def test_predictor_retrieve():
    """Check that predictions read back from the database keep matrix order.

    Predicts once, moves the first stored prediction row to the end of the
    table, then predicts again with ``load_model`` mocked out. The second
    result must equal the first, and the model must not have been loaded.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        project_path = 'econ-dev/inspections'
        model_storage_engine = InMemoryModelStorageEngine(project_path)
        trained = fake_trained_model(
            project_path,
            model_storage_engine,
            db_engine
        )
        _, model_id = trained
        predictor = Predictor(
            project_path,
            model_storage_engine,
            db_engine,
            replace=False
        )
        dayone, daytwo = (
            datetime.date(2011, 1, day_of_month).strftime(
                predictor.expected_matrix_ts_format
            )
            for day_of_month in (1, 2)
        )

        # prediction set: two entities on each of two as-of dates
        matrix_data = {
            'entity_id': [1, 2, 1, 2],
            'as_of_date': [dayone, dayone, daytwo, daytwo],
            'feature_one': [3, 4, 5, 6],
            'feature_two': [5, 6, 7, 8],
            'label': [7, 8, 8, 7],
        }
        frame = pandas.DataFrame.from_dict(matrix_data)
        matrix = frame.set_index(['entity_id', 'as_of_date'])
        metadata = {
            'label_name': 'label',
            'end_time': AS_OF_DATE,
            'label_window': '3month',
            'metta-uuid': '1234',
        }
        matrix_store = InMemoryMatrixStore(matrix, metadata)

        def run_prediction():
            # every call gets its own empty misc_db_parameters dict
            return predictor.predict(
                model_id,
                matrix_store,
                misc_db_parameters=dict(),
                train_matrix_columns=['feature_one', 'feature_two']
            )

        predict_proba = run_prediction()

        # A second run should return the same predictions, this time
        # retrieved from the database.
        #
        # The subtle part: rows selected without an ORDER BY come back in the
        # table's physical order. That is usually insertion order, which may
        # well match the matrix order, so a missing reorder can go unnoticed
        # for a long time -- and when it finally bites, the scores are
        # garbage. The matrix itself may not be sorted by anything an
        # ORDER BY clause could easily express.
        #
        # So simulate a table order mutation that can happen over time:
        # delete the first row and re-insert it at the end. If the Predictor
        # does not explicitly reorder the results, the comparison below fails.
        delete_session = sessionmaker(bind=db_engine)()
        first_prediction = delete_session.query(Prediction).first()
        delete_session.delete(first_prediction)
        delete_session.commit()

        make_transient(first_prediction)
        insert_session = sessionmaker(bind=db_engine)()
        insert_session.add(first_prediction)
        insert_session.commit()

        predictor.load_model = Mock()
        new_predict_proba = run_prediction()
        assert_array_equal(new_predict_proba, predict_proba)
        assert not predictor.load_model.called