"""Tests for evaluation metric record generation.

test_model_scoring_inspections and test_evaluating_early_warning_date_range
exercise the older ModelScorer / single results schema API;
test_evaluating_early_warning exercises the newer ModelEvaluator with
separate train_results and test_results schemas.
"""
import datetime

import testing.postgresql
from sqlalchemy import create_engine

# Project test helpers. These tests span two generations of the evaluation
# API, so the import paths below are assumptions; adjust them to match the
# suite's actual layout.
from catwalk.db import ensure_db
from catwalk.evaluation import ModelEvaluator, ModelScorer
from catwalk.storage import InMemoryModelStorageEngine
from tests.utils import fake_trained_model


def test_model_scoring_inspections():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [{
            'metrics': ['precision@', 'recall@'],
            'thresholds': {'percentiles': [5.0, 10.0], 'top_n': [5, 10]}
        }]
        model_scorer = ModelScorer(metric_groups, db_engine)
        trained_model, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)
        labels = fake_labels(5)
        evaluation_start = datetime.datetime(2016, 4, 1)
        evaluation_end = datetime.datetime(2016, 7, 1)
        prediction_frequency = '1d'
        model_scorer.score(
            trained_model.predict_proba(labels)[:, 1],
            trained_model.predict(labels),
            labels,
            model_id,
            evaluation_start,
            evaluation_end,
            prediction_frequency)

        # Assert that all of the expected records are there.
        results = db_engine.execute(
            '''select distinct(metric || parameter)
            from results.evaluations
            where model_id = %s and evaluation_start_time = %s
            order by 1''',
            (model_id, evaluation_start))
        records = [row[0] for row in results]
        assert records == [
            '[email protected]_pct',
            'precision@10_abs',
            '[email protected]_pct',
            'precision@5_abs',
            '[email protected]_pct',
            'recall@10_abs',
            '[email protected]_pct',
            'recall@5_abs',
        ]
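# The 'mediocre' custom metric used by the early-warning tests below is
# assumed to be a trivial constant scorer along these lines; the signature
# (predictions_proba, predictions_binary, labels, parameters) is an
# assumption matching how custom metrics appear to be invoked in this suite.
def always_half(predictions_proba, predictions_binary, labels, parameters):
    # Ignore all inputs and return a constant, so the expected evaluation
    # records are deterministic.
    return 0.5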
def test_evaluating_early_warning():
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        testing_metric_groups = [
            {
                "metrics": [
                    "precision@",
                    "recall@",
                    "true positives@",
                    "true negatives@",
                    "false positives@",
                    "false negatives@",
                ],
                "thresholds": {"percentiles": [5.0, 10.0], "top_n": [5, 10]},
            },
            {
                "metrics": [
                    "f1",
                    "mediocre",
                    "accuracy",
                    "roc_auc",
                    "average precision score",
                ]
            },
            {"metrics": ["fbeta@"], "parameters": [{"beta": 0.75}, {"beta": 1.25}]},
        ]
        training_metric_groups = [{"metrics": ["accuracy", "roc_auc"]}]
        custom_metrics = {"mediocre": always_half}
        model_evaluator = ModelEvaluator(
            testing_metric_groups,
            training_metric_groups,
            db_engine,
            custom_metrics=custom_metrics,
        )
        labels = fake_labels(5)
        fake_train_matrix_store = MockMatrixStore("train", "efgh", 5, db_engine, labels)
        fake_test_matrix_store = MockMatrixStore("test", "1234", 5, db_engine, labels)
        trained_model, model_id = fake_trained_model(db_engine)

        # Evaluate the testing metrics and check that all of them are recorded.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_test_matrix_store, model_id
        )
        records = [
            row[0]
            for row in db_engine.execute(
                """select distinct(metric || parameter)
                from test_results.evaluations
                where model_id = %s and evaluation_start_time = %s
                order by 1""",
                (model_id, fake_test_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == [
            "accuracy",
            "average precision score",
            "f1",
            "false [email protected]_pct",
            "false negatives@10_abs",
            "false [email protected]_pct",
            "false negatives@5_abs",
            "false [email protected]_pct",
            "false positives@10_abs",
            "false [email protected]_pct",
            "false positives@5_abs",
            "[email protected]_beta",
            "[email protected]_beta",
            "mediocre",
            "[email protected]_pct",
            "precision@10_abs",
            "[email protected]_pct",
            "precision@5_abs",
            "[email protected]_pct",
            "recall@10_abs",
            "[email protected]_pct",
            "recall@5_abs",
            "roc_auc",
            "true [email protected]_pct",
            "true negatives@10_abs",
            "true [email protected]_pct",
            "true negatives@5_abs",
            "true [email protected]_pct",
            "true positives@10_abs",
            "true [email protected]_pct",
            "true positives@5_abs",
        ]

        # Evaluate the training metrics and check that only those are recorded.
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1], fake_train_matrix_store, model_id
        )
        records = [
            row[0]
            for row in db_engine.execute(
                """select distinct(metric || parameter)
                from train_results.evaluations
                where model_id = %s and evaluation_start_time = %s
                order by 1""",
                (model_id, fake_train_matrix_store.as_of_dates[0]),
            )
        ]
        assert records == ["accuracy", "roc_auc"]
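# A minimal sketch of the MockMatrixStore test double used above, assuming a
# (matrix_type, matrix_uuid, label_count, db_engine, init_labels) constructor.
# The real helper presumably also fakes matrix contents and metadata; only
# the attributes these tests touch are modeled here.
class MockMatrixStore:
    def __init__(self, matrix_type, matrix_uuid, label_count, db_engine, init_labels):
        self.matrix_type = matrix_type
        self.uuid = matrix_uuid
        self.label_count = label_count
        self.db_engine = db_engine
        self.labels = init_labels
        # A single fake as-of date, matching the tests' use of as_of_dates[0].
        self.as_of_dates = [datetime.date(2016, 5, 5)]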
def test_evaluating_early_warning_date_range():
    # Variant of the early-warning test above, exercising the older
    # evaluate() signature that takes labels and an explicit evaluation
    # date range instead of a matrix store.
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        metric_groups = [{
            'metrics': [
                'precision@', 'recall@', 'true positives@',
                'true negatives@', 'false positives@', 'false negatives@'
            ],
            'thresholds': {'percentiles': [5.0, 10.0], 'top_n': [5, 10]}
        }, {
            'metrics': [
                'f1', 'mediocre', 'accuracy', 'roc_auc',
                'average precision score'
            ],
        }, {
            'metrics': ['fbeta@'],
            'parameters': [{'beta': 0.75}, {'beta': 1.25}]
        }]
        custom_metrics = {'mediocre': always_half}
        model_evaluator = ModelEvaluator(
            metric_groups, db_engine, custom_metrics=custom_metrics)
        trained_model, model_id = fake_trained_model(
            'myproject', InMemoryModelStorageEngine('myproject'), db_engine)
        labels = fake_labels(5)
        as_of_date = datetime.date(2016, 5, 5)
        model_evaluator.evaluate(
            trained_model.predict_proba(labels)[:, 1],
            labels,
            model_id,
            as_of_date,
            as_of_date,
            '1y')

        # Assert that all of the expected records are there.
        records = [
            row[0] for row in db_engine.execute(
                '''select distinct(metric || parameter)
                from results.evaluations
                where model_id = %s and evaluation_start_time = %s
                order by 1''',
                (model_id, as_of_date))
        ]
        assert records == [
            'accuracy',
            'average precision score',
            'f1',
            'false [email protected]_pct',
            'false negatives@10_abs',
            'false [email protected]_pct',
            'false negatives@5_abs',
            'false [email protected]_pct',
            'false positives@10_abs',
            'false [email protected]_pct',
            'false positives@5_abs',
            '[email protected]_beta',
            '[email protected]_beta',
            'mediocre',
            '[email protected]_pct',
            'precision@10_abs',
            '[email protected]_pct',
            'precision@5_abs',
            '[email protected]_pct',
            'recall@10_abs',
            '[email protected]_pct',
            'recall@5_abs',
            'roc_auc',
            'true [email protected]_pct',
            'true negatives@10_abs',
            'true [email protected]_pct',
            'true negatives@5_abs',
            'true [email protected]_pct',
            'true positives@10_abs',
            'true [email protected]_pct',
            'true positives@5_abs'
        ]
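# A sketch of the fake_labels helper used throughout these tests, under the
# assumption that it just builds a random binary label array of the requested
# length; the real helper may seed randomness or use a different dtype.
import random

import numpy


def fake_labels(length):
    return numpy.array([random.choice([True, False]) for _ in range(length)])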