def test_ModelEvaluator_needs_evaluation_with_bias_audit(db_engine_with_results_schema):
    # Test that if a bias audit config is passed and there are no matching
    # bias audits in the database, needs_evaluations returns True.
    # This assumes that evaluations are populated; those cases are covered
    # by the 'no_bias_audit' test.
    model_evaluator = ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [3]},
            },
        ],
        training_metric_groups=[],
        bias_config={"thresholds": {"top_n": [2]}},
        db_engine=db_engine_with_results_schema,
    )
    model_with_evaluations = ModelFactory()
    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    # only the overall (empty) subset hash is populated here
    for subset_hash in [""]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="3_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        "as_of_date_frequency": as_of_date_frequency,
        "as_of_times": [eval_time],
    }
    test_matrix_store = MockMatrixStore(
        "test",
        "1234",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )
    assert model_evaluator.needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_with_evaluations.model_id,
        subset_hash="",
    )
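# For context: needs_evaluations returns True above even though a matching
# precision@3_abs evaluation row exists, because bias_config adds a second
# requirement that no stored audit satisfies. Below is a minimal sketch of
# that decision rule; required_evals, stored_evals, and stored_audits are
# hypothetical stand-ins for the database lookups the real ModelEvaluator
# performs.
def _needs_evaluations_sketch(required_evals, stored_evals, bias_config, stored_audits):
    # any configured (metric, parameter) pair without a stored row => re-evaluate
    if required_evals - stored_evals:
        return True
    # a bias audit is configured but no audit rows exist => re-evaluate
    if bias_config and not stored_audits:
        return True
    return False


def test_needs_evaluations_sketch_bias_audit_case():
    # mirrors the test above: the precision@3_abs evaluation exists,
    # but the configured bias audit has no stored results
    assert _needs_evaluations_sketch(
        required_evals={("precision@", "3_abs")},
        stored_evals={("precision@", "3_abs")},
        bias_config={"thresholds": {"top_n": [2]}},
        stored_audits=set(),
    )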
def test_ModelEvaluator_needs_evaluation_no_bias_audit(db_engine_with_results_schema):
    # TEST SETUP:
    # create two models: one that has zero evaluations,
    # one that has an evaluation for precision@100_abs,
    # both overall and for each subset
    model_with_evaluations = ModelFactory()
    model_without_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    for subset_hash in [""] + [filename_friendly_hash(subset) for subset in SUBSETS]:
        EvaluationFactory(
            model_rel=model_with_evaluations,
            evaluation_start_time=eval_time,
            evaluation_end_time=eval_time,
            as_of_date_frequency=as_of_date_frequency,
            metric="precision@",
            parameter="100_abs",
            subset_hash=subset_hash,
        )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        "as_of_date_frequency": as_of_date_frequency,
        "as_of_times": [eval_time],
    }
    test_matrix_store = MockMatrixStore(
        "test",
        "1234",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )
    train_matrix_store = MockMatrixStore(
        "train",
        "2345",
        5,
        db_engine_with_results_schema,
        metadata_overrides=metadata_overrides,
    )

    # the evaluated model has test evaluations for precision, but not recall,
    # so this needs evaluations
    for subset in SUBSETS:
        subset_hash = filename_friendly_hash(subset) if subset else ""
        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@", "recall@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the evaluated model has test evaluations for precision,
    # so this should not need evaluations
    for subset in SUBSETS:
        subset_hash = filename_friendly_hash(subset) if subset else ""
        assert not ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the non-evaluated model has no evaluations,
    # so this should need evaluations
    for subset in SUBSETS:
        subset_hash = filename_friendly_hash(subset) if subset else ""
        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=test_matrix_store,
            model_id=model_without_evaluations.model_id,
            subset_hash=subset_hash,
        )

    # the evaluated model has no *train* evaluations,
    # so the train matrix should need evaluations
    for subset in SUBSETS:
        subset_hash = filename_friendly_hash(subset) if subset else ""
        assert ModelEvaluator(
            testing_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            training_metric_groups=[
                {
                    "metrics": ["precision@"],
                    "thresholds": {"top_n": [100]},
                }
            ],
            db_engine=db_engine_with_results_schema,
        ).needs_evaluations(
            matrix_store=train_matrix_store,
            model_id=model_with_evaluations.model_id,
            subset_hash=subset_hash,
        )

    session.close()
    session.remove()
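# The four loops above repeat the same ModelEvaluator construction. A possible
# refactor (a hypothetical convenience, not part of the triage API) is a small
# helper that builds a throwaway evaluator per case, so each loop body reduces
# to `assert _evaluator_needs(...)` or `assert not _evaluator_needs(...)`:
def _evaluator_needs(db_engine, testing, training, matrix_store, model_id, subset_hash):
    # same constructor arguments and call signature used by the assertions above
    return ModelEvaluator(
        testing_metric_groups=testing,
        training_metric_groups=training,
        db_engine=db_engine,
    ).needs_evaluations(
        matrix_store=matrix_store,
        model_id=model_id,
        subset_hash=subset_hash,
    )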
def test_ModelEvaluator_needs_evaluation(db_engine):
    ensure_db(db_engine)
    init_engine(db_engine)
    # TEST SETUP:
    # create two models: one that has zero evaluations,
    # one that has an evaluation for precision@100_abs
    model_with_evaluations = ModelFactory()
    model_without_evaluations = ModelFactory()

    eval_time = datetime.datetime(2016, 1, 1)
    as_of_date_frequency = "3d"
    EvaluationFactory(
        model_rel=model_with_evaluations,
        evaluation_start_time=eval_time,
        evaluation_end_time=eval_time,
        as_of_date_frequency=as_of_date_frequency,
        metric="precision@",
        parameter="100_abs",
    )
    session.commit()

    # make a test matrix to pass in
    metadata_overrides = {
        "as_of_date_frequency": as_of_date_frequency,
        "end_time": eval_time,
    }
    test_matrix_store = MockMatrixStore(
        "test", "1234", 5, db_engine, metadata_overrides=metadata_overrides
    )
    train_matrix_store = MockMatrixStore(
        "train", "2345", 5, db_engine, metadata_overrides=metadata_overrides
    )

    # the evaluated model has test evaluations for precision, but not recall,
    # so this needs evaluations
    assert ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@", "recall@"],
                "thresholds": {"top_n": [100]},
            }
        ],
        training_metric_groups=[],
        db_engine=db_engine,
    ).needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_with_evaluations.model_id,
    )

    # the evaluated model has test evaluations for precision,
    # so this should not need evaluations
    assert not ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [100]},
            }
        ],
        training_metric_groups=[],
        db_engine=db_engine,
    ).needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_with_evaluations.model_id,
    )

    # the non-evaluated model has no evaluations,
    # so this should need evaluations
    assert ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [100]},
            }
        ],
        training_metric_groups=[],
        db_engine=db_engine,
    ).needs_evaluations(
        matrix_store=test_matrix_store,
        model_id=model_without_evaluations.model_id,
    )

    # the evaluated model has no *train* evaluations,
    # so the train matrix should need evaluations
    assert ModelEvaluator(
        testing_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [100]},
            }
        ],
        training_metric_groups=[
            {
                "metrics": ["precision@"],
                "thresholds": {"top_n": [100]},
            }
        ],
        db_engine=db_engine,
    ).needs_evaluations(
        matrix_store=train_matrix_store,
        model_id=model_with_evaluations.model_id,
    )

    session.close()
    session.remove()
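# A minimal sketch of the metric-group expansion these tests rely on, assuming
# only the convention visible above: a "top_n" threshold of N pairs with a
# metric parameter of "N_abs". The real ModelEvaluator supports more threshold
# types; this hypothetical helper ignores them.
def _expected_metric_params(metric_groups):
    pairs = set()
    for group in metric_groups:
        for metric in group["metrics"]:
            for top_n in group.get("thresholds", {}).get("top_n", []):
                pairs.add((metric, f"{top_n}_abs"))
    return pairs


def test_expected_metric_params_sketch():
    # The first assertion in the test above needs evaluations because
    # recall@/100_abs has no stored row, while precision@/100_abs does.
    required = _expected_metric_params(
        [{"metrics": ["precision@", "recall@"], "thresholds": {"top_n": [100]}}]
    )
    stored = {("precision@", "100_abs")}
    assert required - stored == {("recall@", "100_abs")}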