def test_predictor_get_train_columns():
    """Predict on the train and test matrices using the train column order.

    For each matrix the test checks that ``Predictor.predict`` returns a
    non-empty result and that rows in the matching
    ``<type>_results.<type>_predictions`` table can be joined to
    ``model_metadata.models``.

    NOTE(review): this file appears to define another function with this
    same name further down; if both live in one module, the later
    definition shadows this one -- confirm.
    """
    # Filled in with the matrix type ("train" / "test") for schema and table.
    predictions_query = '''select entity_id, as_of_date from {}_results.{}_predictions join model_metadata.models using (model_id)'''

    with testing.postgresql.Postgresql() as pg:
        engine = create_engine(pg.url())
        ensure_db(engine)
        init_engine(engine)

        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as workdir:
            train_store, test_store = sample_metta_csv_diff_order(workdir)

            storage = InMemoryModelStorageEngine(project_path)
            trained = fake_trained_model(
                project_path,
                storage,
                engine,
                train_matrix_uuid=train_store.uuid,
            )
            model_id = trained[1]
            predictor = Predictor(project_path, storage, engine)

            # The train matrix uuid was handed to fake_trained_model above;
            # create the factory record for the test matrix as well and
            # commit the session.
            MatrixFactory(matrix_uuid=test_store.uuid)
            session.commit()

            # Same checks for training predictions and testing predictions.
            cases = (("train", train_store), ("test", test_store))
            for mat_type, store in cases:
                predict_proba = predictor.predict(
                    model_id,
                    store,
                    misc_db_parameters={},
                    train_matrix_columns=train_store.columns(),
                )

                # 1. predictions were calculated
                assert len(predict_proba) > 0

                # 2. prediction rows exist and link back to the model record
                sql = predictions_query.format(mat_type, mat_type)
                records = list(engine.execute(sql))
                assert len(records) > 0
def test_calculate_and_save():
    """Individual importances are saved and a second run leaves them unchanged.

    Builds a fake trained model, runs
    ``calculate_and_save_all_methods_and_dates`` on the test matrix with
    ``replace=False``, and checks that ``test_results.individual_importances``
    has rows joinable to ``model_metadata.models``. Running the calculation
    a second time must yield exactly the same rows.
    """
    importances_query = '''select entity_id, as_of_date from test_results.individual_importances join model_metadata.models using (model_id)'''

    with testing.postgresql.Postgresql() as pg:
        engine = create_engine(pg.url())
        ensure_db(engine)

        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as workdir:
            train_store, test_store = sample_metta_csv_diff_order(workdir)
            storage = InMemoryModelStorageEngine(project_path)
            calculator = IndividualImportanceCalculator(
                engine,
                methods=['sample'],
                replace=False,
            )

            # Given: a trained model (and the test matrix loaded above).
            trained = fake_trained_model(
                project_path,
                storage,
                engine,
                train_matrix_uuid=train_store.uuid,
            )
            model_id = trained[1]

            def fetch_importances():
                # Materialize the current importance rows joined to models.
                return list(engine.execute(importances_query))

            # When: importances are calculated and saved ...
            calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store)

            # Then: they show up in the results schema.
            records = fetch_importances()
            assert len(records) > 0

            # And: running again produces the same stored rows.
            calculator.calculate_and_save_all_methods_and_dates(
                model_id, test_store)
            new_records = fetch_importances()
            assert len(records) == len(new_records)
            assert records == new_records
def test_predictor_get_train_columns():
    """Predict on the test matrix while passing the train matrix's columns.

    Checks that ``Predictor.predict`` returns a non-empty result and that
    rows in ``results.predictions`` can be joined to ``results.models``.

    NOTE(review): this file appears to define another function with this
    same name earlier on; if both live in one module, this definition
    shadows the earlier one -- confirm.
    """
    predictions_query = '''select entity_id, as_of_date from results.predictions join results.models using (model_id)'''

    with testing.postgresql.Postgresql() as pg:
        engine = create_engine(pg.url())
        ensure_db(engine)

        project_path = 'econ-dev/inspections'
        with tempfile.TemporaryDirectory() as workdir:
            train_store, test_store = sample_metta_csv_diff_order(workdir)

            storage = InMemoryModelStorageEngine(project_path)
            trained = fake_trained_model(
                project_path,
                storage,
                engine,
                train_matrix_uuid=train_store.uuid,
            )
            model_id = trained[1]

            predictor = Predictor(project_path, storage, engine)
            predict_proba = predictor.predict(
                model_id,
                test_store,
                misc_db_parameters={},
                train_matrix_columns=train_store.columns(),
            )

            # 1. predictions were calculated
            assert len(predict_proba) > 0

            # 2. prediction rows exist and link back to the model record
            records = list(engine.execute(predictions_query))
            assert len(records) > 0