def test_predictor():
    """Exercise ``Predictor.predict`` for both train and test matrices.

    The test runs against a throwaway Postgres database and a mocked S3
    bucket, and checks that:

    1. the returned predictions have one entry per entity,
    2. the rows land in ``<type>_results.<type>_predictions`` and can be
       joined back to ``model_metadata.models``,
    3. the stored ``as_of_date`` matches the matrix ``end_time``,
    4. the stored entity ids match the input matrix,
    5. predicting again for a different date adds rows, while predicting
       again for an already-stored date does not add duplicates,
    6. the model can be deleted afterwards.
    """
    with testing.postgresql.Postgresql() as postgresql:
        db_engine = create_engine(postgresql.url())
        ensure_db(db_engine)
        init_engine(db_engine)

        with mock_s3():
            s3_conn = boto3.resource('s3')
            s3_conn.create_bucket(Bucket='econ-dev')
            project_path = 'econ-dev/inspections'
            model_storage_engine = S3ModelStorageEngine(project_path)
            _, model_id = fake_trained_model(
                project_path,
                model_storage_engine,
                db_engine,
                train_matrix_uuid='1234',
            )
            predictor = Predictor(project_path, model_storage_engine, db_engine)

            # create prediction set
            matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE,
                'label_timespan': '3month',
                'metta-uuid': '1234',
                'indices': ['entity_id'],
            }
            train_matrix_columns = ['feature_one', 'feature_two']

            # Runs the same test for training and testing predictions
            for mat_type in ("train", "test"):
                # Create the matrix to be tested and store in db
                metadata['matrix_type'] = mat_type
                matrix_store = InMemoryMatrixStore(matrix, metadata)

                # Note, the first time 'matrix' is used, the label column
                # is popped. It must be added back in to 'matrix' to create
                # another matrix_store.
                matrix['label'] = [7, 8]

                predict_proba = predictor.predict(
                    model_id,
                    matrix_store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_matrix_columns,
                )

                # assert
                # 1. that the returned predictions are of the desired length
                assert len(predict_proba) == 2

                # 2. that the predictions table entries are present and
                # can be linked to the original models
                query = (
                    'select entity_id, as_of_date '
                    'from {}_results.{}_predictions '
                    'join model_metadata.models using (model_id)'
                ).format(mat_type, mat_type)
                records = list(db_engine.execute(query))
                assert len(records) == 2

                # 3. that the contained as_of_dates match what we sent in
                for record in records:
                    assert record[1].date() == AS_OF_DATE

                # 4. that the entity ids match the given dataset
                assert sorted(record[0] for record in records) == [1, 2]

            # 5. running with same model_id, different as of date
            # then with same as of date only replaces the records
            # with the same date
            new_matrix = pandas.DataFrame.from_dict({
                'entity_id': [1, 2],
                'feature_one': [3, 4],
                'feature_two': [5, 6],
                'label': [7, 8]
            }).set_index('entity_id')
            new_metadata = {
                'label_name': 'label',
                'end_time': AS_OF_DATE + datetime.timedelta(days=1),
                'label_timespan': '3month',
                'metta-uuid': '1234',
                'indices': ['entity_id'],
            }

            # Runs the same test for training and testing predictions
            for mat_type in ("train", "test"):
                # Create the matrix to be tested and store in db
                new_metadata['matrix_type'] = mat_type
                new_matrix_store = InMemoryMatrixStore(new_matrix, new_metadata)

                # Adding 'label' column back into new_matrix
                new_matrix['label'] = [7, 8]

                predictor.predict(
                    model_id,
                    new_matrix_store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_matrix_columns,
                )
                # 'matrix_store' is the store left over from the last
                # iteration of the first loop; predicting with it again
                # must not add rows for a date that is already stored.
                predictor.predict(
                    model_id,
                    matrix_store,
                    misc_db_parameters=dict(),
                    train_matrix_columns=train_matrix_columns,
                )
                query = (
                    'select entity_id, as_of_date '
                    'from {}_results.{}_predictions '
                    'join model_metadata.models using (model_id)'
                ).format(mat_type, mat_type)
                records = list(db_engine.execute(query))
                assert len(records) == 4

            # 6. That we can delete the model when done predicting with it.
            # Identity comparison is the correct check for the None singleton.
            predictor.delete_model(model_id)
            assert predictor.load_model(model_id) is None
def test_predictor_entity_index():
    """Exercise ``Predictor.predict`` on matrices indexed by ``entity_id``.

    Using the storage, database engine and model id yielded by
    ``prepare()``, the test checks for both "train" and "test" matrices
    that:

    1. the returned predictions have the expected length,
    2. the rows land in ``<type>_results.predictions`` and can be joined
       back to ``model_metadata.models``,
    3. the stored ``as_of_date`` matches the matrix ``end_time``,
    4. the stored entity ids are ``[1, 2]``,
    5. predicting again for a different date adds rows, while predicting
       again for an already-stored date does not add duplicates,
    6. the model can be deleted afterwards.
    """
    with prepare() as (project_storage, db_engine, model_id):
        predictor = Predictor(project_storage.model_storage_engine(), db_engine)

        # Runs the same test for training and testing predictions
        for mat_type in ("train", "test"):
            matrix = matrix_creator(index="entity_id")
            metadata = matrix_metadata_creator(
                end_time=AS_OF_DATE,
                matrix_type=mat_type,
                indices=["entity_id"],
            )
            matrix_store = get_matrix_store(project_storage, matrix, metadata)
            # Every column except the last one is passed as a training column.
            train_matrix_columns = matrix.columns[0:-1].tolist()

            predict_proba = predictor.predict(
                model_id,
                matrix_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_matrix_columns,
            )

            # assert
            # 1. that the returned predictions are of the desired length
            assert len(predict_proba) == 2

            # 2. that the predictions table entries are present and
            # can be linked to the original models
            # The template has a single placeholder, so it takes exactly
            # one format argument.
            query = (
                "select entity_id, as_of_date "
                "from {}_results.predictions "
                "join model_metadata.models using (model_id)"
            ).format(mat_type)
            records = list(db_engine.execute(query))
            assert len(records) == 2

            # 3. that the contained as_of_dates match what we sent in
            for record in records:
                assert record[1].date() == AS_OF_DATE

            # 4. that the entity ids match the given dataset
            assert sorted(record[0] for record in records) == [1, 2]

        # 5. running with same model_id, different as of date
        # then with same as of date only replaces the records
        # with the same date

        # Runs the same test for training and testing predictions
        for mat_type in ("train", "test"):
            new_matrix = matrix_creator(index="entity_id")
            new_metadata = matrix_metadata_creator(
                end_time=AS_OF_DATE + datetime.timedelta(days=1),
                matrix_type=mat_type,
                indices=["entity_id"],
            )
            new_matrix_store = get_matrix_store(
                project_storage, new_matrix, new_metadata
            )

            predictor.predict(
                model_id,
                new_matrix_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_matrix_columns,
            )
            # 'matrix_store' and 'train_matrix_columns' are the values left
            # over from the last iteration of the first loop.
            predictor.predict(
                model_id,
                matrix_store,
                misc_db_parameters=dict(),
                train_matrix_columns=train_matrix_columns,
            )
            query = (
                "select entity_id, as_of_date "
                "from {}_results.predictions "
                "join model_metadata.models using (model_id)"
            ).format(mat_type)
            records = list(db_engine.execute(query))
            assert len(records) == 4

        # 6. That we can delete the model when done predicting with it
        predictor.delete_model(model_id)
        assert predictor.load_model(model_id) is None