def test_make_single_prediction():
    # Given
    test_data = load_dataset(file_name='test.csv')
    single_test_json = test_data[0:1]

    # When
    subject = make_prediction(input_data=single_test_json)
    print(subject.get('predictions')[0])

    # Then
    assert subject is not None
    assert isinstance(subject.get('predictions')[0], np.integer)
    assert math.ceil(subject.get('predictions')[0]) == 0
def test_make_multiple_predictions():
    # Given
    test_data = load_dataset(file_name='test.csv')
    original_data_length = len(test_data)
    multiple_test_json = test_data

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    assert len(subject.get('predictions')) == 100

    # We expect some rows to be filtered out, so the number of
    # predictions should differ from the original input length.
    assert len(subject.get('predictions')) != original_data_length
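# The two tests above assume a `make_prediction` entry point shaped roughly
# like the sketch below: it validates the input, drops rows that fail
# validation, and returns a dict with 'predictions' and 'version' keys.
# This is an illustrative sketch only; the `validate_inputs` helper is a
# hypothetical name, not the package's actual code.
def make_prediction(input_data):
    """Sketch of the prediction entry point exercised by the tests above."""
    validated_data = validate_inputs(input_data=input_data)  # hypothetical helper
    pipe = dm.load_pipeline(file_name=config.PIPELINE_PATH)
    predictions = pipe.predict(validated_data[config.FEATURES])
    return {'predictions': predictions, 'version': _version}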
def test_prediction_endpoint_returns_prediction(flask_test_client):
    # Given
    # Load the test data from the sainsbury_discontinued package.
    # This is important as it makes it harder for the test
    # data versions to get confused by not spreading it
    # across packages.
    test_data = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    post_json = test_data[0:1].to_json(orient='records')

    # When
    response = flask_test_client.post('/v1/predict/classifier',
                                      json=json.loads(post_json))

    # Then
    assert response.status_code == 200
    response_json = json.loads(response.data)
    prediction = response_json['predictions']
    response_version = response_json['version']
    assert prediction[0] == 0
    assert response_version == _version
def test_prediction_endpoint_validation_200(flask_test_client):
    # Given
    # Load the test data from the sainsbury_discontinued package.
    # This is important as it makes it harder for the test
    # data versions to get confused by not spreading it
    # across packages.
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    post_json = test_data.to_json(orient='records')

    # When
    response = flask_test_client.post('/v1/predict/classifier',
                                      json=json.loads(post_json))

    # Then
    assert response.status_code == 200
    response_json = json.loads(response.data)

    # Every input row should end up as either a prediction or a
    # validation error, so the two counts sum to the original length.
    assert len(response_json.get('predictions')) + len(
        response_json.get('errors')) == len(test_data)
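# Both endpoint tests take a `flask_test_client` fixture. A minimal pytest
# conftest.py sketch is shown below, assuming an application factory named
# `create_app`; the factory name and import path are assumptions, not the
# package's actual code.
import pytest

from api.app import create_app  # assumed import path


@pytest.fixture
def flask_test_client():
    # Build the app in testing mode and yield Flask's built-in test client.
    app = create_app()
    app.config['TESTING'] = True
    with app.test_client() as client:
        yield client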
def run_training() -> None:
    """Train the model."""
    # load training data
    data = dm.load_dataset(file_name=config.TRAINING_DATA_FILE)

    # train and test split, stratified on the target to preserve class balance
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.3,
        random_state=1984,
        stratify=data[config.TARGET])

    # fit pipeline
    pipeline.discontinued_pipe.fit(X_train[config.FEATURES], y_train)

    # add model version to logs
    _logger.info(f"saving model version: {_version}")

    # save pipeline
    dm.save_pipeline(pipeline_to_persist=pipeline.discontinued_pipe)
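# `dm.save_pipeline` / `dm.load_pipeline` are not shown in this section. A
# common implementation persists the fitted sklearn pipeline with joblib under
# a versioned file name; the sketch below assumes that pattern (the
# TRAINED_MODEL_DIR setting and `_version` wiring are assumptions, not the
# package's actual code).
import joblib


def save_pipeline(*, pipeline_to_persist) -> None:
    # Persist the fitted pipeline under a versioned file name.
    save_path = config.TRAINED_MODEL_DIR / f"classifier_v{_version}.pkl"
    joblib.dump(pipeline_to_persist, save_path)


def load_pipeline(*, file_name):
    # Reload a previously persisted pipeline from disk.
    return joblib.load(file_name)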
def make_predictions(input_data):
    _discontinued_identifier = dm.load_pipeline(file_name=config.PIPELINE_PATH)
    result = _discontinued_identifier.predict(input_data)
    prob = _discontinued_identifier.predict_proba(input_data)
    # probability of the positive (discontinued) class
    prob_1 = prob[:, 1]
    return result, prob_1


if __name__ == '__main__':
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report

    # load training data
    data = dm.load_dataset(file_name=config.TRAINING_DATA_FILE)

    # train and test split
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.3,
        random_state=1984,
        stratify=data[config.TARGET])

    # prediction and propensity
    pred, prob_1 = make_predictions(data[config.FEATURES])
    data['pred'] = pred
    data['prob_1'] = prob_1

    # determine classification report
    print(classification_report(data[config.TARGET], data['pred']))
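# Note: the block above scores the full dataset, which includes the training
# rows. For an unbiased estimate, the same report can be run on the held-out
# split instead; a usage sketch, assuming the variables from the block above:
#
#     preds, propensities = make_predictions(X_test)
#     print(classification_report(y_test, preds))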