def run_training() -> None:
    """Fit the status pipeline on the training split and persist it.

    Loads ``config.DATA_FILE``, splits it 60/40 with a fixed random seed,
    encodes the training target, fits ``status_pipeline`` on the training
    portion and hands the fitted pipeline to ``save_pipeline``.
    """
    dataset = load_data(filename=config.DATA_FILE)
    features = dataset[config.FEATURES]
    target = dataset[config.TARGET]

    # Only the training portion is needed here; the held-out 40% is discarded.
    X_train, _X_test, y_train, _y_test = train_test_split(
        features,
        target,
        test_size=0.4,
        random_state=42,
    )

    encoded_target = encode_target(y_train)
    status_pipeline.fit(X_train, encoded_target)
    save_pipeline(pipeline_to_persist=status_pipeline)
def test_make_single_prediction():
    """make_prediction returns a result for a one-row JSON payload."""
    # Given
    frame = load_data(filename='test.csv')
    first_row = frame[config.FEATURES][0:1]
    payload = first_row.to_json(orient='records')

    # When
    subject = make_prediction(input_data=payload)

    # Then
    assert subject is not None
def test_make_multiple_predictions():
    """make_prediction returns one prediction per row of the test file."""
    # Given
    test_data = load_data(filename='test.csv')
    original_data_length = len(test_data)
    multiple_test = test_data[config.FEATURES]
    multiple_test_json = multiple_test.to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    # Compare against the number of rows actually loaded instead of a
    # hard-coded count, so the test does not break when test.csv changes.
    # NOTE(review): assumes make_prediction does not drop any input rows.
    assert len(subject['predictions']) == original_data_length
def testing_prediction_endpoint_returns_prediction(flask_test_client):
    """POSTing ten rows to /predict yields HTTP 200 and ten predictions."""
    # Given
    frame = load_data(filename='test.csv')
    first_ten = frame[model_config.FEATURES][0:10]
    payload = first_ten.to_json(orient='records')

    # When
    response = flask_test_client.post('/predict', json=payload)

    # Then
    # The body is decoded before any assertion runs, so a malformed response
    # fails on parsing rather than on the status-code check.
    body = json.loads(response.data)
    returned_predictions = body['predictions']
    returned_count = len(returned_predictions)
    assert response.status_code == 200
    assert returned_count == 10
import logging

from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

_logger = logging.getLogger(__name__)

# The persisted pipeline is loaded once, when this module is imported, and
# reused by every call to make_prediction.
pipeline_file_name = f"{config.PIPELINE_SAVE_FILE}.pkl"
_status_pipe = load_pipeline(filename=pipeline_file_name)


def make_prediction(*, input_data):
    """Run the loaded pipeline on JSON input and return its predictions.

    Args:
        input_data: JSON input accepted by ``pd.read_json``.

    Returns:
        A dict with the single key ``'predictions'`` holding the value
        returned by ``_status_pipe.predict``.
    """
    frame = pd.read_json(input_data)
    return {'predictions': _status_pipe.predict(frame)}


if __name__ == '__main__':
    # Rebuild the split with the same test_size and seed used in training
    # and score the loaded pipeline on the held-out portion.
    dataset = load_data(filename=config.DATA_FILE)
    _X_train, X_holdout, _y_train, y_holdout = train_test_split(
        dataset[config.FEATURES],
        dataset[config.TARGET],
        test_size=0.4,
        random_state=42,
    )

    y_pred = _status_pipe.predict(X_holdout)
    y_true = encode_target(y_holdout)

    acc = accuracy_score(y_true, y_pred)
    print(f"test_accuracy: {acc}")
    print(classification_report(y_true, y_pred))