def test_xtrain_larger_than_xtest():
    # given
    test_xtrain_data = load_dataset(file_name='xtrain.csv')
    test_xtrain_data_size = len(test_xtrain_data.index)

    # when
    test_xtest_data = load_dataset(file_name='xtest.csv')
    test_xtest_data_size = len(test_xtest_data.index)

    # then
    assert test_xtrain_data_size is not None
    assert test_xtest_data_size is not None
    assert test_xtrain_data_size > test_xtest_data_size
def test_make_single_prediction():
    test_data = load_dataset(file_name='test.csv')
    single_test_input = test_data[0:1]

    subject = make_prediction(input_data=single_test_input)

    assert subject is not None
    assert isinstance(subject.get('prediction')[0], float)
    assert math.ceil(subject.get('prediction')[0]) == 112476
def test_xtest_playername_dtype():
    # given
    test_data_xtest = load_dataset(file_name='xtest.csv')

    # when
    correct_dtype = np.dtype(np.int64)
    test_dtype_xtest = test_data_xtest["playerName"].dtypes

    # then
    # the playerName column is expected to be integer encoded in xtest.csv
    assert test_dtype_xtest == correct_dtype
def test_xtrain_column_length():
    # given
    test_data = load_dataset(file_name='xtrain.csv')
    # xtrain.csv carries six columns that are not model features
    test_data_column_len = len(test_data.columns) - 6

    # when
    correct_column_len = len(cfg.FEATURE_LIST)

    # then
    assert test_data_column_len is not None
    assert test_data_column_len == correct_column_len
def test_prediction_endpoint_returns_prediction(flask_test_client):
    test_data = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    post_json = test_data[0:1].to_json(orient='records')

    response = flask_test_client.post('/v1/predict/regression', json=post_json)

    assert response.status_code == 200
    response_json = json.loads(response.data)
    prediction = response_json['predictions']
    response_version = response_json['version']
    assert math.ceil(prediction) == 112476
    assert response_version == _version
def test_prediction_endpoint_validation_200(flask_test_client):
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    post_json = test_data.to_json(orient='records')

    response = flask_test_client.post('/v1/predict/regression', json=post_json)

    assert response.status_code == 200
    response_json = json.loads(response.data)
    # every input row ends up either as a prediction or as a validation error
    assert len(response_json.get('predictions')) + len(
        response_json.get('errors')) == len(test_data)
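Both endpoint tests above receive a flask_test_client pytest fixture that is defined elsewhere in the test suite. A minimal sketch of such a fixture, assuming the API package exposes a create_app application factory and a testing configuration object (both names are assumptions, not taken from the source):

import pytest

from api.app import create_app  # assumed application factory
from api.config import TestingConfig  # assumed testing configuration


@pytest.fixture
def flask_test_client():
    # build the Flask app in testing mode and yield its built-in test client
    app = create_app(config_object=TestingConfig)
    with app.test_client() as test_client:
        yield test_client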
def test_make_multiple_predictions():
    test_data = load_dataset(file_name='test.csv')
    original_data_length = len(test_data)
    multiple_test_input = test_data

    subject = make_prediction(input_data=multiple_test_input)

    assert subject is not None
    # input validation is expected to drop some rows, so the number of
    # predictions differs from the number of input rows
    assert len(subject.get('prediction')) == 1451
    assert len(subject.get('prediction')) != original_data_length
def test_make_single_prediction():
    # given
    test_data = load_dataset(file_name='xtest.csv')
    single_test_json = test_data[0:1].to_json(orient='records')  # serialise the first row of 'xtest.csv'

    # when
    subject = make_prediction(input_data=single_test_json)
    # print(subject.get('predictions')[0])
    # the print statement above was used to obtain the expected value asserted
    # below; it is subject to change after the model is re-trained

    # then
    assert subject is not None
    assert isinstance(subject.get('predictions')[0], float)  # the result dict exposes a 'predictions' key
    assert math.ceil(subject.get('predictions')[0]) == 16  # value obtained via the print above
def test_make_multiple_predictions():
    # given
    test_data = load_dataset(file_name='xtest.csv')
    original_data_length = len(test_data)
    multiple_test_json = test_data.to_json(orient='records')

    # when
    subject = make_prediction(input_data=multiple_test_json)
    # print(original_data_length)  # printed output is only shown when the test fails
    # print(subject.get('predictions'))  # verified that the points for each individual player are output

    # then
    assert subject is not None
    assert len(subject.get('predictions')) == 1034  # white-box test: known size of xtest.csv
    # we don't expect any rows to be filtered out
    assert len(subject.get('predictions')) == original_data_length
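Every test above relies on a load_dataset helper to read the packaged CSV files. A minimal sketch of such a helper, assuming the data files sit in a 'datasets' directory next to the module (the directory layout is an assumption):

import os

import pandas as pd

# assumed location of the packaged data files; adjust to the project's config
DATASET_DIR = os.path.join(os.path.dirname(__file__), 'datasets')


def load_dataset(*, file_name: str) -> pd.DataFrame:
    # read a packaged CSV (e.g. 'xtrain.csv' or 'xtest.csv') into a DataFrame
    return pd.read_csv(os.path.join(DATASET_DIR, file_name))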
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # setting the seed

    # transform the target
    y_train = np.log(y_train)
    # y_test = np.log(y_test)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training():
    print('Training the model...')

    # read training data
    data = load_dataset(file_name=cfg.TRAINING_DATA_FILE)

    # divide into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        data[cfg.FEATURE_LIST],
        data[cfg.TARGET],
        test_size=0.2,
        random_state=0)  # setting the seed

    # if the data had been transformed, the target would need to be
    # transformed here as well
    pipeline.ffml_pipe.fit(X_train[cfg.FEATURE_LIST], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.ffml_pipe)
    print('Model trained...')
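Both training functions finish by calling save_pipeline, which is not shown here. A minimal sketch of such a persistence helper, assuming joblib serialisation and a versioned file name (the output directory, file-name prefix, and version placeholder are assumptions):

from pathlib import Path

import joblib

# assumed output directory and pipeline name prefix; adjust to the project's config
TRAINED_MODEL_DIR = Path(__file__).resolve().parent / 'trained_models'
PIPELINE_SAVE_FILE = 'model_pipeline_v'
_version = '0.1.0'  # placeholder; the real module reads the package version


def save_pipeline(*, pipeline_to_persist) -> None:
    # version the artefact file name so predictions can be traced back to a model build
    save_file_name = f"{PIPELINE_SAVE_FILE}{_version}.pkl"
    save_path = TRAINED_MODEL_DIR / save_file_name
    joblib.dump(pipeline_to_persist, save_path)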