# Imports for the snippets in this section; the module paths are
# assumptions based on a typical regression_model package layout, so
# adjust them to match your project.
import math

import pandas as pd

from regression_model.config import config                   # assumed path
from regression_model.config import config as model_config   # assumed alias
from regression_model.predict import make_prediction
from regression_model.processing.data_management import load_dataset


def test_make_single_prediction():
    # Given
    test_data = load_dataset(file_name='test.csv')
    single_test_input = test_data[0:1]

    # When
    subject = make_prediction(input_data=single_test_input)

    # Then
    assert subject is not None
    assert isinstance(subject.get('predictions')[0], float)
    assert math.ceil(subject.get('predictions')[0]) == 112476
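# The contract exercised above: make_prediction returns a mapping with a
# 'predictions' array and (as the API code later in this section shows)
# a 'version' string. A small sketch reusing the imports above; the
# function name here is purely illustrative:

def show_prediction_contract() -> None:
    sample = load_dataset(file_name='test.csv')[0:1]
    result = make_prediction(input_data=sample)
    print(result.get('predictions')[0])  # a single float prediction
    print(result.get('version'))         # trained model version string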
def test_make_multiple_predictions():
    # Given
    test_data = load_dataset(file_name='test.csv')
    original_data_length = len(test_data)
    multiple_test_input = test_data

    # When
    subject = make_prediction(input_data=multiple_test_input)

    # Then
    assert subject is not None
    assert len(subject.get('predictions')) == 1451

    # We expect some rows to be filtered out
    assert len(subject.get('predictions')) != original_data_length
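# The row-count assertions above depend on make_prediction validating
# its input and dropping unusable rows. A minimal sketch of such a
# filtering step; the NA rule and function name are assumptions for
# illustration, not this package's actual validation logic:

def drop_rows_with_missing_features(
        data: pd.DataFrame, required_features: list) -> pd.DataFrame:
    """Keep only rows where every required feature is present."""
    return data.dropna(subset=required_features)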
def capture_predictions() -> None:
    """Save the test data predictions to a CSV."""
    save_file = 'test_data_predictions.csv'
    test_data = load_dataset(file_name='test.csv')

    # we take a slice with no input validation issues
    multiple_test_input = test_data[99:600]

    predictions = make_prediction(input_data=multiple_test_input)

    # save predictions for the test dataset
    predictions_df = pd.DataFrame(predictions)

    # hack here to save the file to the regression model
    # package of the repo, not the installed package
    predictions_df.to_csv(f'{config.PACKAGE_ROOT}/{save_file}')
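# capture_predictions is intended to be run once, against the *previous*
# model version, so that the differential test below has a benchmark CSV
# to diff against. Making the script directly runnable:

if __name__ == '__main__':
    capture_predictions()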
def test_model_prediction_differential(
        *,
        save_file: str = 'test_data_predictions.csv'):
    """
    This test compares the prediction result similarity of
    the current model with the previous model's results.
    """

    # Given
    # Load the saved previous model predictions
    previous_model_df = pd.read_csv(f'{config.PACKAGE_ROOT}/{save_file}')
    previous_model_predictions = previous_model_df.predictions.values

    test_data = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    multiple_test_input = test_data[99:600]

    # When
    current_result = make_prediction(input_data=multiple_test_input)
    current_model_predictions = current_result.get('predictions')

    # Then
    # diff the current model vs. the old model
    assert len(previous_model_predictions) == len(current_model_predictions)

    # Perform the differential test
    for previous_value, current_value in zip(
            previous_model_predictions,
            current_model_predictions):

        # convert numpy float64 to Python float.
        previous_value = previous_value.item()
        current_value = current_value.item()

        # rel_tol is the relative tolerance: it is the maximum allowed
        # difference between a and b, relative to the larger absolute
        # value of a or b. For example, to set a tolerance of 5%, pass
        # rel_tol=0.05.
        assert math.isclose(previous_value,
                            current_value,
                            rel_tol=model_config.ACCEPTABLE_MODEL_DIFFERENCE)
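# A worked example of the rel_tol semantics documented above, assuming
# (hypothetically) that ACCEPTABLE_MODEL_DIFFERENCE were set to 0.05,
# i.e. a 5% tolerance:

def rel_tol_worked_example() -> None:
    # 4,000 / 104,000 is under 5% of the larger value: close enough
    assert math.isclose(100_000.0, 104_000.0, rel_tol=0.05)
    # 6,000 / 106,000 is over 5% of the larger value: not close
    assert not math.isclose(100_000.0, 106_000.0, rel_tol=0.05)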
def predict():
    if request.method == 'POST':
        # Step 1: Extract POST data from request body as JSON
        json_data = request.get_json()
        _logger.debug(f'Inputs: {json_data}')

        # Step 2: Validate the input using marshmallow schema
        input_data, errors = validate_inputs(input_data=json_data)

        # Step 3: Model prediction
        result = make_prediction(input_data=input_data)
        _logger.debug(f'Outputs: {result}')

        # Step 4: Convert numpy ndarray to list
        predictions = result.get('predictions').tolist()
        version = result.get('version')

        # Step 5: Return the response as JSON
        return jsonify({'predictions': predictions,
                        'version': version,
                        'errors': errors})
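# The view above still needs a route registered on a Flask app or
# blueprint before it can serve requests. A minimal sketch of that
# wiring; the blueprint name, route, and port are assumptions for
# illustration, not this repo's actual configuration:
from flask import Blueprint

prediction_app = Blueprint('prediction_app', __name__)  # assumed name
prediction_app.add_url_rule('/predict', view_func=predict,
                            methods=['POST'])  # assumed route

# Hypothetical client call once the app is running:
#
#     import requests
#     response = requests.post('http://localhost:5000/predict',  # assumed URL
#                              json=[{'some_feature': 1.0}])     # hypothetical row
#     response.json()  # {'predictions': [...], 'version': ..., 'errors': ...}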