def test_prediction_quality_against_another_model(raw_training_data, sample_input_data):
    """Check that the current model has a lower MSE than the alternative model.

    Both models predict on ``raw_training_data`` with the target column
    dropped; the test passes only when the current model's mean squared
    error against the target column is strictly smaller.
    """
    # Given
    target_name = config.model_config.target
    features = raw_training_data.drop(target_name, axis=1)
    actuals = raw_training_data[target_name]

    current_result = make_prediction(input_data=features)

    # the older model has these variable names reversed
    legacy_column_names = {
        "FirstFlrSF": "1stFlrSF",
        "SecondFlrSF": "2ndFlrSF",
        "ThreeSsnPortch": "3SsnPorch",
    }
    features.rename(columns=legacy_column_names, inplace=True)
    alternative_result = alt_make_prediction(input_data=features)

    # When
    y_true = actuals.values
    current_mse = mean_squared_error(
        y_true=y_true,
        y_pred=current_result["predictions"],
    )
    alternative_mse = mean_squared_error(
        y_true=y_true,
        y_pred=alternative_result["predictions"],
    )

    # Then
    assert current_mse < alternative_mse
def test_prediction_quality_against_benchmark(raw_training_data, sample_input_data):
    """Check that the first prediction falls inside a rough benchmark band.

    The band is centred on the first target value rounded to the nearest
    10,000 and extends ``benchmark_flexibility`` on either side. Only the
    first row of the input data is sent to ``make_prediction``.
    """
    # Given
    input_df = raw_training_data.drop(config.model_config.target, axis=1)
    output_df = raw_training_data[config.model_config.target]

    # Generate rough benchmarks (you would tweak depending on your model)
    benchmark_flexibility = 50000
    # setting ndigits to -4 will round the value to the nearest 10,000,
    # e.g. 210,000; computed once and shared by both boundaries
    benchmark_centre = round(output_df.iloc[0], ndigits=-4)
    # e.g. 210,000 - 50,000 = 160,000
    benchmark_lower_boundary = benchmark_centre - benchmark_flexibility
    # e.g. 210,000 + 50,000 = 260,000
    benchmark_upper_boundary = benchmark_centre + benchmark_flexibility

    # When
    subject = make_prediction(input_data=input_df[0:1])

    # Then
    assert subject is not None
    prediction = subject.get("predictions")[0]
    assert isinstance(prediction, float)
    # Separate assertions so a failure shows which boundary was violated.
    assert prediction > benchmark_lower_boundary
    assert prediction < benchmark_upper_boundary
def predict():
    """Return model predictions for a POSTed JSON payload and persist them.

    Returns:
        On success, a JSON response with ``predictions``, ``version`` and
        ``errors`` keys. When ``make_prediction`` reports errors, a 400
        response whose body is the JSON-encoded errors. For any non-POST
        request, an empty 405 response.
    """
    # Answer non-POST requests explicitly: falling off the end of the view
    # would return None, which Flask rejects with a TypeError (HTTP 500).
    if request.method != "POST":
        return Response(status=405, headers={"Allow": "POST"})

    # Step 1: Extract POST data from request body as JSON
    json_data = request.get_json()

    # Step 2: Access the model prediction function (also validates data)
    result = make_prediction(input_data=json_data)

    # Step 3: Handle errors
    errors = result.get("errors")
    if errors:
        # The body is JSON, so label it as such for clients.
        return Response(
            json.dumps(errors),
            status=400,
            mimetype="application/json",
        )

    # Step 4: Split out results
    # .tolist() turns the predictions into a plain list for persistence and
    # for the JSON response.
    predictions = result.get("predictions").tolist()
    version = result.get("version")

    # Step 5: Save predictions
    persistence = PredictionPersistence(db_session=current_app.db_session)
    persistence.save_predictions(
        inputs=json_data,
        model_version=version,
        predictions=predictions,
        db_model=ModelType.GRADIENT_BOOSTING,
    )

    # Step 6: Prepare prediction response
    # ``errors`` is always falsy here; it is kept so the response keeps the
    # same set of keys.
    return jsonify(
        {
            "predictions": predictions,
            "version": version,
            "errors": errors,
        }
    )
def predict_previous():
    """Return model predictions for a POSTed JSON payload without saving them.

    Returns:
        On success, a JSON response with ``predictions``, ``version`` and
        ``errors`` keys. When ``make_prediction`` reports errors, a 400
        response whose body is the JSON-encoded errors. For any non-POST
        request, an empty 405 response.
    """
    # Answer non-POST requests explicitly: falling off the end of the view
    # would return None, which Flask rejects with a TypeError (HTTP 500).
    if request.method != "POST":
        return Response(status=405, headers={"Allow": "POST"})

    # Step 1: Extract POST data from request body as JSON
    json_data = request.get_json()

    # Step 2: Access the model prediction function (also validates data)
    # NOTE(review): this calls the same make_prediction as predict();
    # confirm whether a previous model's prediction function was intended.
    result = make_prediction(input_data=json_data)

    # Step 3: Handle errors
    errors = result.get("errors")
    if errors:
        # The body is JSON, so label it as such for clients.
        return Response(
            json.dumps(errors),
            status=400,
            mimetype="application/json",
        )

    # Step 4: Split out results
    # .tolist() turns the predictions into a plain list for the JSON response.
    predictions = result.get("predictions").tolist()
    version = result.get("version")

    # Step 5: Prepare prediction response
    # ``errors`` is always falsy here; it is kept so the response keeps the
    # same set of keys.
    return jsonify(
        {
            "predictions": predictions,
            "version": version,
            "errors": errors,
        }
    )