def test_model_for_differential(*, save_file='test_data_predictions.csv'):
    # Load the predictions saved from the previous model version
    previous_model_df = pd.read_csv(f'{api_config.PACKAGE_ROOT}/{save_file}')
    previous_model_predictions = previous_model_df.predictions.values
    print('previous predictions:', previous_model_predictions)

    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    test_data.drop('id', axis=1, inplace=True)
    discrete_features = (config.DISCRETE_SET1_FEATURES
                         + config.DISCRETE_SET2_FEATURES
                         + config.DISCRETE_SET3_FEATURES)
    test_data[discrete_features] = test_data[discrete_features].astype(str)
    multiple_test_input = test_data[0:200]

    current_result = make_prediction(input_data=multiple_test_input)
    current_model_predictions = current_result.get('predictions')
    print('current predictions:', current_model_predictions)

    assert len(previous_model_predictions) == len(current_model_predictions)
    for previous_value, current_value in zip(previous_model_predictions,
                                             current_model_predictions):
        # Convert numpy scalars to native Python numbers before comparing
        previous_value = previous_value.item()
        current_value = current_value.item()
        # NOTE: rel_tol=1 tolerates a 100% relative difference, so this
        # check is very loose.
        assert math.isclose(previous_value, current_value, rel_tol=1)

def test_make_single_prediction():
    # Given
    test_data = load_dataset(file_name='test.csv')
    single_test_json = test_data[0:1].to_json(orient='records')

    # When
    subject = make_prediction(input_data=single_test_json)

    # Then
    assert subject is not None
    assert isinstance(subject.get('predictions'), list)
    assert subject.get('predictions')[0] == 0

def predict():
    if request.method == 'POST':
        json_data = request.get_json()
        _logger.info(f'Inputs: {json_data}')

        result = make_prediction(input_data=json_data)
        _logger.info(f'Outputs: {result}')

        predictions = result.get('predictions')[0]
        version = result.get('version')

        return jsonify({'predictions': predictions,
                        'version': version})

def test_make_single_prediction():
    # Given
    test_data = load_data(filename='test.csv')
    single_test = test_data[config.FEATURES][0:1]
    single_test_json = single_test.to_json(orient='records')

    # When
    subject = make_prediction(input_data=single_test_json)

    # Then
    assert subject is not None

def capture_predictions(*, save_file: str = 'test_data_predictions.csv'):
    test_data = load_dataset(file_name='test.csv')
    test_data.drop('id', axis=1, inplace=True)
    discrete_features = (config.DISCRETE_SET1_FEATURES
                         + config.DISCRETE_SET2_FEATURES
                         + config.DISCRETE_SET3_FEATURES)
    test_data[discrete_features] = test_data[discrete_features].astype(str)
    multiple_test_input = test_data[0:200]

    predictions = make_prediction(input_data=multiple_test_input)

    # Save the predictions to the package root for the differential test
    predictions_df = pd.DataFrame(predictions)
    predictions_df.to_csv(f'{api_config.PACKAGE_ROOT}/{save_file}')

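capture_predictions writes the baseline file that test_model_for_differential (the first snippet above) later reads back. A minimal sketch of the intended two-step workflow, assuming both functions live in an importable module (the module name is an assumption):

# Hypothetical workflow: capture with the previously released model
# installed, then run the differential test against the candidate model.
# The module name below is an assumption.
from differential_tests import capture_predictions, test_model_for_differential

capture_predictions(save_file='test_data_predictions.csv')  # with the old model
# ...upgrade the installed model package, then...
test_model_for_differential(save_file='test_data_predictions.csv')  # with the new model
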
def test_make_multiple_predictions():
    # Given
    test_data = load_data(filename='test.csv')
    original_data_length = len(test_data)
    multiple_test = test_data[config.FEATURES]
    multiple_test_json = multiple_test.to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    assert len(subject['predictions']) == 33149

def test_make_single_prediction():
    # Given
    test_data = load_dataset(file_name='test.csv')
    single_test_input = test_data[0:1]

    # When
    subject = make_prediction(input_data=single_test_input)
    first_prediction = subject.get('predictions')[0]

    # Then
    assert subject is not None
    assert isinstance(first_prediction, np.int64)
    assert math.ceil(first_prediction) == 0

def test_make_multiple_predictions():
    # Given
    test_data = load_dataset(file_name='test.csv')
    multiple_test_json = test_data.to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    assert isinstance(subject.get('predictions'), list)
    assert subject.get('predictions').count(1) == 45931
    assert subject.get('predictions').count(0) == 34069

def test_make_single_prediction():
    # Given
    test_data = load_dataset(file_name='test.csv')
    single_test_json = test_data[0:1].to_json(orient='records')

    # When
    subject = make_prediction(input_data=single_test_json)

    # Then
    assert subject is not None
    assert math.ceil(subject.get('predictions')[0]) == 0

def test_single_prediction():
    # Given
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    discrete_features = (config.DISCRETE_SET1_FEATURES
                         + config.DISCRETE_SET2_FEATURES
                         + config.DISCRETE_SET3_FEATURES)
    test_data[discrete_features] = test_data[discrete_features].astype(str)
    single_test_input = test_data[0:1]

    # When
    subject = make_prediction(input_data=single_test_input[config.FEATURES])

    # Then
    assert subject is not None
    assert isinstance(subject.get('predictions')[0], np.int64)
    # Note: the original had the comparison inside math.ceil, which
    # asserted on ceil(bool) instead of the prediction itself.
    assert math.ceil(subject.get('predictions')[0]) == 0

def capture_predictions():
    """Save a slice of the predictions from the test data."""
    save_file = "test_data_predictions.csv"
    test_data = utils.load_dataset(
        filename=model_config.app_config.TESTING_DATA_FILE)

    # Take a slice of the test dataset
    multiple_test_input = test_data.iloc[100:700, :]
    predictions = predict.make_prediction(input_data=multiple_test_input)

    # Save to the package root
    predictions_df = pd.DataFrame(predictions)
    predictions_df.to_csv(f"{config.PACKAGE_ROOT}/{save_file}")

def test_make_multiple_predictions():
    # Given
    test_data = load_dataset(file_name='test.csv')
    original_data_length = len(test_data)
    multiple_test_input = test_data

    # When
    subject = make_prediction(input_data=multiple_test_input)

    # Then
    assert subject is not None
    assert len(subject.get('predictions')) == 417

    # We expect some rows to be filtered out
    assert len(subject.get('predictions')) != original_data_length

def test_make_multiple_predictions():
    # Given
    test_data = load_dataset(file_name='test.csv')
    original_data_length = len(test_data)
    multiple_test_json = test_data.to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    # assert len(subject.get('predictions')) == 1451
    # We expect some rows to be filtered out
    # assert len(subject.get('predictions')) != original_data_length

def test_make_prediction(sample_input_data):
    # Given
    expected_no_predictions = 131

    # When
    result = make_prediction(input_data=sample_input_data)

    # Then
    predictions = result.get("predictions")
    assert isinstance(predictions, np.ndarray)
    assert isinstance(predictions[0], np.int64)
    assert result.get("errors") is None
    assert len(predictions) == expected_no_predictions

    _predictions = list(predictions)
    y_true = sample_input_data["survived"]
    accuracy = accuracy_score(_predictions, y_true)
    assert accuracy > 0.7

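The test above depends on a sample_input_data pytest fixture that is not shown in this section. A minimal sketch of what such a fixture could look like, assuming a load_dataset helper and a configured test file name (the module paths and attribute names are assumptions):

# conftest.py -- hypothetical fixture assumed by test_make_prediction.
# The import paths and config attribute are illustrative, not the
# project's exact code.
import pytest

from classification_model.config.core import config
from classification_model.processing.data_manager import load_dataset


@pytest.fixture()
def sample_input_data():
    # Load the held-out test split for the tests that request it
    return load_dataset(file_name=config.app_config.test_data_file)
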
def test_make_single_prediction():
    # Given
    test_data = load_dataset(file_name='test.csv')
    # Get a single instance
    single_test_json = test_data[0:1].to_json(orient='records')

    # When
    # Call the classifier to make a prediction
    subject = make_prediction(input_data=single_test_json)

    # Then
    # Assert the prediction is not empty
    assert subject is not None
    # Ensure the prediction returns either 0 or 1 as an int64
    assert isinstance(subject.get('predictions')[0], np.int64)
    # We know that the first row's prediction is 0
    assert math.ceil(subject.get('predictions')[0]) == 0

def capture_predictions() -> None:
    """Save the test data predictions to a CSV."""
    save_file = 'test_data_predictions.csv'
    test_data = load_dataset(file_name='test.csv')

    # Take a slice with no input validation issues
    multiple_test_input = test_data[99:600]
    predictions = make_prediction(input_data=multiple_test_input)

    # Save the predictions for the test dataset. Note: this deliberately
    # writes to the classification model package within the repo, not to
    # the installed package.
    predictions_df = pd.DataFrame(predictions)
    predictions_df.to_csv(f'{config.PACKAGE_ROOT}/{save_file}')

def predict():
    if request.method == 'POST':
        json_data = request.get_json()
        _logger.debug(f'Inputs: {json_data}')

        input_data, errors = validate_inputs(input_data=json_data)
        result = make_prediction(input_data=input_data)
        _logger.debug(f'Outputs: {result}')

        predictions = result.get('predictions').tolist()
        version = result.get('version')

        return jsonify({
            'predictions': predictions,
            'version': version,
            'errors': errors
        })

def test_make_multiple_predictions():
    # Given
    test_data = load_dataset(file_name='test.csv')
    multiple_test_json = test_data.to_json(orient='records')
    true_predictions = [0, 1, 0, 1]

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    assert len(subject.get('predictions')) == 4
    for i, pred in enumerate(subject.get('predictions')):
        assert pred == true_predictions[i]

def make_save_predictions(self, *, input_data, db_model, app, json_data):
    """Make the prediction and persist it."""
    with app.app_context():
        if db_model == ModelType.NEURALNET:
            # Make the predictions with the neural network model
            result = dl_make_prediction(input_data=input_data)
            _logger.info(f"Outputs: {result}")
            predictions = result.get("predictions").tolist()
            version = result.get("version")

            # Save the predictions
            persistence = PredictionPersistence(
                db_session=current_app.db_session)
            persistence.save_predictions(
                inputs=json_data,
                model_version=version,
                predictions=predictions,
                db_model=ModelType.NEURALNET,
            )
        elif db_model == ModelType.GRADIENT_BOOSTING:
            # Make the predictions with the gradient boosting model
            result = make_prediction(input_data=input_data)
            _logger.info(f"Outputs: {result}")
            predictions = result.get("predictions").tolist()
            version = result.get("version")

            # Save the predictions
            persistence = PredictionPersistence(
                db_session=current_app.db_session)
            persistence.save_predictions(
                inputs=json_data,
                model_version=version,
                predictions=predictions,
                db_model=ModelType.GRADIENT_BOOSTING,
            )

def predict():
    if request.method == 'POST':
        # Step 1: Extract POST data from the request body as JSON
        json_data = request.get_json()
        _logger.debug(f'Inputs: {json_data}')

        # Step 2: Validate the input using the marshmallow schema
        input_data, errors = validate_inputs(input_data=json_data)

        # Step 3: Model prediction
        result = make_prediction(input_data=input_data)
        _logger.debug(f'Outputs: {result}')

        # Step 4: Convert the numpy ndarray to a list
        predictions = result.get('predictions').tolist()
        version = result.get('version')

        # Step 5: Return the response as JSON
        return jsonify({'predictions': predictions,
                        'version': version,
                        'errors': errors})

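A quick way to exercise an endpoint like this is Flask's built-in test client. A minimal smoke-test sketch, assuming an app factory named create_app and a /predict route (the factory name, module path, route, and payload fields are all assumptions):

# Hypothetical smoke test for the endpoint above.
from api.app import create_app  # assumed module path


def test_predict_endpoint_returns_predictions():
    app = create_app()
    with app.test_client() as client:
        payload = [{'feature_a': 1.0, 'feature_b': 'x'}]  # illustrative record
        response = client.post('/predict', json=payload)
        assert response.status_code == 200
        body = response.get_json()
        assert 'predictions' in body
        assert 'version' in body
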
def test_model_prediction_differential(
        *, save_file: str = 'test_data_predictions.csv'):
    """
    Compare the predictions of the current model with the saved
    predictions of the previous model.
    """
    # Given
    # Load the saved predictions from the previous model
    previous_model_df = pd.read_csv(f'{config.PACKAGE_ROOT}/{save_file}')
    previous_model_predictions = previous_model_df.predictions.values

    test_data = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    multiple_test_input = test_data[99:600]

    # When
    current_result = make_prediction(input_data=multiple_test_input)
    current_model_predictions = current_result.get('predictions')

    # Then
    # Diff the current model against the old model
    assert len(previous_model_predictions) == len(current_model_predictions)

    # Perform the differential test
    for previous_value, current_value in zip(
            previous_model_predictions, current_model_predictions):
        # Convert numpy float64 to a native Python float
        previous_value = previous_value.item()
        current_value = current_value.item()

        # rel_tol is the relative tolerance: the maximum allowed
        # difference between a and b, relative to the larger absolute
        # value of a or b. For example, to allow a 5% difference,
        # pass rel_tol=0.05.
        assert math.isclose(previous_value,
                            current_value,
                            rel_tol=model_config.ACCEPTABLE_MODEL_DIFFERENCE)

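The tolerance above is read from config. A minimal sketch of the assumed setting; the 0.05 value simply mirrors the 5% example in the comment and is an assumption, not the project's actual number:

# config.py -- hypothetical entry assumed by the differential test.
ACCEPTABLE_MODEL_DIFFERENCE = 0.05  # allow up to 5% relative drift
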
def test_multiple_predictions():
    # Given
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    test_data.drop('id', axis=1, inplace=True)
    discrete_features = (config.DISCRETE_SET1_FEATURES
                         + config.DISCRETE_SET2_FEATURES
                         + config.DISCRETE_SET3_FEATURES)
    test_data[discrete_features] = test_data[discrete_features].astype(str)
    original_length = len(test_data)
    multiple_test_input = test_data

    # When
    subject = make_prediction(input_data=multiple_test_input)

    # Then
    assert subject is not None
    assert len(subject.get('predictions')) == 127037

def predict():
    if request.method == "POST":
        # Extract the data from the request body. get_json parses the
        # JSON body into a list of dicts, which can be turned into a
        # dataframe -- the input predict.make_prediction expects.
        json_data = request.get_json()
        _logger.info(
            f"Inputs: {json_data} "
            f"model: {ModelType.GRADIENT_BOOSTING.name} "
            f"model_version: {_version}"
        )

        # Check that the data is valid
        input_data, errors = validation.validate_data(json_data)

        # Make the predictions
        result = make_prediction(input_data=input_data)
        _logger.info(f"Outputs: {result}")
        predictions = result.get("predictions").tolist()
        version = result.get("version")

        # Persist the predictions
        persistence = PredictionPersistence(db_session=current_app.db_session)
        persistence.save_predictions(
            inputs=json_data,
            model_version=version,
            predictions=predictions,
            db_model=ModelType.GRADIENT_BOOSTING,
        )

        # Asynchronous shadow mode: call the secondary model in a thread
        if current_app.config.get("SHADOW_MODE_ACTIVE"):
            _logger.debug(
                f"Calling shadow model asynchronously: "
                f"{ModelType.NEURALNET.value}"
            )
            thread = threading.Thread(
                target=persistence.make_save_predictions,
                kwargs={
                    "db_model": ModelType.NEURALNET,
                    "input_data": input_data,
                    "app": current_app._get_current_object(),
                    "json_data": json_data,
                },
            )
            thread.start()

        # Monitoring: increment the Prometheus counters per prediction
        for pred in predictions:
            if pred == "functional":
                PREDICTION_Counter_HEALTHY_WATER_PUMPS.labels(
                    app_name=APP_NAME,
                    model_name=ModelType.GRADIENT_BOOSTING.name,
                    model_version=_version).inc()
            elif pred == "non functional or functional needs repair":
                PREDICTION_Counter_FAULTY_WATER_PUMPS.labels(
                    app_name=APP_NAME,
                    model_name=ModelType.GRADIENT_BOOSTING.name,
                    model_version=_version).inc()

        return jsonify({
            "predictions": predictions,
            "errors": errors,
            "version": version,
        })
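
The PREDICTION_Counter_* metrics incremented above would typically be defined once at module level with prometheus_client. A minimal sketch under that assumption; the metric names and help strings are illustrative, and only the label names are taken from the snippet:

# Hypothetical metric definitions assumed by the endpoint above.
from prometheus_client import Counter

PREDICTION_Counter_HEALTHY_WATER_PUMPS = Counter(
    'healthy_water_pump_predictions_total',
    'Predictions labelling a pump as functional',
    labelnames=['app_name', 'model_name', 'model_version'],
)

PREDICTION_Counter_FAULTY_WATER_PUMPS = Counter(
    'faulty_water_pump_predictions_total',
    'Predictions labelling a pump as non functional or needing repair',
    labelnames=['app_name', 'model_name', 'model_version'],
)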