Example #1
0
def test_model_for_differential(*,
                                save_file='test_data_predictions.csv',
                                rel_tol=0.05):
    """Compare the current model's predictions with a saved baseline.

    The baseline is read from ``save_file`` under ``api_config.PACKAGE_ROOT``
    and compared, value by value, with fresh predictions for the first 200
    rows of the testing dataset.

    Args:
        save_file: Name of the CSV holding the previous predictions; it
            must expose a ``predictions`` column.
        rel_tol: Relative tolerance handed to ``math.isclose``. The former
            hard-coded value of 1 accepted every pair of same-signed
            numbers (and every pair containing a zero), so the comparison
            could not fail; the default is now 5%.
    """
    # Given: the predictions captured from the previous model
    baseline_df = pd.read_csv(f'{api_config.PACKAGE_ROOT}/{save_file}')
    previous_model_predictions = baseline_df.predictions.values
    print('previous predictions:', previous_model_predictions)

    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    test_data.drop('id', axis=1, inplace=True)

    # The discrete feature columns are cast to str before prediction.
    discrete_features = (config.DISCRETE_SET1_FEATURES +
                         config.DISCRETE_SET2_FEATURES +
                         config.DISCRETE_SET3_FEATURES)
    test_data[discrete_features] = test_data[discrete_features].astype(str)

    multiple_test_input = test_data[0:200]

    # When
    current_result = make_prediction(input_data=multiple_test_input)
    current_model_predictions = current_result.get('predictions')
    print('current predictions:', current_model_predictions)

    # Then
    assert len(previous_model_predictions) == len(current_model_predictions)

    for previous_value, current_value in zip(previous_model_predictions,
                                             current_model_predictions):
        # .item() converts the numpy scalars to plain Python numbers.
        assert math.isclose(previous_value.item(),
                            current_value.item(),
                            rel_tol=rel_tol)
def test_make_single_prediction():
    """Predict on the first row of ``test.csv`` and expect class 0."""
    # Given: the first record serialised as a JSON string
    first_row = load_dataset(file_name='test.csv')[0:1]
    payload = first_row.to_json(orient='records')

    # When
    subject = make_prediction(input_data=payload)

    # Then
    assert subject is not None
    predictions = subject.get('predictions')
    assert isinstance(predictions, list)
    assert predictions[0] == 0
def predict():
    """Run the model on the JSON body of a POST request.

    Returns:
        A JSON response with the first prediction and the model version.
        For any other HTTP method the function returns ``None``.
    """
    if request.method != 'POST':
        return None

    payload = request.get_json()
    _logger.info(f'Inputs: {payload}')

    outcome = make_prediction(input_data=payload)
    _logger.info(f'Outputs: {outcome}')

    # Only the first prediction is sent back to the caller.
    response_body = {
        'predictions': outcome.get('predictions')[0],
        'version': outcome.get('version'),
    }
    return jsonify(response_body)
def test_make_single_prediction():
    """A single feature row from ``test.csv`` yields a non-None result."""
    # Given: the first row, restricted to the model's feature columns
    feature_frame = load_data(filename='test.csv')[config.FEATURES]
    single_test_json = feature_frame[0:1].to_json(orient='records')

    # When
    subject = make_prediction(input_data=single_test_json)

    # Then
    assert subject is not None
Example #5
0
def capture_predictions(*, save_file: str = 'test_data_predictions.csv'):
    """Predict on the first 200 rows of ``test.csv`` and save them as CSV.

    Args:
        save_file: File name, relative to ``api_config.PACKAGE_ROOT``, that
            receives the predictions.
    """
    test_data = load_dataset(file_name='test.csv')
    test_data.drop('id', axis=1, inplace=True)

    # The discrete feature columns are cast to str before prediction.
    discrete_columns = (config.DISCRETE_SET1_FEATURES
                        + config.DISCRETE_SET2_FEATURES
                        + config.DISCRETE_SET3_FEATURES)
    test_data[discrete_columns] = test_data[discrete_columns].astype(str)

    predictions = make_prediction(input_data=test_data[0:200])

    output_path = f'{api_config.PACKAGE_ROOT}/{save_file}'
    pd.DataFrame(predictions).to_csv(output_path)
def test_make_multiple_predictions():
    """Predict on every row of ``test.csv`` and check the result count."""
    # Given: all rows, restricted to the model's feature columns
    test_data = load_data(filename='test.csv')
    multiple_test_json = test_data[config.FEATURES].to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    assert len(subject['predictions']) == 33149
Example #7
0
def test_make_single_prediction():
    """Predict on the first row of ``test.csv``; expect an int64 equal to 0."""
    # Given
    test_data = load_dataset(file_name='test.csv')
    single_test_input = test_data[0:1]

    # When
    subject = make_prediction(input_data=single_test_input)

    # Then
    # Check for a missing result before indexing into it; otherwise a None
    # result raises AttributeError and this assertion never runs.
    assert subject is not None
    first_prediction = subject.get('predictions')[0]
    assert isinstance(first_prediction, np.int64)
    assert math.ceil(first_prediction) == 0
def test_make_multiple_predictions():
    """Predict on all of ``test.csv`` and check the per-class counts."""
    # Given: the whole dataset serialised as a JSON string
    payload = load_dataset(file_name='test.csv').to_json(orient='records')

    # When
    subject = make_prediction(input_data=payload)

    # Then
    assert subject is not None
    predictions = subject.get('predictions')
    assert isinstance(predictions, list)
    assert predictions.count(1) == 45931
    assert predictions.count(0) == 34069
Example #9
0
def test_make_single_prediction():
    """Predict on the first row of ``test.csv`` and expect class 0."""
    # Given
    test_data = load_dataset(file_name='test.csv')
    single_test_json = test_data[0:1].to_json(orient='records')

    # When
    subject = make_prediction(input_data=single_test_json)

    # Then
    # Check for a missing result before touching it; the type print used
    # to run first and would raise AttributeError on a None result.
    assert subject is not None
    first_prediction = subject.get('predictions')[0]
    print(type(first_prediction))
    assert math.ceil(first_prediction) == 0
def test_single_prediction():
    """Predict on the first test row; expect an int64 prediction of 0."""
    # Given
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)

    # The discrete feature columns are cast to str before prediction.
    discrete_features = (config.DISCRETE_SET1_FEATURES +
                         config.DISCRETE_SET2_FEATURES +
                         config.DISCRETE_SET3_FEATURES)
    test_data[discrete_features] = test_data[discrete_features].astype(str)
    single_test_input = test_data[0:1]

    # When
    subject = make_prediction(input_data=single_test_input[config.FEATURES])

    # Then
    assert subject is not None
    first_prediction = subject.get('predictions')[0]
    assert isinstance(first_prediction, np.int64)
    # The closing parenthesis used to wrap the comparison, so math.ceil was
    # applied to a boolean; compare the ceiling of the prediction with 0.
    assert math.ceil(first_prediction) == 0
Example #11
0
def capture_predictions():
    """Predict on rows 100-699 of the testing data and save them as CSV.

    The output is written to ``test_data_predictions.csv`` under
    ``config.PACKAGE_ROOT``.
    """
    output_name = "test_data_predictions.csv"

    full_test_set = utils.load_dataset(
        filename=model_config.app_config.TESTING_DATA_FILE)

    # Positional slice: rows 100 up to (not including) 700, all columns.
    sample = full_test_set.iloc[100:700, :]
    predictions = predict.make_prediction(input_data=sample)

    pd.DataFrame(predictions).to_csv(f"{config.PACKAGE_ROOT}/{output_name}")
Example #12
0
def test_make_multiple_predictions():
    """Predict on all of ``test.csv``; expect 417 results, fewer than rows."""
    # Given
    test_data = load_dataset(file_name='test.csv')
    row_count = len(test_data)

    # When
    subject = make_prediction(input_data=test_data)

    # Then
    assert subject is not None
    prediction_count = len(subject.get('predictions'))
    assert prediction_count == 417

    # Some rows are expected to be filtered out, so the counts differ.
    assert prediction_count != row_count
Example #13
0
def test_make_multiple_predictions():
    """Predict on all of ``test.csv`` and check that a result is returned."""
    # Given: the whole dataset serialised as a JSON string
    test_data = load_dataset(file_name='test.csv')
    multiple_test_json = test_data.to_json(orient='records')

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    # NOTE: two further checks were commented out here and are still
    # disabled: that exactly 1451 predictions come back, and that the
    # count differs from the input length (some rows filtered out).
def test_make_prediction(sample_input_data):
    """Check type, count and accuracy of predictions on the sample input.

    Args:
        sample_input_data: Input rows; the ``survived`` column is read as
            the ground-truth labels.
    """
    # Given
    expected_count = 131
    minimum_accuracy = 0.7

    # When
    result = make_prediction(input_data=sample_input_data)
    predictions = result.get("predictions")

    # Then
    assert isinstance(predictions, np.ndarray)
    assert isinstance(predictions[0], np.int64)
    assert result.get("errors") is None
    assert len(predictions) == expected_count

    # NOTE(review): predictions are passed first and labels second; if this
    # is scikit-learn's accuracy_score the documented order is
    # (y_true, y_pred) - harmless for accuracy, but worth confirming.
    accuracy = accuracy_score(list(predictions), sample_input_data["survived"])
    assert accuracy > minimum_accuracy
def test_make_single_prediction():
    """Predict on the first row of ``test.csv``; expect an int64 equal to 0."""
    # Given: a single instance serialised as a JSON string
    first_row = load_dataset(file_name='test.csv')[0:1]
    single_test_json = first_row.to_json(orient='records')

    # When: the model is asked for a prediction
    subject = make_prediction(input_data=single_test_json)

    # Then: a result exists ...
    assert subject is not None

    # ... its first prediction is a numpy int64 ...
    first_prediction = subject.get('predictions')[0]
    assert isinstance(first_prediction, np.int64)

    # ... and the first row is expected to be predicted as 0.
    assert math.ceil(subject.get('predictions')[0]) == 0
def capture_predictions() -> None:
    """Predict on rows 99-599 of ``test.csv`` and write the result to CSV.

    The file ``test_data_predictions.csv`` is written under
    ``config.PACKAGE_ROOT``.
    """
    save_file = 'test_data_predictions.csv'

    # This slice was picked because it has no input validation issues.
    clean_slice = load_dataset(file_name='test.csv')[99:600]
    predictions = make_prediction(input_data=clean_slice)

    # HACK: the target is the classification model package inside the
    # repo, not the installed package.
    target_path = f'{config.PACKAGE_ROOT}/{save_file}'
    pd.DataFrame(predictions).to_csv(target_path)
Example #17
0
def predict():
    """Validate the JSON body of a POST request and return predictions.

    Returns:
        A JSON response with ``predictions``, ``version`` and ``errors``.
        For any other HTTP method the function returns ``None``.
    """
    if request.method != 'POST':
        return None

    json_data = request.get_json()
    _logger.debug(f'Inputs: {json_data}')

    # Validation returns the input to predict on plus any errors found.
    input_data, errors = validate_inputs(input_data=json_data)

    result = make_prediction(input_data=input_data)
    _logger.debug(f'Outputs: {result}')

    response_body = {
        'predictions': result.get('predictions').tolist(),
        'version': result.get('version'),
        'errors': errors,
    }
    return jsonify(response_body)
def test_make_multiple_predictions():
    """Predict on all of ``test.csv`` and compare with the expected labels."""
    # Given
    test_data = load_dataset(file_name='test.csv')
    multiple_test_json = test_data.to_json(orient='records')
    true_predictions = [0, 1, 0, 1]

    # When
    subject = make_prediction(input_data=multiple_test_json)

    # Then
    assert subject is not None
    predictions = subject.get('predictions')
    assert len(predictions) == len(true_predictions)
    for pred, expected in zip(predictions, true_predictions):
        assert pred == expected
Example #19
0
    def make_save_predictions(self, *, input_data, db_model, app, json_data):
        """Make a prediction with the requested model and persist it.

        The work runs inside ``app.app_context()`` so that
        ``current_app.db_session`` can be resolved.

        Args:
            input_data: Input handed to the model's prediction function.
            db_model: ``ModelType`` member selecting the model; only
                ``NEURALNET`` and ``GRADIENT_BOOSTING`` are handled.
            app: Application object whose context is entered for the call.
            json_data: Request payload stored alongside the predictions.
        """
        with app.app_context():
            # Select the prediction function and the model label stored
            # with the results; everything after that is shared.
            if db_model == ModelType.NEURALNET:
                predict_fn = dl_make_prediction
                model_type = ModelType.NEURALNET
            elif db_model == ModelType.GRADIENT_BOOSTING:
                predict_fn = make_prediction
                model_type = ModelType.GRADIENT_BOOSTING
            else:
                # Previously a silent no-op; make the skipped save visible.
                _logger.warning(
                    f"Unsupported model type, nothing saved : {db_model}")
                return

            # Making the predictions
            result = predict_fn(input_data=input_data)
            _logger.info(f"Outputs : {result}")

            predictions = result.get("predictions").tolist()
            version = result.get("version")

            # Save predictions
            persistence = PredictionPersistence(
                db_session=current_app.db_session)
            persistence.save_predictions(
                inputs=json_data,
                model_version=version,
                predictions=predictions,
                db_model=model_type,
            )
def predict():
    """Validate the JSON body of a POST request and return predictions.

    Returns:
        A JSON response with ``predictions``, ``version`` and ``errors``.
        For any other HTTP method the function returns ``None``.
    """
    if request.method != 'POST':
        return None

    # Read the request body as JSON.
    json_data = request.get_json()
    _logger.debug(f'Inputs: {json_data}')

    # Validate it; this yields the input to predict on plus any errors.
    input_data, errors = validate_inputs(input_data=json_data)

    # Run the model.
    result = make_prediction(input_data=input_data)
    _logger.debug(f'Outputs: {result}')

    # .tolist() turns the predictions into a plain list for the JSON body.
    payload = {
        'predictions': result.get('predictions').tolist(),
        'version': result.get('version'),
        'errors': errors,
    }
    return jsonify(payload)
Example #21
0
def test_model_prediction_differential(
        *,
        save_file: str = 'test_data_predictions.csv'):
    """Check the current model's predictions against a saved baseline.

    Predictions for rows 99-599 of the testing dataset are compared, value
    by value, with those stored in ``save_file`` under
    ``config.PACKAGE_ROOT``.

    Args:
        save_file: Name of the CSV holding the previous model's
            predictions in a ``predictions`` column.
    """
    # Given: the previous model's saved predictions
    baseline_path = f'{config.PACKAGE_ROOT}/{save_file}'
    previous_model_predictions = pd.read_csv(baseline_path).predictions.values

    test_data = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    multiple_test_input = test_data[99:600]

    # When
    current_result = make_prediction(input_data=multiple_test_input)
    current_model_predictions = current_result.get('predictions')

    # Then: both runs must cover the same number of rows ...
    previous_count = len(previous_model_predictions)
    assert previous_count == len(current_model_predictions)

    # ... and each pair must agree within the configured relative
    # tolerance, i.e. the maximum allowed difference relative to the
    # larger absolute value of the two (0.05 would mean 5%).
    value_pairs = zip(previous_model_predictions, current_model_predictions)
    for old_value, new_value in value_pairs:
        # .item() converts the numpy scalars to plain Python numbers.
        assert math.isclose(
            old_value.item(),
            new_value.item(),
            rel_tol=model_config.ACCEPTABLE_MODEL_DIFFERENCE)
def test_multiple_predictions():
    """Predict on the whole testing dataset and check the result count."""
    # Given
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    test_data.drop('id', axis=1, inplace=True)

    # The discrete feature columns are cast to str before prediction.
    discrete_features = (config.DISCRETE_SET1_FEATURES +
                         config.DISCRETE_SET2_FEATURES +
                         config.DISCRETE_SET3_FEATURES)
    test_data[discrete_features] = test_data[discrete_features].astype(str)

    # When
    subject = make_prediction(input_data=test_data)

    # Then
    assert subject is not None
    assert len(subject.get('predictions')) == 127037
Example #23
0
def predict():
    """Serve gradient-boosting predictions for a POST request.

    The JSON body is validated, scored, persisted and counted for
    monitoring. When ``SHADOW_MODE_ACTIVE`` is set in the app config, the
    neural-net model is also run and persisted on a separate thread.

    Returns:
        A JSON response with ``predictions``, ``errors`` and ``version``.
        For any other HTTP method the function returns ``None``.
    """
    if request.method == "POST":
        # Extract the data from the request body
        json_data = request.get_json()
        # The three fields used to be concatenated with no separator,
        # which made the log line unreadable.
        _logger.info(f"Inputs  : {json_data}, "
                     f"model : {ModelType.GRADIENT_BOOSTING.name}, "
                     f"model_version : {_version}")

        # Check if the data is valid
        input_data, errors = validation.validate_data(json_data)

        # Making the predictions
        result = make_prediction(input_data=input_data)
        _logger.info(f"Outputs : {result}")

        predictions = result.get("predictions").tolist()
        version = result.get("version")

        # Persisting the predictions
        persistence = PredictionPersistence(db_session=current_app.db_session)
        persistence.save_predictions(
            inputs=json_data,
            model_version=version,
            predictions=predictions,
            db_model=ModelType.GRADIENT_BOOSTING,
        )

        # Asynchronous shadow mode
        if current_app.config.get("SHADOW_MODE_ACTIVE"):
            _logger.debug(
                f"Calling shadow model asynchronously: "
                f"{ModelType.NEURALNET.value}"
            )
            # The real app object (not the ``current_app`` proxy) is handed
            # over so the worker can enter its own application context.
            thread = threading.Thread(
                target=persistence.make_save_predictions,
                kwargs={
                    "db_model": ModelType.NEURALNET,
                    "input_data": input_data,
                    "app": current_app._get_current_object(),
                    "json_data": json_data,
                },
            )
            thread.start()

        # Monitoring: bump the counter matching each predicted label.
        # ``labels()`` stays inside the loop so a counter is only touched
        # when a matching prediction actually occurs.
        counter_labels = {
            "app_name": APP_NAME,
            "model_name": ModelType.GRADIENT_BOOSTING.name,
            "model_version": _version,
        }
        for pred in predictions:
            if pred == "functional":
                PREDICTION_Counter_HEALTHY_WATER_PUMPS.labels(
                    **counter_labels).inc()
            elif pred == "non functional or functional needs repair":
                PREDICTION_Counter_FAULTY_WATER_PUMPS.labels(
                    **counter_labels).inc()

        return jsonify({"predictions": predictions,
                        "errors": errors,
                        "version": version})