def test_prediction_endpoint_returns_prediction(flask_test_client):

    # Load the test data from the classification_titanic package.
    # Keeping the test data inside the package (rather than spreading
    # copies across packages) makes it harder for test data versions
    # to get out of sync.
    test_data = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    post_json = test_data[0:1].to_json(orient='records')

    print(f"POST JSON {post_json}")

    response = flask_test_client.post('/v1/predict/classification',
                                      json=post_json)

    assert response.status_code == 200

    response_json = json.loads(response.data)
    if isinstance(response_json['predictions'], list):
        prediction = response_json['predictions'][0]
    else:
        prediction = response_json['predictions']
    response_version = response_json['version']

    assert isinstance(prediction, int)
    assert response_version == _version
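
# The flask_test_client fixture used above is not shown in this section. A
# minimal sketch of what it could look like, assuming the API exposes an
# application factory named create_app (the import path is an assumption):

import pytest

from api.app import create_app  # assumed application factory


@pytest.fixture
def flask_test_client():
    # Build the app in testing mode and yield Flask's built-in test client.
    app = create_app()
    app.config['TESTING'] = True

    with app.test_client() as test_client:
        yield test_client
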
Example 2
def test_make_single_prediction():
    # Data
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    single_test_json = test_data[0:1].to_json(orient='records')

    # Predict
    subject = make_prediction(input_data=single_test_json)

    # Test
    assert subject is not None
    assert isinstance(subject.get("predictions")[0], (int, np.integer))
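
# make_prediction itself is defined elsewhere. Based only on what the tests
# assert (a dict carrying 'predictions' and 'version'), a rough sketch could
# look like the following; config and _version are the package-level names
# already used above, while the joblib persistence path and file name are
# assumptions:

import joblib
import pandas as pd


def make_prediction(*, input_data: str) -> dict:
    """Sketch: parse the JSON records, run the saved pipeline, and return
    a dict with 'predictions' and 'version' keys."""
    data = pd.read_json(input_data)

    # Assumed location of the persisted pipeline; the real project may use
    # a dedicated load_pipeline helper instead.
    pipeline = joblib.load(config.TRAINED_MODEL_DIR / f"titanic_pipe_{_version}.pkl")

    predictions = pipeline.predict(data[config.FEATURES])

    return {"predictions": predictions, "version": _version}
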
Example 3
def test_make_multiple_predictions():
    # Data
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    original_data_length = len(test_data)
    multiple_test_json = test_data.to_json(orient="records")

    # Predict
    subject = make_prediction(input_data=multiple_test_json)

    # Test
    assert subject is not None

    # Missing values are handled by the pipeline, so no rows are dropped
    assert len(subject.get('predictions')) == original_data_length
Example 4
def run_training() -> None:
    """Train the model."""

    # Read Training Data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Divide into train and test sets (only if we read the original data set)
    X_train, X_test, y_train, y_test = train_test_split(data[config.FEATURES],
                                                        data[config.TARGET],
                                                        test_size=0.2,
                                                        random_state=0)

    pipeline.titanic_pipe.fit(X_train, y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.titanic_pipe)
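
# save_pipeline is called above but not defined in this section. A minimal
# sketch, assuming the fitted pipeline is persisted with joblib and the
# package version is baked into the file name (TRAINED_MODEL_DIR and the
# file-name pattern are assumptions, not taken from this section):

import joblib


def save_pipeline(*, pipeline_to_persist) -> None:
    """Sketch: persist the fitted pipeline under a versioned file name."""
    save_file_name = f"titanic_pipe_{_version}.pkl"
    save_path = config.TRAINED_MODEL_DIR / save_file_name

    joblib.dump(pipeline_to_persist, save_path)
    _logger.info(f"saved pipeline: {save_file_name}")
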
def test_prediction_endpoint_validation_200(flask_test_client):
    # Load the test data from the classification_titanic package
    test_data = load_dataset(file_name=config.TESTING_DATA_FILE)
    post_json = test_data.to_json(orient='records')

    # Predict
    response = flask_test_client.post('/v1/predict/classification',
                                      json=post_json)

    # Test
    assert response.status_code == 200
    response_json = json.loads(response.data)

    # Check that predictions plus rows removed due to validation errors
    # (if any) add up to the original input size
    if response_json.get('errors') is None:
        assert len(response_json.get('predictions')) == len(test_data)
    else:
        assert len(response_json.get('predictions')) + \
               len(response_json.get('errors')) == len(test_data)
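
# For reference, a sketch of the /v1/predict/classification view these tests
# exercise. The Blueprint name and the validate_inputs helper are assumptions;
# only the response fields ('predictions', 'version', 'errors') are taken
# from what the tests check:

from flask import Blueprint, request, jsonify

prediction_app = Blueprint('prediction_app', __name__)


@prediction_app.route('/v1/predict/classification', methods=['POST'])
def predict():
    # The tests post an already-serialised JSON string, so get_json()
    # returns that string and it is passed straight through.
    json_data = request.get_json()

    # Assumed helper: drop rows that fail schema validation and report them.
    input_data, errors = validate_inputs(input_data=json_data)

    result = make_prediction(input_data=input_data)

    return jsonify({'predictions': result.get('predictions').tolist(),
                    'version': result.get('version'),
                    'errors': errors})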