Ejemplo n.º 1
0
def test_make_single_prediction():
    """Check that predicting on a one-row slice yields an integer 0."""
    # Given: a slice holding only the first entry of the test dataset
    test_data = load_dataset(file_name='test.csv')
    single_test_json = test_data[0:1]

    # When
    subject = make_prediction(input_data=single_test_json)

    # Then
    # Check for None before touching the result, so a missing response
    # fails on this assertion instead of raising AttributeError.
    assert subject is not None
    prediction = subject.get('predictions')[0]
    assert isinstance(prediction, np.integer)
    assert math.ceil(prediction) == 0
Ejemplo n.º 2
0
def test_make_multiple_predictions():
    """Check that predicting on the full test file gives one result per row."""
    # Given
    dataset = load_dataset(file_name='test.csv')
    expected_count = len(dataset)

    # When
    subject = make_prediction(input_data=dataset)

    # Then
    assert subject is not None
    predictions = subject.get('predictions')
    assert len(predictions) == 100

    # The number of predictions must equal the number of input rows,
    # i.e. no rows are dropped on the way through.
    assert len(predictions) == expected_count
Ejemplo n.º 3
0
def test_prediction_endpoint_returns_prediction(flask_test_client):
    """Post one test row to the classifier endpoint and check the reply.

    The response must have status 200, its first prediction must be 0 and
    its version must equal ``_version``.
    """
    # Given
    # The test data is taken from the sainsbury_discontinued package
    # instead of a separate copy: keeping it in one package makes it
    # harder for test data versions to get mixed up.
    dataset = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    first_row_json = dataset[0:1].to_json(orient='records')
    payload = json.loads(first_row_json)

    # When
    response = flask_test_client.post('/v1/predict/classifier', json=payload)

    # Then
    assert response.status_code == 200
    body = json.loads(response.data)
    predictions = body['predictions']
    version = body['version']
    assert predictions[0] == 0
    assert version == _version
Ejemplo n.º 4
0
def test_prediction_endpoint_validation_200(flask_test_client):
    """Post the whole test file to the classifier endpoint and check counts.

    The response must have status 200, and its predictions and errors
    together must account for every row that was sent.
    """
    # Given
    # The test data is taken from the regression_model package instead of
    # a separate copy: keeping it in one package makes it harder for test
    # data versions to get mixed up.
    # NOTE(review): the sibling endpoint test names the package
    # sainsbury_discontinued - confirm which name is current.
    dataset = load_dataset(file_name=config.TESTING_DATA_FILE)
    payload = json.loads(dataset.to_json(orient='records'))

    # When
    response = flask_test_client.post('/v1/predict/classifier', json=payload)

    # Then
    assert response.status_code == 200
    body = json.loads(response.data)

    # Predictions plus errors must add up to the number of rows sent
    prediction_count = len(body.get('predictions'))
    error_count = len(body.get('errors'))
    assert prediction_count + error_count == len(dataset)
Ejemplo n.º 5
0
def run_training() -> None:
    """Train the model pipeline and persist it.

    Loads the training data named by ``config.TRAINING_DATA_FILE``, fits
    ``pipeline.discontinued_pipe`` on a stratified 70% training split and
    saves the fitted pipeline through ``dm.save_pipeline``.
    """
    # load training data
    data = dm.load_dataset(file_name=config.TRAINING_DATA_FILE)

    # train and test split; the held-out 30% is not used in this function,
    # so it is discarded instead of being bound to unused names
    X_train, _, y_train, _ = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.3,
        random_state=1984,
        stratify=data[config.TARGET])

    # fit pipeline; X_train was built from data[config.FEATURES], so it
    # already holds exactly those columns and needs no further selection
    pipeline.discontinued_pipe.fit(X_train, y_train)

    # add model version to logs
    _logger.info(f"saving model version: {_version}")

    # save pipeline
    dm.save_pipeline(pipeline_to_persist=pipeline.discontinued_pipe)
Ejemplo n.º 6
0
def make_predicitons(input_data):
    """Return predictions and column-1 probabilities for ``input_data``.

    Loads the pipeline stored at ``config.PIPELINE_PATH`` on every call and
    returns a tuple of the ``predict`` output and column index 1 of the
    ``predict_proba`` output.
    """
    model = dm.load_pipeline(file_name=config.PIPELINE_PATH)
    labels = model.predict(input_data)
    # keep only column index 1 of the probability matrix
    proba_col_1 = model.predict_proba(input_data)[:, 1]
    return labels, proba_col_1


if __name__ == '__main__':

    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report

    # load training data
    data = dm.load_dataset(file_name=config.TRAINING_DATA_FILE)

    # # train and test split
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.3,
        random_state=1984,
        stratify=data[config.TARGET])

    # prediction and propensity
    pred, prob_1 = make_predicitons(data[config.FEATURES])
    data['pred'] = pred
    data['prob_1'] = prob_1

    # # determine classification report