def run_training() -> None:
    """Train the model and save it under a versioned file name.

    Loads the dataset named by ``config.TRAINING_DATA_FILE``, runs ``setup``
    with ``charges`` as the target, creates the ``'lr'`` model, calls
    ``remove_old_pipelines`` with the versioned file name to keep, and
    finally saves the model inside ``config.TRAINED_MODEL_DIR``.
    """

    # Read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Feature engineering.
    # The return value of setup() is not used by any later step, so it is
    # deliberately not bound to a local name.
    setup(data,
          target='charges',
          session_id=123,
          normalize=True,
          polynomial_features=True,
          trigonometry_features=True,
          feature_interaction=True,
          bin_numeric_features=['age', 'bmi'],
          silent=True)

    # Model training and validation
    lr = create_model('lr', verbose=False)

    # Prepare versioned save file name
    save_file_name = f"{config.PIPELINE_SAVE_FILE}{_version}"
    save_path = config.TRAINED_MODEL_DIR / save_file_name

    # Remove old pipelines, keeping only the file for the current version.
    # NOTE(review): the keep-name carries a ".pkl" suffix while the save
    # path does not -- presumably save_model appends the extension itself;
    # confirm against the save_model implementation in use.
    keep_file_name = f"{save_file_name}.pkl"
    remove_old_pipelines(files_to_keep=keep_file_name)

    # Save transformation pipeline and model
    save_model(lr, model_name=str(save_path))
Exemple #2
0
def test_make_single_prediction():
    """Check that predicting on a one-row slice yields a float value."""
    # Given: the first-row slice of the test dataset as a JSON records string
    test_data = load_dataset(file_name='inscharges-test.csv')
    single_test_json = test_data[0:1].to_json(orient='records')

    # When
    subject = make_prediction(input_data=single_test_json)

    # Then
    # Guard against None *before* dereferencing the result, so a missing
    # result fails on this assertion instead of raising AttributeError.
    assert subject is not None

    predicted_value = (subject.get('predictions')['Label']).values[0]
    assert isinstance(predicted_value, float)
Exemple #3
0
def test_make_multiple_predictions():
    """Check that predicting on the full test dataset keeps every row."""
    # Given: the whole test dataset serialised as a JSON records string
    dataset = load_dataset(file_name='inscharges-test.csv')
    expected_row_count = len(dataset)
    payload = dataset.to_json(orient='records')

    # When
    subject = make_prediction(input_data=payload)

    # Then
    assert subject is not None
    prediction_count = len(subject.get('predictions'))
    assert prediction_count == 20

    # No input row may have been filtered out along the way
    assert prediction_count == expected_row_count
Exemple #4
0
def test_prediction_endpoint_validation_200(flask_test_client):
    """POST two test records to the prediction endpoint and expect HTTP 200."""
    # Given
    # The test data is taken from the regression_model package itself:
    # keeping it in a single place makes it harder for different versions
    # of the test data to get mixed up across packages.
    first_two_rows = load_dataset(file_name=config.TESTING_DATA_FILE)[0:2]
    post_json = first_two_rows.to_json(orient='records')

    # When
    response = flask_test_client.post(
        '/v1/predict/inscharge',
        json=post_json,
    )

    # Then
    assert response.status_code == 200

    # Decode the body and log it for inspection
    response_json = json.loads(response.data)
    _logger.info(f'Response Json: {response_json}')
def test_prediction_endpoint_returns_prediction(flask_test_client):
    """POST five test records and check the response payload.

    The response must have status 200, contain a ``predictions`` entry and
    report a ``version`` equal to ``_version``.
    """
    # Given
    # Load the test data from the regression_model package.
    # This is important as it makes it harder for the test
    # data versions to get confused by not spreading it
    # across packages.
    test_data = load_dataset(file_name=model_config.TESTING_DATA_FILE)
    post_json = test_data[0:5].to_json(orient='records')

    # When
    response = flask_test_client.post('/v1/predict/inscharge', json=post_json)

    # Then
    assert response.status_code == 200

    response_json = json.loads(response.data)

    # Make the presence check explicit instead of binding the value to an
    # unused local and relying on a KeyError to signal a missing entry.
    assert 'predictions' in response_json
    # NOTE(review): a check that the predictions payload has length 5 was
    # previously disabled here; confirm the payload shape before
    # reinstating it.
    assert response_json['version'] == _version