def run_training_swagger() -> None:
    """Fit the loan-status pipeline and persist it via the swagger saver.

    Loads the dataset named by ``config.TRAINING_DATA_FILE``, fits
    ``pipeline.loan_status_pipe`` on the ``config.FEATURES`` selection
    against the ``config.TARGET`` selection, and passes the fitted pipeline
    to ``save_pipeline_swagger``. The whole dataset is used for fitting.
    """
    training_frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    estimator = pipeline.loan_status_pipe
    estimator.fit(
        training_frame[config.FEATURES],
        training_frame[config.TARGET],
    )

    save_pipeline_swagger(pipeline_to_persist=estimator)
def test_make_single_prediction():
    """Check that predicting on the ``[0:1]`` slice of 'test.csv' gives 0 first."""
    # Arrange: serialise the leading slice of the test set as JSON records.
    # NOTE(review): presumably load_dataset returns a pandas DataFrame, so
    # the slice is the first row - confirm against load_dataset.
    frame = load_dataset(file_name='test.csv')
    leading_slice_json = frame[0:1].to_json(orient='records')

    # Act
    result = make_prediction(input_data=leading_slice_json)

    # Assert: a result exists, holds a list of predictions, and the first is 0.
    assert result is not None
    predictions = result.get('predictions')
    assert isinstance(predictions, list)
    assert predictions[0] == 0
def test_make_multiple_predictions():
    """Check the per-class prediction counts for the whole of 'test.csv'."""
    # Arrange: serialise the entire test set as JSON records.
    frame = load_dataset(file_name='test.csv')
    payload = frame.to_json(orient='records')

    # Act
    result = make_prediction(input_data=payload)

    # Assert: a result exists and holds a list of predictions.
    assert result is not None
    predictions = result.get('predictions')
    assert isinstance(predictions, list)

    # The expected count of each predicted label is hard-coded, so these
    # numbers only hold for the dataset/model pair they were recorded from.
    for label, expected_count in ((1, 45931), (0, 34069)):
        assert predictions.count(label) == expected_count
def run_training() -> None:
    """Fit the loan-status pipeline on the training data and persist it.

    Loads the dataset named by ``config.TRAINING_DATA_FILE``, fits
    ``pipeline.loan_status_pipe`` on the ``config.FEATURES`` selection
    against the ``config.TARGET`` selection, logs the model version being
    saved, and passes the fitted pipeline to ``save_pipeline``.

    The whole dataset is used for fitting; no train/test split is made here.
    """
    training_frame = load_dataset(file_name=config.TRAINING_DATA_FILE)

    estimator = pipeline.loan_status_pipe
    estimator.fit(
        training_frame[config.FEATURES],
        training_frame[config.TARGET],
    )

    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=estimator)