Ejemplo n.º 1
0
def test_scoring_logreg_tune_correct(data_fixture, request):
    train_data, test_data = request.getfixturevalue(data_fixture)

    train_data.features = ScalingWithImputation().fit(
        train_data.features).apply(train_data.features)
    test_data.features = ScalingWithImputation().fit(test_data.features).apply(
        test_data.features)

    logreg = Model(model_type='logit')

    model, _ = logreg.fit(train_data)
    test_predicted = logreg.predict(fitted_model=model, data=test_data)

    test_roc_auc = roc_auc(y_true=test_data.target, y_score=test_predicted)

    logreg_for_tune = Model(model_type='logit')

    model_tuned, _ = logreg_for_tune.fine_tune(
        data=train_data, iterations=50, max_lead_time=timedelta(minutes=0.1))
    test_predicted_tuned = logreg_for_tune.predict(fitted_model=model_tuned,
                                                   data=test_data)

    test_roc_auc_tuned = roc_auc(y_true=test_data.target,
                                 y_score=test_predicted_tuned)

    roc_threshold = 0.6

    assert round(test_roc_auc_tuned, 2) >= round(test_roc_auc,
                                                 2) > roc_threshold
Ejemplo n.º 2
0
def test_knn_classification_tune_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = ScalingWithImputation().fit(data.features).apply(
        data.features)
    train_data, test_data = train_test_data_setup(data=data)

    knn = Model(model_type='knn')
    model, _ = knn.fit(data=train_data)
    test_predicted = knn.predict(fitted_model=model, data=test_data)

    roc_on_test = roc_auc(y_true=test_data.target, y_score=test_predicted)

    roc_on_test_tuned_list = []
    for _ in range(3):
        knn_for_tune = Model(model_type='knn')
        model, _ = knn_for_tune.fine_tune(data=train_data,
                                          iterations=10,
                                          max_lead_time=timedelta(minutes=1))

        test_predicted_tuned = knn_for_tune.predict(fitted_model=model,
                                                    data=test_data)

        roc_on_test_tuned = roc_auc(y_true=test_data.target,
                                    y_score=test_predicted_tuned)

        roc_on_test_tuned_list.append(roc_on_test_tuned)

    roc_threshold = 0.6
    assert np.array(
        roc_on_test_tuned_list).any() >= roc_on_test > roc_threshold
Ejemplo n.º 3
0
def test_scaling_with_imputation(np_array_regression_with_missing_values):
    scaler = ScalingWithImputation()
    actual_scaled_data = scaler.fit_apply(
        np_array_regression_with_missing_values)

    scaler = StandardScaler()
    df = pd.DataFrame(np_array_regression_with_missing_values)
    data = df.fillna(df.mean()).to_numpy()
    expected_scaled_data = scaler.fit_transform(data)

    result = []
    for i in range(len(actual_scaled_data)):
        for j in range(len(actual_scaled_data[0])):
            result.append(
                math.isclose(actual_scaled_data[i][j],
                             expected_scaled_data[i][j],
                             abs_tol=0.00001))

    assert all(result)
Ejemplo n.º 4
0
def test_log_clustering_fit_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = ScalingWithImputation().fit(data.features).apply(
        data.features)
    train_data, test_data = train_test_data_setup(data=data)

    kmeans = Model(model_type='kmeans')

    _, train_predicted = kmeans.fit(data=train_data)

    assert all(np.unique(train_predicted) == [0, 1])
Ejemplo n.º 5
0
def test_random_forest_fit_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = ScalingWithImputation().fit(data.features).apply(
        data.features)
    train_data, test_data = train_test_data_setup(data=data)

    random_forest = Model(model_type='rf')

    _, train_predicted = random_forest.fit(data=train_data)

    roc_on_train = get_roc_auc(train_data, train_predicted)
    roc_threshold = 0.95
    assert roc_on_train >= roc_threshold
Ejemplo n.º 6
0
def test_log_regression_fit_correct(classification_dataset):
    data = classification_dataset
    data.features = ScalingWithImputation().fit(data.features).apply(
        data.features)
    train_data, test_data = train_test_data_setup(data=data)

    log_reg = Model(model_type='logit')

    _, train_predicted = log_reg.fit(data=train_data)

    roc_on_train = get_roc_auc(train_data, train_predicted)
    roc_threshold = 0.95
    assert roc_on_train >= roc_threshold
Ejemplo n.º 7
0
def test_classification_manual_tuning_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = ScalingWithImputation().fit(data.features).apply(
        data.features)
    train_data, test_data = train_test_data_setup(data=data)

    knn = Model(model_type='knn')
    model, _ = knn.fit(data=train_data)
    test_predicted = knn.predict(fitted_model=model, data=test_data)

    knn_for_tune = Model(model_type='knn')
    knn_for_tune.params = {'n_neighbors': 1}
    model, _ = knn_for_tune.fit(data=train_data)

    test_predicted_tuned = knn_for_tune.predict(fitted_model=model,
                                                data=test_data)

    assert not np.array_equal(test_predicted, test_predicted_tuned)
Ejemplo n.º 8
0
def test_rf_class_tune_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = ScalingWithImputation().fit(data.features).apply(
        data.features)
    train_data, test_data = train_test_data_setup(data=data)

    rf = Model(model_type='rf')

    model, _ = rf.fit(train_data)
    test_predicted = rf.predict(fitted_model=model, data=test_data)

    test_roc_auc = roc_auc(y_true=test_data.target, y_score=test_predicted)

    model_tuned, _ = rf.fine_tune(data=train_data,
                                  iterations=12,
                                  max_lead_time=timedelta(minutes=0.1))
    test_predicted_tuned = rf.predict(fitted_model=model_tuned, data=test_data)

    test_roc_auc_tuned = roc_auc(y_true=test_data.target,
                                 y_score=test_predicted_tuned)
    roc_threshold = 0.7

    assert test_roc_auc_tuned >= test_roc_auc
    assert test_roc_auc_tuned > roc_threshold
Ejemplo n.º 9
0
def test_pca_manual_tuning_correct(data_fixture, request):
    data = request.getfixturevalue(data_fixture)
    data.features = ScalingWithImputation().fit(data.features).apply(
        data.features)
    train_data, test_data = train_test_data_setup(data=data)

    pca = Model(model_type='pca_data_model')
    model, _ = pca.fit(data=train_data)
    test_predicted = pca.predict(fitted_model=model, data=test_data)

    pca_for_tune = Model(model_type='pca_data_model')

    pca_for_tune.params = {
        'svd_solver': 'randomized',
        'iterated_power': 'auto',
        'dim_reduction_expl_thr': 0.7,
        'dim_reduction_min_expl': 0.001
    }

    model, _ = pca_for_tune.fit(data=train_data)
    test_predicted_tuned = pca_for_tune.predict(fitted_model=model,
                                                data=test_data)

    assert not np.array_equal(test_predicted, test_predicted_tuned)