def leave_one_out_cross_validation_accuracy(df, dependent_variable, k):
    """Estimate k-nearest-neighbours accuracy on *df* via leave-one-out CV.

    Parameters
    ----------
    df : pandas.DataFrame
        Data set; one column holds the class labels, the rest are features.
    dependent_variable : str
        Name of the label column in *df*.
    k : int
        Number of neighbours for the classifier.

    Returns
    -------
    float
        Fraction of rows classified correctly when each row is predicted
        by a model trained on all remaining rows.

    NOTE(review): a later definition in this file reuses this function name
    with a different signature and shadows this one — confirm which version
    callers should get.
    """
    # The feature/label split does not depend on the left-out row, so
    # compute it once instead of once per loop iteration (was O(n) frame
    # rebuilds per row).
    feature_df = df[[col for col in df.columns if col != dependent_variable]]
    label_series = df[dependent_variable]

    correct_classifications = 0
    total_classifications = len(df)  # len(df) is the row count directly

    for i in range(total_classifications):
        left_out = feature_df.iloc[i].to_numpy().tolist()
        actual_classification = label_series.iloc[i]

        # Drop by index label rather than assuming the default RangeIndex,
        # so frames with a custom index also work.
        row_label = df.index[i]
        training_features = feature_df.drop(row_label).to_numpy().tolist()
        training_labels = label_series.drop(row_label).to_numpy().tolist()

        knn = knearestclass(n_neighbors=k)
        knn = knn.fit(training_features, training_labels)
        # predict() returns an array; take its single element so the
        # comparison below is scalar == scalar.
        predicted_classification = knn.predict([left_out])[0]

        if predicted_classification == actual_classification:
            correct_classifications += 1

    return correct_classifications / total_classifications
def leave_one_out_cross_validation_accuracy(independent_df, dependent_df, k):
    """Leave-one-out accuracy for a k-nearest-neighbours classifier.

    Parameters
    ----------
    independent_df : iterable
        Feature rows (each row itself a sequence of feature values).
    dependent_df : iterable
        Class label for each row in *independent_df*, in the same order.
    k : int
        Number of neighbours.

    Returns
    -------
    float
        Fraction of rows whose label is predicted correctly by a model
        fitted on all other rows.
    """
    knn = knearestclass(n_neighbors=k)
    correct_classifications = 0

    # Materialize once, outside the loop — the original copied both
    # sequences on every iteration.  (Also removes a stray debug print.)
    features = list(independent_df)
    labels = list(dependent_df)
    total_classifications = len(features)

    for i in range(total_classifications):
        left_out = features[i]
        actual_classification = labels[i]

        # Train on everything except row i; slicing avoids mutating a copy
        # with pop().
        training_features = features[:i] + features[i + 1:]
        training_labels = labels[:i] + labels[i + 1:]

        knn = knn.fit(training_features, training_labels)
        predicted_classification = knn.predict([left_out])

        if predicted_classification == actual_classification:
            correct_classifications += 1

    return correct_classifications / total_classifications
# Beispiel #3 (example 3)
# 0
# Accuracy results per normalization scheme, filled in by the k-loop below.
unnormalized = []
simple_scaling = []
min_max = []
z_scoring = []

# Independent copies of the data, one per normalization scheme.
ss_df = df.copy()
mm_df = df.copy()
zs_df = df.copy()

# Normalize every feature column; 'book type' is the class label and is
# left untouched.
feature_columns = [column for column in ss_df if column != 'book type']
for column in feature_columns:
    # Simple scaling: divide by the column maximum.
    ss_df[column] = ss_df[column] / ss_df[column].max()
    # Min-max: rescale to the [0, 1] interval.
    column_min = mm_df[column].min()
    column_max = mm_df[column].max()
    mm_df[column] = (mm_df[column] - column_min) / (column_max - column_min)
    # Z-scoring: center on the mean, scale by the standard deviation.
    zs_df[column] = (zs_df[column] - zs_df[column].mean()) / zs_df[column].std()

# Compute leave-one-out accuracy for every k and every normalization scheme.
for k in k_vals:
    knn = knearestclass(n_neighbors=k)
    unnormalized.append(leave_one_out_accuracy(knn, df))
    simple_scaling.append(leave_one_out_accuracy(knn, ss_df))
    min_max.append(leave_one_out_accuracy(knn, mm_df))
    z_scoring.append(leave_one_out_accuracy(knn, zs_df))

# Plot accuracy vs. k, one line per normalization scheme.
plt.style.use('bmh')
plt.plot(k_vals, unnormalized, label='unnormalized')
plt.plot(k_vals, simple_scaling, label='simple scaling')  # fixed label typo 'sclaing'
plt.plot(k_vals, min_max, label='min-max')
plt.plot(k_vals, z_scoring, label='z-scoring')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.title('Leave-One-Out Accuracy for Various Normalizations')
plt.legend(loc='best')
# NOTE(review): output filename misspells 'normalization' — kept as-is in
# case downstream tooling expects this exact name; confirm before fixing.
plt.savefig('normalizaion_accuracies.png')