def leave_one_out_cross_validation_accuracy(df, dependent_variable, k):
    """Return the leave-one-out cross-validation accuracy of `knearestclass`.

    Each row of ``df`` is held out in turn. A fresh
    ``knearestclass(n_neighbors=k)`` is fit on all remaining rows and asked to
    predict the held-out row; the prediction is compared with the row's actual
    value in the ``dependent_variable`` column.

    Args:
        df: pandas DataFrame containing the feature columns and the
            ``dependent_variable`` column.
        dependent_variable: Label of the column holding the classification.
            Every other column is used as a feature.
        k: Value passed to ``knearestclass`` as ``n_neighbors``.

    Returns:
        The fraction of rows whose held-out prediction matched the actual
        classification.

    Raises:
        ZeroDivisionError: If ``df`` has no rows.
    """
    feature_columns = [col for col in df.columns if col != dependent_variable]

    # Convert once, up front. Rows are then held out purely by position, so
    # the result no longer depends on df having a default 0..n-1 index
    # (dropping by label `i` while selecting by position `i` mismatched or
    # raised KeyError on filtered / re-indexed frames).
    features = df[feature_columns].to_numpy().tolist()
    labels = df[dependent_variable].to_numpy().tolist()

    total_classifications = len(labels)
    correct_classifications = 0

    for i in range(total_classifications):
        left_out = features[i]
        actual_classification = labels[i]

        # Training data: every row except the one at position i.
        independent = features[:i] + features[i + 1:]
        dependent = labels[:i] + labels[i + 1:]

        knn = knearestclass(n_neighbors=k)
        knn = knn.fit(independent, dependent)
        # A single observation is passed in; the result is compared directly
        # with the label (presumably a length-1 result -- confirm against
        # knearestclass.predict).
        predicted_classification = knn.predict([left_out])

        if predicted_classification == actual_classification:
            correct_classifications += 1

    return correct_classifications / total_classifications
def _as_rows(data):
    """Return ``data`` as a plain list with one entry per observation."""
    # Iterating a pandas DataFrame yields its column labels, not its rows, so
    # objects that offer to_numpy() are converted through it instead.
    if hasattr(data, "to_numpy"):
        return data.to_numpy().tolist()
    return list(data)


def leave_one_out_cross_validation_accuracy(independent_df, dependent_df, k):
    """Return the leave-one-out cross-validation accuracy of `knearestclass`.

    Each observation is held out in turn. A single
    ``knearestclass(n_neighbors=k)`` instance is re-fit on the remaining
    observations and asked to predict the held-out one.

    NOTE(review): an earlier definition with the same name but a
    ``(df, dependent_variable, k)`` signature is visible above in this file;
    if both live in one module, this later definition replaces it.

    Args:
        independent_df: Feature rows, one per observation. May be a sequence
            of rows or an object exposing ``to_numpy()`` (e.g. a DataFrame).
        dependent_df: Classifications, aligned by position with
            ``independent_df``. Same accepted kinds as above.
        k: Value passed to ``knearestclass`` as ``n_neighbors``; it is also
            printed once per call.

    Returns:
        The fraction of observations whose held-out prediction matched the
        actual classification.

    Raises:
        ZeroDivisionError: If ``independent_df`` is empty.
    """
    knn = knearestclass(n_neighbors=k)

    # Materialise the inputs once instead of re-copying them every iteration.
    observations = _as_rows(independent_df)
    classifications = _as_rows(dependent_df)

    correct_classifications = 0
    total_classifications = len(observations)
    print(k)  # progress trace, kept so the function's output is unchanged

    for i in range(total_classifications):
        left_out = observations[i]
        actual_classification = classifications[i]

        # Training data: everything except the observation at position i.
        training_rows = observations[:i] + observations[i + 1:]
        training_labels = classifications[:i] + classifications[i + 1:]

        knn = knn.fit(training_rows, training_labels)
        predicted_classification = knn.predict([left_out])

        if predicted_classification == actual_classification:
            correct_classifications += 1

    return correct_classifications / total_classifications
# Compare leave-one-out accuracy of knearestclass across several feature
# normalizations, for every k in k_vals, and save the resulting plot.

# One accuracy list per normalization, filled in k_vals order.
unnormalized = []
simple_scaling = []
ss_df = df.copy()
min_max = []
mm_df = df.copy()
z_scoring = []
zs_df = df.copy()

# Normalize every column except 'book type' (presumably the class label --
# confirm), which is left untouched in all three copies.
for col in [col for col in ss_df if col != 'book type']:
    # Simple scaling: divide by the column maximum.
    ss_df[col] = ss_df[col] / ss_df[col].max()
    # Min-max: shift by the column minimum, divide by the column range.
    mm_df[col] = (mm_df[col] - mm_df[col].min()) / (mm_df[col].max() - mm_df[col].min())
    # Z-scoring: subtract the column mean, divide by the column std.
    zs_df[col] = (zs_df[col] - zs_df[col].mean()) / zs_df[col].std()

for k in k_vals:
    # The same model instance is handed to all four evaluations for this k.
    knn = knearestclass(n_neighbors=k)
    unnormalized.append(leave_one_out_accuracy(knn, df))
    simple_scaling.append(leave_one_out_accuracy(knn, ss_df))
    min_max.append(leave_one_out_accuracy(knn, mm_df))
    z_scoring.append(leave_one_out_accuracy(knn, zs_df))

plt.style.use('bmh')
plt.plot(k_vals, unnormalized, label='unnormalized')
# Legend label previously read 'simple sclaing' (typo in the rendered figure).
plt.plot(k_vals, simple_scaling, label='simple scaling')
plt.plot(k_vals, min_max, label='min-max')
plt.plot(k_vals, z_scoring, label='z-scoring')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.title('Leave-One-Out Accuracy for Various Normalizations')
plt.legend(loc='best')
# NOTE: the file name's spelling ('normalizaion') is intentionally unchanged,
# since anything that reads this path would break if it were renamed here.
plt.savefig('normalizaion_accuracies.png')