def crossValidation(train_x, train_y, class_num, filename, model_name, K=3):
    """Run K-fold cross validation and save the fold-averaged ROC plot.

    The data is cut into K contiguous folds of ``len(train_x) // K`` samples.
    Each fold is used once for validation while everything outside it
    (including any remainder samples past the last fold) is used for training.

    Args:
        train_x: Training features; sliced positionally and passed to
            ``np.concatenate``.
        train_y: Training labels; a slice of it must expose ``.values``
            (presumably a pandas Series/DataFrame -- confirm against caller).
        class_num: Number of classes, forwarded to the model.
        filename: Dataset tag, forwarded to the model and used in the name of
            the saved figure.
        model_name: Classifier to evaluate. Only ``'NBC'`` is implemented.
        K: Number of folds (default 3).

    Raises:
        ValueError: If ``model_name`` is not a supported classifier.

    Side effects:
        Prints per-fold progress and the mean accuracy, and writes
        ``plotting/<filename>_<model_name>_lower_roc_CV.png``.
    """
    # Fail fast: without this check an unknown model would leave acc/FA/PD
    # unbound inside the fold loop and crash with an opaque UnboundLocalError.
    if model_name != 'NBC':
        raise ValueError(
            "Unsupported model_name {!r}; only 'NBC' is implemented".format(model_name)
        )

    divided = len(train_x) // K
    overall_acc = 0
    total_PD = []
    total_FA = []

    for fold in range(K):
        print()
        print("Now fold is {}".format(fold))

        # Validation window for this fold; everything outside it is training data.
        start = divided * fold
        end = divided * (fold + 1)
        training_x = np.concatenate((train_x[:start], train_x[end:]))
        training_y = np.concatenate((train_y[:start], train_y[end:]))
        validation_x = train_x[start:end]
        validation_y = train_y[start:end].values

        prior, train_mean, train_var = NBC.train(training_x, training_y, class_num)
        acc, FA, PD = NBC.test(
            validation_x, validation_y, prior, train_mean, train_var,
            class_num, filename, model_name, False,
        )

        total_FA.append(np.array(FA))
        total_PD.append(np.array(PD))
        overall_acc += acc

    print("Overall accuracy: {}".format(overall_acc / K))

    # Aggregate the per-fold FA/PD values across folds (axis 0 is the fold axis).
    total_FA = np.array(total_FA)
    total_PD = np.array(total_PD)
    FA_mean = np.mean(total_FA, axis=0)
    PD_mean = np.mean(total_PD, axis=0)
    FA_var = np.var(total_FA, axis=0)
    PD_var = np.var(total_PD, axis=0)

    # Plot the mean curve; the error bars show the across-fold *variance* of PD.
    fig = plt.figure()
    plt.errorbar(FA_mean, PD_mean, yerr=PD_var, uplims=True, lolims=True)
    # plt.xlim(0, 1)
    # plt.ylim(0, 1)
    plt.xlabel('FA')
    plt.ylabel('PD')
    fig.savefig('plotting/' + filename + '_' + model_name + '_lower_roc_CV.png')
wine_train_x, wine_train_y, wine_test_x, wine_test_y = readData()

# Shared settings: number of cross-validation folds and number of classes
# (the same class count is used for both datasets below).
K = 5
class_num = 3

print("====================== IRIS =================")

# Bayesian classifier on iris: cross-validate on the training split, then
# train on the full training split and evaluate on the test split.
crossValidation(iris_train_x, iris_train_y, class_num, 'iris', 'BC', K)
prior, train_mean, train_cov = BC.train(
    iris_train_x.values, iris_train_y.values, class_num
)
acc = BC.test(
    iris_test_x.values, iris_test_y.values,
    prior, train_mean, train_cov,
    class_num, 'iris', 'BC', True
)

# Naive-Bayes classifier on iris, same procedure.
crossValidation(iris_train_x, iris_train_y, class_num, 'iris', 'NBC', K)
prior, train_mean, train_var = NBC.train(
    iris_train_x.values, iris_train_y.values, class_num
)
acc = NBC.test(
    iris_test_x.values, iris_test_y.values,
    prior, train_mean, train_var,
    class_num, 'iris', 'NBC', True
)

print("====================== WINE =================")

# Bayesian classifier on wine.
crossValidation(wine_train_x, wine_train_y, class_num, 'wine', 'BC', K)
prior, train_mean, train_cov = BC.train(
    wine_train_x.values, wine_train_y.values, class_num
)
acc = BC.test(
    wine_test_x.values, wine_test_y.values,
    prior, train_mean, train_cov,
    class_num, 'wine', 'BC', True
)

# Naive-Bayes classifier on wine.
crossValidation(wine_train_x, wine_train_y, class_num, 'wine', 'NBC', K)
prior, train_mean, train_var = NBC.train(
    wine_train_x.values, wine_train_y.values, class_num
)
acc = NBC.test(
    wine_test_x.values, wine_test_y.values,
    prior, train_mean, train_var,
    class_num, 'wine', 'NBC', True
)
# print("eigen vector shape = {}".format(eigen_vectors.shape))
# print("lower_dimension_data training shape: {}".format(lower_dimension_data.shape))
# print("lower_dimension_data testing shape: {}".format(lower_dimension_data_test.shape))
# # Applying PCA to classifier
# prior, train_mean, train_cov = NBC.train(lower_dimension_data, iris_train_y.values.ravel(), CLASS_NUM)
# acc = NBC.test(lower_dimension_data_test, iris_test_y.values.ravel(), prior, train_mean, train_cov, CLASS_NUM, 'iris_task2', 'NBC', True)
# predict, probability, model = TestBest(lower_dimension_data, iris_train_y.values.ravel(), lower_dimension_data_test, iris_test_y.values.ravel())
# computeConfusionMatrix(probability, iris_test_y.values.ravel())
# plotROC(probability, iris_test_y.values.ravel(), 'iris_task2_low_SVM')

print("=================== BREAST ==============")

# Baseline on the untransformed breast features: NBC first, then the
# TestBest model, followed by its confusion matrix and ROC plot.
prior, train_mean, train_cov = NBC.train(
    breast_train_x.values, breast_train_y.values.ravel(), CLASS_NUM
)
acc = NBC.test(
    breast_test_x.values, breast_test_y.values.ravel(),
    prior, train_mean, train_cov,
    CLASS_NUM, 'breast_task2_ori', 'NBC', True
)
predict, probability, model = TestBest(
    breast_train_x.values, breast_train_y.values.ravel(),
    breast_test_x.values, breast_test_y.values.ravel()
)
computeConfusionMatrix(probability, breast_test_y.values.ravel())
plotROC(probability, breast_test_y.values.ravel(), 'breast_task2_ori_SVM')

print("===============")

# Fit PCA on the training features and project the test features with the
# same eigenvectors.
# NOTE(review): the test data is projected here without any centring;
# confirm this matches what PCA() does to the training data.
lower_dimension_data, eigen_vectors = PCA(breast_train_x.values)
lower_dimension_data_test = np.matmul(breast_test_x.values, eigen_vectors)

# Shape report for the raw data, the projection matrix and both projections.
print("data shape = {}".format(breast_train_x.values.shape))
print("eigen vector shape = {}".format(eigen_vectors.shape))
print("lower_dimension_data training shape: {}".format(lower_dimension_data.shape))
print("lower_dimension_data testing shape: {}".format(lower_dimension_data_test.shape))

# Retrain NBC on the PCA-projected training data.
prior, train_mean, train_cov = NBC.train(
    lower_dimension_data, breast_train_y.values.ravel(), CLASS_NUM
)