def main():
    """PCA -> K-means -> GMM pipeline on the bag-of-visual-words data set.

    Pools the three training classes, does an eigen-analysis of their
    covariance matrix, projects train/test data onto the top ``l``
    principal components, fits a ``k``-component GMM per class and
    finally prints confusion/performance matrices for the test set.
    """
    l = 2   # number of principal components kept after PCA
    k = 8   # number of GMM mixture components per class

    data_all_classes = load_bovw_train_set()
    data_all_classes_combined = np.concatenate(
        (data_all_classes[0], data_all_classes[1], data_all_classes[2]), axis=0)
    mean = np.mean(data_all_classes_combined, axis=0)

    # Build covariance matrix from the pooled training data of all classes.
    cov_mat = np.cov(data_all_classes_combined.T)

    # Eigen analysis of the covariance matrix.
    eigen_values, eigen_vectors = np.linalg.eig(cov_mat)

    # Sanity check that each eigenvector is unit length.
    # BUGFIX: np.linalg.eig returns eigenvectors as COLUMNS, and the original
    # compared the deviation with exact float equality (== 0.001), which could
    # never trigger reliably.  Iterate columns and use a tolerance instead.
    for vec in eigen_vectors.T:
        if abs(np.linalg.norm(vec) - 1) > 0.001:
            print("alert!")

    # Order eigenpairs by decreasing eigenvalue.
    index = eigen_values.argsort()[::-1]
    eigen_values_ordered = eigen_values[index]
    eigen_vectors_ordered = eigen_vectors[:, index]
    # plot_eigen_values(eigen_values_ordered, l)

    # Keep the top-l eigenvectors and project the training data onto them.
    selected_eigen_vectors = np.asmatrix(eigen_vectors_ordered[:, :l])
    train_data_reduced = perform_data_reduction(
        data_all_classes, selected_eigen_vectors, l, mean)
    # plot_univariate_data(train_data_reduced, l)
    # plot_bivariate_data(train_data_reduced, l)

    # Per-class GMM parameters: mixing weights, means and covariances.
    mu_each_class_each_k = np.zeros((3, k, l))
    pi_each_class_each_k = np.zeros((3, k))
    sigma_each_class_each_k = np.zeros((3, k, l, l))
    for class_index in range(len(train_data_reduced)):
        print("class", class_index)
        # K-means on the reduced training data of this class.
        centers = find_initial_random_centers(
            train_data_reduced[class_index], k, l)
        centers_after_Kmeans, whcluster = gmm.computeKMC(
            train_data_reduced[class_index], centers, k)
        cluster_data = gmm.groupdata(
            train_data_reduced[class_index], centers_after_Kmeans, whcluster, k)
        # Fit a GMM initialised from the K-means clustering.
        log_likelihood_all_iters, pi_final_each_k, mu_final_each_k, sigma_final_each_k = gmm.compute_GMM(
            centers_after_Kmeans, train_data_reduced[class_index], cluster_data, k)
        plot_iters_vs_loglikelihood(log_likelihood_all_iters, 3, k, class_index, l)
        pi_each_class_each_k[class_index] = pi_final_each_k
        mu_each_class_each_k[class_index] = mu_final_each_k
        sigma_each_class_each_k[class_index] = sigma_final_each_k

    # Build confusion matrix with the (identically projected) test data.
    data_all_classes_test = load_bovw_test_set()
    test_data_reduced = perform_data_reduction(
        data_all_classes_test, selected_eigen_vectors, l, mean)
    confusion_matrix = gmm.compute_confusion_matrix(
        test_data_reduced, pi_each_class_each_k, mu_each_class_each_k,
        sigma_each_class_each_k, k, 0)
    print("\nconfusion_matrix")
    gmm.print_matrix(confusion_matrix)
    performance_matrix = gmm.find_performance_matrix(confusion_matrix)
    print("\nperformance_matrix")
    gmm.print_matrix(performance_matrix)
    class_accuracy = gmm.find_accuracy(confusion_matrix)
    print("\nclass accuracy: ", class_accuracy)
def main():
    """Pairwise Fisher discriminant (FDA) -> K-means -> GMM pipeline.

    For each pair of classes, projects the data onto the Fisher direction,
    fits a ``k``-component GMM per class on the 1-D projection, then
    evaluates all pairwise models together on the test data.

    NOTE(review): this file defines ``main()`` more than once; only the
    last definition survives at import time — consider renaming them.
    """
    l = 1                  # dimensionality after FDA projection (always 1-D)
    opt_type = 2           # data set option: 1 = LS, 2 = NLS, 3 = BOVW
    k = 4                  # number of GMM clusters per class
    btw_classes = 4        # 4 = use every pairwise class combination
    dimension_of_data = 2  # dimension of the input data (informational)

    # Load the requested data set.
    if opt_type == 1:
        data_all_classes = loadcls.load_LS_trainingset()
        data_all_classes_test = loadcls.load_LS_test_set()
    elif opt_type == 2:
        data_all_classes = loadcls.load_NLS_trainingset()
        data_all_classes_test = loadcls.load_NLS_test_set()
    elif opt_type == 3:
        data_all_classes = loadcls.load_bovw_train_set()
        data_all_classes_test = loadcls.load_bovw_test_set()

    if btw_classes == 4:
        # Every pairwise combination of the three classes.
        data_cls0_cls1 = [data_all_classes[0], data_all_classes[1]]
        data_cls1_cls2 = [data_all_classes[1], data_all_classes[2]]
        data_cls0_cls2 = [data_all_classes[0], data_all_classes[2]]
        cls01_cls12_cls02 = [data_cls0_cls1, data_cls1_cls2, data_cls0_cls2]

        proj_cls_all_combination = []
        pi_each_class_each_k_diff_combination = []
        mu_each_class_each_k_diff_combination = []
        sigma_each_class_each_k_diff_combination = []
        W_diff_combinn = []

        for i in range(len(cls01_cls12_cls02)):
            data_some_classes_train = cls01_cls12_cls02[i]
            btw_num_of_classes = 2
            # Within/between-class scatter matrices for this class pair.
            within_cls_scatterM, btw_cls_scatterM, mu1_minus_mu2 = find_Sb_and_Sw(
                data_some_classes_train, btw_num_of_classes)
            w, eigen_vector_max = find_project_vector(
                within_cls_scatterM, btw_cls_scatterM, mu1_minus_mu2)
            # eigen_vector_max drives the projection; w is kept for plotting.
            proj_tr_some_cls = find_projected_points(
                data_some_classes_train, w, eigen_vector_max)
            proj_cls_all_combination.append(proj_tr_some_cls)
            W_diff_combinn.append(w)

            # Fit one GMM per class on the projected (1-D) data.
            train_data_reduced = proj_tr_some_cls
            mu_each_class_each_k = np.zeros((2, k, l))
            pi_each_class_each_k = np.zeros((2, k))
            sigma_each_class_each_k = np.zeros((2, k, l, l))
            for class_index in range(len(train_data_reduced)):
                print("class", class_index)
                # K-means on the projected training data of this class.
                centers = find_initial_random_centers(
                    train_data_reduced[class_index], k, l)
                centers_after_Kmeans, whcluster = gmm.computeKMC(
                    train_data_reduced[class_index], centers, k)
                cluster_data = gmm.groupdata(
                    train_data_reduced[class_index], centers_after_Kmeans,
                    whcluster, k)
                # GMM initialised from the K-means result.
                log_likelihood_all_iters, pi_final_each_k, mu_final_each_k, sigma_final_each_k = gmm.compute_GMM(
                    centers_after_Kmeans, train_data_reduced[class_index],
                    cluster_data, k)
                # plot_iters_vs_loglikelihood(log_likelihood_all_iters, 3, k, class_index, l)
                pi_each_class_each_k[class_index] = pi_final_each_k
                mu_each_class_each_k[class_index] = mu_final_each_k
                sigma_each_class_each_k[class_index] = sigma_final_each_k

            pi_each_class_each_k_diff_combination.append(pi_each_class_each_k)
            mu_each_class_each_k_diff_combination.append(mu_each_class_each_k)
            sigma_each_class_each_k_diff_combination.append(sigma_each_class_each_k)

        # Evaluate all pairwise models together on the test data.
        conf_mat = compute_confusion_matrix(
            data_all_classes_test,
            pi_each_class_each_k_diff_combination,
            mu_each_class_each_k_diff_combination,
            sigma_each_class_each_k_diff_combination,
            W_diff_combinn, k)
        gmm.print_matrix(conf_mat)
        performance_matrix = gmm.find_performance_matrix(conf_mat)
        print("\nperformance_matrix")
        gmm.print_matrix(performance_matrix)
        class_accuracy = gmm.find_accuracy(conf_mat)
        print("\nclass accuracy: ", class_accuracy)
        # plot_decision_boundary(data_all_classes,
        #                        pi_each_class_each_k_diff_combination,
        #                        mu_each_class_each_k_diff_combination,
        #                        sigma_each_class_each_k_diff_combination,
        #                        W_diff_combinn, k)
        # plot_classes(proj_cls_all_combination[0][0], proj_cls_all_combination[0][1])
        plot_scatter(cls01_cls12_cls02[0][0], cls01_cls12_cls02[0][1],
                     W_diff_combinn[0], proj_cls_all_combination[0])
        # data_some_classes_test = cls01_cls12_cls02_test[i]
        # proj_test_some_cls = find_projected_points(data_some_classes_test, w, eigen_vector_max)
        # wh_class_arr = similar_to_confusion_matrix(proj_test_some_cls, pi_each_class_each_k, mu_each_class_each_k, sigma_each_class_each_k, k)
    print("here")
def kernel_fn(data_all_classes, data_all_classes_test, C, gamma):
    """Train linear and RBF SVMs, plot their decision regions, and print
    confusion/performance matrices plus a classification report on the
    test data.  Delegates the polynomial/sigmoid kernels to
    ``kernel_fn_another_two``.

    Parameters:
        data_all_classes:      3-element sequence of per-class training arrays.
        data_all_classes_test: 3-element sequence of per-class test arrays.
        C:     SVM regularization parameter.
        gamma: RBF kernel coefficient.
    """
    data_train = np.concatenate(
        (data_all_classes[0], data_all_classes[1], data_all_classes[2]), axis=0)
    data_test = np.concatenate(
        (data_all_classes_test[0], data_all_classes_test[1],
         data_all_classes_test[2]), axis=0)

    # Float class labels (0., 1., 2.) matching each class's sample count.
    cls_train = np.concatenate(
        [np.full(len(data_all_classes[i]), i, dtype=float) for i in range(3)],
        axis=0)
    cls_test = np.concatenate(
        [np.full(len(data_all_classes_test[i]), i, dtype=float) for i in range(3)],
        axis=0)

    # Mesh grid covering the training data, used to paint decision regions.
    x_min, x_max = data_train[:, 0].min() - 1, data_train[:, 0].max() + 1
    y_min, y_max = data_train[:, 1].min() - 1, data_train[:, 1].max() + 1
    # NOTE(review): step size divides by x_min — degenerate if x_min is near 0;
    # confirm the data range or switch to (x_max - x_min) / 100.
    h = abs((x_max / x_min) / 100)
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    X_plot = np.c_[xx.ravel(), yy.ravel()]

    # --- Linear kernel ---
    # BUGFIX: the original shadowed the C argument with a hard-coded C = 1.0,
    # silently ignoring the caller's regularization value.
    svc = svm.SVC(kernel='linear', C=C,
                  decision_function_shape='ovr').fit(data_train, cls_train)
    Z = svc.predict(X_plot).reshape(xx.shape)
    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    plt.contourf(xx, yy, Z, cmap=plt.cm.tab10, alpha=0.3)
    plt.scatter(data_train[:, 0], data_train[:, 1], c=cls_train, cmap=plt.cm.Set1)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.xlim(xx.min(), xx.max())
    plt.title('SVM: Linear Kernel')

    y_pred = svc.predict(data_test)
    conf_mat = confusion_matrix(cls_test, y_pred)  # svm call
    print(conf_mat)
    perf_mat = gmm.find_performance_matrix(conf_mat)
    print(perf_mat)
    print(classification_report(cls_test, y_pred))

    # parameters = [{'kernel': ['rbf'], 'gamma': [1e-4, 1e-3, 0.01, 0.1, 0.2, 0.5],
    #                'C': [1, 10, 100, 1000]},
    #               {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    # clf = GridSearchCV(svm.SVC(decision_function_shape='ovr'), parameters, cv=5)

    # --- RBF kernel ---
    svc = svm.SVC(kernel='rbf', C=C, gamma=gamma,
                  decision_function_shape='ovr').fit(data_train, cls_train)
    # clf.fit(data_train, cls_train)
    Z = svc.predict(X_plot).reshape(xx.shape)
    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    plt.contourf(xx, yy, Z, cmap=plt.cm.tab10, alpha=0.3)
    plt.scatter(data_train[:, 0], data_train[:, 1], c=cls_train, cmap=plt.cm.Set1)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.xlim(xx.min(), xx.max())
    plt.title('SVM: RBF Kernel')

    y_pred = svc.predict(data_test)
    conf_mat = confusion_matrix(cls_test, y_pred)  # svm call
    print(conf_mat)
    perf_mat = gmm.find_performance_matrix(conf_mat)
    print(perf_mat)
    print(classification_report(cls_test, y_pred))

    # Polynomial and sigmoid kernels are handled by the companion routine.
    kernel_fn_another_two(data_all_classes, data_all_classes_test, C, gamma)
def supportvectors(data_all_classes, data_all_classes_test, C, gamma):
    """Train an RBF SVM, highlight its support vectors on a contour plot,
    and print confusion/performance matrices plus a classification report
    for the test data.

    Parameters:
        data_all_classes:      3-element sequence of per-class training arrays.
        data_all_classes_test: 3-element sequence of per-class test arrays.
        C:     SVM regularization parameter.
        gamma: RBF kernel coefficient.
    """
    data_train = np.concatenate(
        (data_all_classes[0], data_all_classes[1], data_all_classes[2]), axis=0)
    data_test = np.concatenate(
        (data_all_classes_test[0], data_all_classes_test[1],
         data_all_classes_test[2]), axis=0)

    # Float class labels (0., 1., 2.) matching each class's sample count.
    cls_train = np.concatenate(
        [np.full(len(data_all_classes[i]), i, dtype=float) for i in range(3)],
        axis=0)
    cls_test = np.concatenate(
        [np.full(len(data_all_classes_test[i]), i, dtype=float) for i in range(3)],
        axis=0)

    # Mesh grid covering the training data, used to draw decision contours.
    x_min, x_max = data_train[:, 0].min() - 1, data_train[:, 0].max() + 1
    y_min, y_max = data_train[:, 1].min() - 1, data_train[:, 1].max() + 1
    # NOTE(review): step size divides by x_min — degenerate if x_min is near 0;
    # confirm the data range or switch to (x_max - x_min) / 100.
    h = abs((x_max / x_min) / 100)
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    X_plot = np.c_[xx.ravel(), yy.ravel()]

    # BUGFIX: the original shadowed the C argument with a hard-coded C = 1.0,
    # silently ignoring the caller's regularization value.
    svc = svm.SVC(kernel='rbf', C=C, gamma=gamma,
                  decision_function_shape='ovr').fit(data_train, cls_train)
    Z = svc.predict(X_plot).reshape(xx.shape)

    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    # plt.contourf(xx, yy, Z, cmap=plt.cm.tab10, alpha=0.3)
    plt.contour(xx, yy, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
                linestyles=['--', '-', '--'])
    # Mark the support vectors on top of the training points.
    plt.scatter(svc.support_vectors_[:, 0], svc.support_vectors_[:, 1],
                s=100, linewidth=1, facecolors='b')
    plt.scatter(data_train[:, 0], data_train[:, 1], c=cls_train, cmap=plt.cm.Set1)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.xlim(xx.min(), xx.max())
    plt.title('SVM: RBF Kernel')

    y_pred = svc.predict(data_test)
    conf_mat = confusion_matrix(cls_test, y_pred)  # svm call
    perf_mat = gmm.find_performance_matrix(conf_mat)
    print(conf_mat)
    print(perf_mat)
    print(classification_report(cls_test, y_pred))
def bovw_predict(data_all_classes, data_all_classes_test, C, gamma):
    """Train and evaluate SVMs with four kernels on the BOVW features.

    Fits linear, RBF, degree-4 polynomial, and sigmoid SVMs (one-vs-rest)
    on the pooled training data and prints, for each kernel in that order:
    the confusion matrix, the performance matrix, and sklearn's
    classification report on the test data.

    Parameters:
        data_all_classes:      3-element sequence of per-class training arrays.
        data_all_classes_test: 3-element sequence of per-class test arrays.
        C:     SVM regularization parameter.
        gamma: kernel coefficient for rbf/poly/sigmoid.
    """
    data_train = np.concatenate(
        (data_all_classes[0], data_all_classes[1], data_all_classes[2]), axis=0)
    data_test = np.concatenate(
        (data_all_classes_test[0], data_all_classes_test[1],
         data_all_classes_test[2]), axis=0)

    # Float class labels (0., 1., 2.) matching each class's sample count.
    cls_train = np.concatenate(
        [np.full(len(data_all_classes[i]), i, dtype=float) for i in range(3)],
        axis=0)
    cls_test = np.concatenate(
        [np.full(len(data_all_classes_test[i]), i, dtype=float) for i in range(3)],
        axis=0)

    # One classifier per kernel; evaluated in this fixed order.  The original
    # repeated the identical fit/predict/report stanza four times.
    classifiers = (
        svm.SVC(kernel='linear', C=C, decision_function_shape='ovr'),
        svm.SVC(kernel='rbf', C=C, gamma=gamma, decision_function_shape='ovr'),
        svm.SVC(kernel='poly', degree=4, gamma=gamma, C=C,
                decision_function_shape='ovr'),
        svm.SVC(kernel='sigmoid', gamma=gamma, C=C,
                decision_function_shape='ovr'),
    )
    for svc in classifiers:
        svc.fit(data_train, cls_train)
        y_pred = svc.predict(data_test)
        conf_mat = confusion_matrix(cls_test, y_pred)  # svm call
        perf_mat = gmm.find_performance_matrix(conf_mat)
        print(conf_mat)
        print(perf_mat)
        print(classification_report(cls_test, y_pred))