def soft_voting_clf(train_data, test_data, train_labels, test_labels,
                    preprocess='hog', plot=False):
    """Train a soft-voting ensemble (kNN + SVC + AdaBoost) and report accuracy.

    Parameters
    ----------
    train_data, test_data : array-like
        Feature matrices, transformed in place by the selected preprocessing.
    train_labels, test_labels : array-like
        Class labels; cast to int32 before fitting.
    preprocess : str
        'hog' -> HoG features + standardization,
        'pca' -> PCA projection keeping 80% explained variance,
        anything else -> data used as-is.
    plot : bool
        If True, plot a normalized confusion matrix.

    Returns
    -------
    tuple
        (accuracy, fitted VotingClassifier)
    """
    train_labels = np.int32(train_labels)
    test_labels = np.int32(test_labels)

    # --------------------------- HoG preprocessing ----------------------------- #
    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardization: fit on the training split only to avoid leakage.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
    # ---------------------------- PCA preprocessing ---------------------------- #
    elif preprocess == 'pca':
        train_data, test_data, pca = pca_processing(train_data, test_data,
                                                    explained_variance=0.8)

    # -------------------------- Soft Voting Classifier ------------------------- #
    # Decision stump used as the AdaBoost weak learner.
    tree = DecisionTreeClassifier(max_depth=1)
    clf2 = KNeighborsClassifier(n_neighbors=6, algorithm='auto')
    # probability=True is required so the SVC can contribute to soft voting.
    clf3 = SVC(gamma='scale', kernel='rbf', probability=True)
    # NOTE(review): base_estimator= was renamed estimator= in sklearn 1.2;
    # kept here for compatibility with the version this file targets.
    clf4 = AdaBoostClassifier(base_estimator=tree, n_estimators=100,
                              learning_rate=0.23)
    # Fix: clf3 is an SVC, not a decision tree — it was mislabeled 'dt'.
    eclf = VotingClassifier(
        estimators=[('knn', clf2), ('svc', clf3), ('ada', clf4)],
        voting='soft', weights=[1, 2, 1])
    eclf.fit(train_data, train_labels)
    result = eclf.predict(test_data)

    accuracy = accuracy_score(test_labels, result)
    print('soft_voting_classifier')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(
        balanced_accuracy_score(test_labels, result)))
    if plot:
        plot_confusion_matrix(test_labels, result, normalize=True)

    return accuracy, eclf
def adaboost(train_data, test_data, train_labels, test_labels,
             preprocess='hog', plot=False):
    """Fit an AdaBoost ensemble (default decision-tree base learner).

    Applies the chosen preprocessing ('hog' with standardization, or 'pca'
    at 80% explained variance), trains the booster, prints accuracy and
    balanced accuracy, and optionally plots a normalized confusion matrix.

    Returns (accuracy, fitted AdaBoostClassifier).
    """
    train_labels, test_labels = np.int32(train_labels), np.int32(test_labels)

    # Optional feature preprocessing.
    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardize using statistics from the training split only.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data, test_data = (scaler.transform(train_data),
                                 scaler.transform(test_data))
    elif preprocess == 'pca':
        train_data, test_data, pca = pca_processing(
            train_data, test_data, explained_variance=0.8)

    # Default base estimator is a DecisionTreeClassifier.
    # n_estimators=100, learning_rate=0.23 : accuracy = 55.02%
    # n_estimators > 500 with best learning_rate=0.1 changes nothing.
    booster = AdaBoostClassifier(n_estimators=100, learning_rate=0.23)
    booster.fit(train_data, train_labels)
    predictions = booster.predict(test_data)

    accuracy = accuracy_score(test_labels, predictions)
    print('Adaboost - DecisionTree')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(
        balanced_accuracy_score(test_labels, predictions)))
    if plot:
        plot_confusion_matrix(test_labels, predictions, normalize=True)

    return accuracy, booster
def kNN(train_data, test_data, train_labels, test_labels,
        preprocess='hog', plot=False):
    """Fit a k-nearest-neighbours classifier (k=6) and report accuracy.

    Preprocessing mirrors the sibling classifiers: 'hog' -> HoG features +
    standardization, 'pca' -> PCA keeping 80% explained variance, otherwise
    the data is used untouched.

    Returns (accuracy, fitted KNeighborsClassifier).
    """
    train_labels, test_labels = np.int32(train_labels), np.int32(test_labels)

    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Scale features with training-set statistics only.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data, test_data = (scaler.transform(train_data),
                                 scaler.transform(test_data))
    elif preprocess == 'pca':
        train_data, test_data, pca = pca_processing(
            train_data, test_data, explained_variance=0.8)

    # k=6 neighbours, automatic algorithm selection.
    neigh = KNeighborsClassifier(n_neighbors=6, algorithm='auto')
    neigh.fit(train_data, train_labels)
    predictions = neigh.predict(test_data)

    accuracy = accuracy_score(test_labels, predictions)
    print('kNN')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(
        balanced_accuracy_score(test_labels, predictions)))
    if plot:
        plot_confusion_matrix(test_labels, predictions, normalize=True)

    return accuracy, neigh
def normal_bayes(train_data, test_data, train_labels, test_labels,
                 preprocess='hog', plot=False):
    """Fit a Gaussian naive Bayes classifier and report accuracy.

    Parameters
    ----------
    train_data, test_data : array-like
        Feature matrices, transformed by the selected preprocessing.
    train_labels, test_labels : array-like
        Class labels; cast to int32 before fitting.
    preprocess : str
        'hog' -> HoG features + standardization,
        'pca' -> PCA keeping 85% explained variance (note: higher than the
        0.8 used by the sibling classifiers),
        anything else -> data used as-is.
    plot : bool
        If True, plot a normalized confusion matrix.

    Returns
    -------
    tuple
        (accuracy, fitted GaussianNB)
    """
    train_labels = np.int32(train_labels)
    test_labels = np.int32(test_labels)

    # --------------------------- HoG preprocessing ----------------------------- #
    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardization fit on the training split only.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
    # ---------------------------- PCA preprocessing ---------------------------- #
    elif preprocess == 'pca':
        train_data, test_data, pca = pca_processing(train_data, test_data,
                                                    explained_variance=0.85)

    # ------------------------ Normal bayes classifier -------------------------- #
    clf = GaussianNB()
    clf.fit(train_data, train_labels)
    # Fix: the original called clf.predict(test_data) twice; once suffices.
    result = clf.predict(test_data)

    accuracy = accuracy_score(test_labels, result)
    print('naive_bayes - Gaussian')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(
        balanced_accuracy_score(test_labels, result)))
    if plot:
        plot_confusion_matrix(test_labels, result, normalize=True)

    return accuracy, clf
def kNN_SVM(train_data, test_data, train_labels, test_labels,
            preprocess='hog', plot=False):
    """Hybrid kNN/SVM classifier: label each test sample via its neighbours.

    For every test sample, find its 6 nearest training neighbours. If all
    neighbours share one label, predict that label directly; otherwise fit a
    small RBF SVM on just those neighbours and use it to label the sample.

    Parameters
    ----------
    train_data, test_data : array-like
        Feature matrices, transformed by the selected preprocessing.
    train_labels, test_labels : array-like
        Class labels; cast to int32 before fitting.
    preprocess : str
        'hog' -> HoG features + standardization, 'pca' -> PCA keeping 80%
        explained variance, anything else -> data used as-is.
    plot : bool
        If True, plot a normalized confusion matrix.

    Returns
    -------
    float
        Accuracy on the test set (no classifier is returned: the per-sample
        SVMs are transient).
    """
    train_labels = np.int32(train_labels)
    test_labels = np.int32(test_labels)

    # --------------------------- HoG preprocessing ----------------------------- #
    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardization fit on the training split only.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
    # ---------------------------- PCA preprocessing ---------------------------- #
    elif preprocess == 'pca':
        train_data, test_data, pca = pca_processing(train_data, test_data,
                                                    explained_variance=0.8)

    # --------------------------- kNN neighbour lookup -------------------------- #
    knn_clf = KNeighborsClassifier(n_neighbors=6, algorithm='auto')
    knn_clf.fit(train_data, train_labels)

    # Fix: np.int was deprecated in NumPy 1.20 and removed in 1.24; use a
    # concrete dtype consistent with the int32 label casts above.
    result = np.empty(len(test_labels), dtype=np.int32)

    # Indices of the nearest training neighbours for each test sample.
    kneighbors = knn_clf.kneighbors(test_data, return_distance=False)

    for idx, indices in enumerate(kneighbors):
        neighbors = [train_data[i] for i in indices]
        neighbors_labels = [train_labels[i] for i in indices]
        if all_same(neighbors_labels):
            # Unanimous neighbourhood: no SVM needed.
            result[idx] = neighbors_labels[0]
        else:
            # Disambiguate with a local SVM trained on the neighbours only.
            svm_clf = svm.SVC(C=0.5, kernel='rbf',
                              decision_function_shape='ovo',
                              random_state=42, gamma='scale')
            svm_clf.fit(neighbors, neighbors_labels)
            label = svm_clf.predict(test_data[idx].reshape(1, -1))
            result[idx] = label

    accuracy = accuracy_score(test_labels, result)
    print('knn_SVM')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(
        balanced_accuracy_score(test_labels, result)))
    if plot:
        plot_confusion_matrix(test_labels, result, normalize=True)

    return accuracy  # , knn_clf
def SVM(train_data, test_data, train_labels, test_labels,
        preprocess='hog', plot=False):
    """Fit a support vector classifier (one-vs-one) and report accuracy.

    Parameters
    ----------
    train_data, test_data : array-like
        Feature matrices, transformed by the selected preprocessing.
    train_labels, test_labels : array-like
        Class labels; cast to int32 before fitting.
    preprocess : str
        'hog' -> HoG features + standardization,
        'pca' -> PCA keeping 85% explained variance,
        anything else -> data used as-is.
    plot : bool
        If True, plot a normalized confusion matrix.

    Returns
    -------
    tuple
        (accuracy, fitted SVC)
    """
    train_labels = np.int32(train_labels)
    test_labels = np.int32(test_labels)

    # --------------------------- HoG preprocessing ----------------------------- #
    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardization fit on the training split only.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
    # ---------------------------- PCA preprocessing ---------------------------- #
    elif preprocess == 'pca':
        train_data, test_data, pca = pca_processing(train_data, test_data,
                                                    explained_variance=0.85)

    # ---------------------------- sklearn - SVM -------------------------------- #
    # Fix: dropped coef0=0.5 — coef0 only affects 'poly'/'sigmoid' kernels and
    # was a silent no-op with the default 'rbf' kernel.
    clf = svm.SVC(decision_function_shape='ovo', probability=False,
                  gamma='scale', random_state=42)
    clf.fit(train_data, train_labels)
    result = clf.predict(test_data)

    accuracy = accuracy_score(test_labels, result)
    print('SVM')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(
        balanced_accuracy_score(test_labels, result)))
    if plot:
        plot_confusion_matrix(test_labels, result, normalize=True)

    return accuracy, clf