예제 #1
0
def soft_voting_clf(train_data, test_data, train_labels, test_labels, preprocess='hog', plot=False):
    """Train and evaluate a soft-voting ensemble of kNN, SVC and AdaBoost.

    Parameters
    ----------
    train_data, test_data
        Training and test samples. They are handed to ``hog_processing`` or
        ``pca_processing`` (both defined elsewhere) depending on
        ``preprocess``; with any other value they are used as given.
    train_labels, test_labels
        Class labels; both are cast with ``np.int32``.
    preprocess : str
        ``'hog'``: HoG features followed by a ``StandardScaler`` fitted on the
        training set only. ``'pca'``: ``pca_processing`` with
        ``explained_variance=0.8``.
    plot : bool
        When true, ``plot_confusion_matrix`` is called with ``normalize=True``.

    Returns
    -------
    tuple
        ``(accuracy, eclf)``: the ``accuracy_score`` on the test set and the
        fitted ``VotingClassifier``.
    """
    train_labels = np.int32(train_labels)
    test_labels = np.int32(test_labels)

    # ------------------------------ Preprocessing ------------------------------ #
    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardize with statistics from the training set only, so nothing
        # about the test set leaks into the fitted scaler.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
    elif preprocess == 'pca':
        # The third return value (named ``pca`` in the original) is not needed.
        train_data, test_data, _ = pca_processing(train_data, test_data,
                                                  explained_variance=0.8)

    # -------------------------- Soft Voting Classifier ------------------------- #
    stump = DecisionTreeClassifier(max_depth=1)
    knn_clf = KNeighborsClassifier(n_neighbors=6, algorithm='auto')
    # probability=True is required so the SVC can take part in soft voting.
    svc_clf = SVC(gamma='scale', kernel='rbf', probability=True)

    boost_params = dict(n_estimators=100, learning_rate=0.23)
    try:
        # scikit-learn >= 1.2 names the weak learner ``estimator``;
        # ``base_estimator`` was removed in 1.4.
        ada_clf = AdaBoostClassifier(estimator=stump, **boost_params)
    except TypeError:
        # Older scikit-learn releases only accept ``base_estimator``.
        ada_clf = AdaBoostClassifier(base_estimator=stump, **boost_params)

    # NOTE: the SVC is registered under the key 'dt'. The key is kept as-is
    # because callers may look the estimator up by that name.
    eclf = VotingClassifier(
        estimators=[('knn', knn_clf), ('dt', svc_clf), ('ada', ada_clf)],
        voting='soft',
        weights=[1, 2, 1])

    eclf.fit(train_data, train_labels)

    # ------------------------------- Evaluation -------------------------------- #
    result = eclf.predict(test_data)
    accuracy = accuracy_score(test_labels, result)
    print('soft_voting_classifier')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(balanced_accuracy_score(test_labels, result)))

    if plot:
        plot_confusion_matrix(test_labels, result, normalize=True)

    return accuracy, eclf
예제 #2
0
def adaboost(train_data,
             test_data,
             train_labels,
             test_labels,
             preprocess='hog',
             plot=False):
    """Fit an AdaBoost classifier and report its accuracy on the test set.

    ``preprocess='hog'`` runs ``hog_processing`` and then standardizes both
    sets with a ``StandardScaler`` fitted on the training set;
    ``preprocess='pca'`` runs ``pca_processing`` with
    ``explained_variance=0.8``; any other value leaves the data untouched.

    Prints the accuracy and the balanced accuracy, optionally plots a
    normalized confusion matrix (``plot=True``), and returns
    ``(accuracy, fitted_classifier)``.
    """
    y_train = np.int32(train_labels)
    y_test = np.int32(test_labels)

    # ------------------------------ Preprocessing ------------------------------ #
    if preprocess == 'pca':
        train_data, test_data, _ = pca_processing(train_data,
                                                  test_data,
                                                  explained_variance=0.8)
    elif preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardization: statistics come from the training set only.
        standardizer = preprocessing.StandardScaler()
        standardizer.fit(train_data)
        train_data = standardizer.transform(train_data)
        test_data = standardizer.transform(test_data)

    # --------------------------- sklearn - Adaboost ---------------------------- #
    # No weak learner is passed, so scikit-learn's default one is used.
    # Author's notes: n_estimators=100, learning_rate=0.23 gave 55.02% accuracy;
    # n_estimators > 500 with learning_rate=0.1 changed nothing.
    booster = AdaBoostClassifier(n_estimators=100, learning_rate=0.23)
    booster.fit(train_data, y_train)

    predictions = booster.predict(test_data)
    accuracy = accuracy_score(y_test, predictions)

    print('Adaboost - DecisionTree')
    print("Accuracy: {}".format(accuracy))
    balanced = balanced_accuracy_score(y_test, predictions)
    print("Balanced Accuracy: {}".format(balanced))

    if plot:
        plot_confusion_matrix(y_test, predictions, normalize=True)

    return accuracy, booster
예제 #3
0
def kNN(train_data,
        test_data,
        train_labels,
        test_labels,
        preprocess='hog',
        plot=False):
    """Fit a 6-nearest-neighbours classifier and score it on the test set.

    Preprocessing is selected by ``preprocess``: ``'hog'`` applies
    ``hog_processing`` followed by standardization fitted on the training
    set, ``'pca'`` applies ``pca_processing`` with ``explained_variance=0.8``,
    and any other value skips preprocessing.

    The accuracy and balanced accuracy are printed; when ``plot`` is true a
    normalized confusion matrix is drawn via ``plot_confusion_matrix``.

    Returns ``(accuracy, fitted_classifier)``.
    """
    y_train = np.int32(train_labels)
    y_test = np.int32(test_labels)

    # ------------------------------ Preprocessing ------------------------------ #
    use_hog = preprocess == 'hog'
    use_pca = (not use_hog) and preprocess == 'pca'

    if use_hog:
        train_data, test_data = hog_processing(train_data, test_data)
        # Fit the scaler on the training set, then apply it to both sets.
        fitted_scaler = preprocessing.StandardScaler().fit(train_data)
        train_data, test_data = (fitted_scaler.transform(train_data),
                                 fitted_scaler.transform(test_data))
    if use_pca:
        reduced = pca_processing(train_data,
                                 test_data,
                                 explained_variance=0.8)
        train_data, test_data, _ = reduced

    # ------------------------------ kNN Algorithm ------------------------------ #
    neighbours_model = KNeighborsClassifier(n_neighbors=6, algorithm='auto')
    neighbours_model.fit(train_data, y_train)

    predicted = neighbours_model.predict(test_data)
    accuracy = accuracy_score(y_test, predicted)

    print('kNN')
    print("Accuracy: {}".format(accuracy))
    balanced = balanced_accuracy_score(y_test, predicted)
    print("Balanced Accuracy: {}".format(balanced))

    if plot:
        plot_confusion_matrix(y_test, predicted, normalize=True)

    return accuracy, neighbours_model
예제 #4
0
def normal_bayes(train_data, test_data, train_labels, test_labels, preprocess='hog', plot=False):
    """Train a Gaussian naive Bayes classifier and evaluate it on the test set.

    Parameters
    ----------
    train_data, test_data
        Training and test samples, forwarded to ``hog_processing`` or
        ``pca_processing`` (defined elsewhere) according to ``preprocess``.
    train_labels, test_labels
        Class labels; both are cast with ``np.int32``.
    preprocess : str
        ``'hog'``: HoG features followed by a ``StandardScaler`` fitted on the
        training set. ``'pca'``: ``pca_processing`` with
        ``explained_variance=0.85``. Any other value: no preprocessing.
    plot : bool
        When true, ``plot_confusion_matrix`` is called with ``normalize=True``.

    Returns
    -------
    tuple
        ``(accuracy, clf)``: the ``accuracy_score`` on the test set and the
        fitted ``GaussianNB`` instance.
    """
    train_labels = np.int32(train_labels)
    test_labels = np.int32(test_labels)

    # ------------------------------ Preprocessing ------------------------------ #
    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardize using statistics from the training set only.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
    elif preprocess == 'pca':
        # The third return value (named ``pca`` in the original) is not needed.
        train_data, test_data, _ = pca_processing(train_data, test_data,
                                                  explained_variance=0.85)

    # ------------------------- Normal bayes classifier ------------------------- #
    clf = GaussianNB()
    clf.fit(train_data, train_labels)

    # Predict once; the original ran the same prediction twice in a row and
    # threw the first result away.
    result = clf.predict(test_data)
    accuracy = accuracy_score(test_labels, result)

    print('naive_bayes - Gaussian')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(balanced_accuracy_score(test_labels, result)))

    if plot:
        plot_confusion_matrix(test_labels, result, normalize=True)

    return accuracy, clf
예제 #5
0
def kNN_SVM(train_data,
            test_data,
            train_labels,
            test_labels,
            preprocess='hog',
            plot=False):
    """Classify each test sample with an SVM trained on its nearest neighbours.

    A 6-nearest-neighbours model is fitted on the training set. For every test
    sample its neighbours are looked up; if ``all_same`` (defined elsewhere)
    reports that they share one label, that label is the prediction.
    Otherwise an RBF ``SVC`` is fitted on just those neighbours and used to
    label the sample.

    Parameters
    ----------
    train_data, test_data
        Training and test samples, forwarded to ``hog_processing`` or
        ``pca_processing`` according to ``preprocess``. After preprocessing
        they are indexed by row, and ``test_data[idx]`` must support
        ``reshape``.
    train_labels, test_labels
        Class labels; both are cast with ``np.int32``.
    preprocess : str
        ``'hog'``: HoG features followed by a ``StandardScaler`` fitted on the
        training set. ``'pca'``: ``pca_processing`` with
        ``explained_variance=0.8``. Any other value: no preprocessing.
    plot : bool
        When true, ``plot_confusion_matrix`` is called with ``normalize=True``.

    Returns
    -------
    accuracy
        The ``accuracy_score`` on the test set. Unlike the sibling functions,
        no classifier is returned, because there is no single fitted model.
    """
    train_labels = np.int32(train_labels)
    test_labels = np.int32(test_labels)

    # ------------------------------ Preprocessing ------------------------------ #
    if preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardize using statistics from the training set only.
        scaler = preprocessing.StandardScaler().fit(train_data)
        train_data = scaler.transform(train_data)
        test_data = scaler.transform(test_data)
    elif preprocess == 'pca':
        # The third return value (named ``pca`` in the original) is not needed.
        train_data, test_data, _ = pca_processing(train_data,
                                                  test_data,
                                                  explained_variance=0.8)

    # ----------------------------- kNN neighbourhood --------------------------- #
    # Fit a kNN model on the training set; it is only used for neighbour lookup.
    knn_clf = KNeighborsClassifier(n_neighbors=6, algorithm='auto')
    knn_clf.fit(train_data, train_labels)

    # Predicted labels, filled in one test sample at a time.
    # ``np.int`` was an alias of the builtin ``int`` and was removed in
    # NumPy 1.24, so the builtin is used directly (same resulting dtype).
    result = np.empty(len(test_labels), dtype=int)

    # Indices (into the training set) of the nearest neighbours of each sample.
    kneighbors = knn_clf.kneighbors(test_data, return_distance=False)

    for idx, indices in enumerate(kneighbors):
        # Gather the neighbouring training samples and their labels.
        neighbors = [train_data[i] for i in indices]
        neighbors_labels = [train_labels[i] for i in indices]

        if all_same(neighbors_labels):
            # Unanimous neighbourhood: no need to train anything.
            result[idx] = neighbors_labels[0]
        else:
            # Mixed neighbourhood: fit a local SVM on the neighbours only.
            svm_clf = svm.SVC(C=0.5,
                              kernel='rbf',
                              decision_function_shape='ovo',
                              random_state=42,
                              gamma='scale')
            svm_clf.fit(neighbors, neighbors_labels)
            label = svm_clf.predict(test_data[idx].reshape(1, -1))
            # ``predict`` returns a length-1 array; take the scalar explicitly
            # (implicit array-to-scalar conversion is deprecated in NumPy 1.25+).
            result[idx] = label[0]

    # ------------------------------- Evaluation -------------------------------- #
    accuracy = accuracy_score(test_labels, result)

    print('knn_SVM')
    print("Accuracy: {}".format(accuracy))
    print("Balanced Accuracy: {}".format(
        balanced_accuracy_score(test_labels, result)))

    if plot:
        plot_confusion_matrix(test_labels, result, normalize=True)

    return accuracy
예제 #6
0
def SVM(train_data,
        test_data,
        train_labels,
        test_labels,
        preprocess='hog',
        plot=False):
    """Fit a support-vector classifier and measure its test-set accuracy.

    ``preprocess`` picks the feature pipeline: ``'hog'`` calls
    ``hog_processing`` and then standardizes both sets with a scaler fitted on
    the training set; ``'pca'`` calls ``pca_processing`` with
    ``explained_variance=0.85``; anything else uses the data as passed in.

    The accuracy and balanced accuracy are printed. With ``plot=True`` a
    normalized confusion matrix is drawn via ``plot_confusion_matrix``.

    Returns ``(accuracy, fitted_classifier)``.
    """
    y_train = np.int32(train_labels)
    y_test = np.int32(test_labels)

    # ------------------------------ Preprocessing ------------------------------ #
    if preprocess == 'pca':
        train_data, test_data, _ = pca_processing(train_data,
                                                  test_data,
                                                  explained_variance=0.85)
    elif preprocess == 'hog':
        train_data, test_data = hog_processing(train_data, test_data)
        # Standardization: fit on the training set, apply to both sets.
        standardizer = preprocessing.StandardScaler()
        standardizer.fit(train_data)
        train_data = standardizer.transform(train_data)
        test_data = standardizer.transform(test_data)

    # ----------------------------- sklearn - SVM ------------------------------- #
    svc_options = dict(decision_function_shape='ovo',
                       probability=False,
                       gamma='scale',
                       coef0=0.5,
                       random_state=42)
    model = svm.SVC(**svc_options)
    model.fit(train_data, y_train)

    predictions = model.predict(test_data)
    accuracy = accuracy_score(y_test, predictions)

    print('SVM')
    print("Accuracy: {}".format(accuracy))
    balanced = balanced_accuracy_score(y_test, predictions)
    print("Balanced Accuracy: {}".format(balanced))

    if plot:
        plot_confusion_matrix(y_test, predictions, normalize=True)

    # NOTE: saving/loading the (scaler, classifier) pair with pickle under
    # 'svm_data_aug.pkl' was sketched here originally but is disabled.

    return accuracy, model