Example no. 1
0
def main():
    """Train a per-class GMM classifier on PCA-reduced BoVW features.

    Pipeline: load the 3-class training set, compute PCA on the pooled
    covariance, keep the top-l eigenvectors, reduce the data, seed a
    k-component GMM per class with K-means, then evaluate on the test set
    with a confusion matrix, a performance matrix and overall accuracy.
    """
    l = 2  # number of principal components kept after PCA
    k = 8  # number of GMM mixture components per class

    data_all_classes = load_bovw_train_set()
    data_all_classes_combined = np.concatenate(
        (data_all_classes[0], data_all_classes[1], data_all_classes[2]),
        axis=0)
    mean = np.mean(data_all_classes_combined, axis=0)
    # Build covariance matrix from the pooled training data of all classes.
    cov_mat = np.cov(data_all_classes_combined.T)

    # Eigen analysis on the pooled covariance.  eigh (not eig) is used
    # because the covariance matrix is symmetric: eigh guarantees real
    # eigenvalues/eigenvectors, whereas eig can return values with tiny
    # imaginary parts that break the argsort below.
    eigen_values, eigen_vectors = np.linalg.eigh(cov_mat)
    # Sanity check: every eigenvector should be unit-norm.  Eigenvectors
    # are the COLUMNS of the returned matrix, and a float tolerance check
    # must use abs(...) > eps rather than an exact == comparison.
    for vec in eigen_vectors.T:
        if abs(np.linalg.norm(vec) - 1) > 0.001:
            print("alert!")

    # Order eigenpairs by decreasing eigenvalue (largest variance first).
    index = eigen_values.argsort()[::-1]
    eigen_values_ordered = eigen_values[index]
    eigen_vectors_ordered = eigen_vectors[:, index]
    # plot_eigen_values(eigen_values_ordered, l)

    # Keep the l leading eigenvectors and project train/test data onto them.
    selected_eigen_vectors = np.asmatrix(eigen_vectors_ordered[:, :l])

    train_data_reduced = perform_data_reduction(data_all_classes,
                                                selected_eigen_vectors, l,
                                                mean)
    train_data_reduced_combined = np.concatenate(
        (train_data_reduced[0], train_data_reduced[1], train_data_reduced[2]),
        axis=0)
    # plot_univariate_data(train_data_reduced, l)
    # plot_bivariate_data(train_data_reduced, l)

    # Per-class GMM parameters: mixing weights pi, means mu, covariances sigma.
    mu_each_class_each_k = np.zeros((3, k, l))
    pi_each_class_each_k = np.zeros((3, k))
    sigma_each_class_each_k = np.zeros((3, k, l, l))

    for class_index in range(len(train_data_reduced)):
        print("class", class_index)
        # K-means on the reduced training data to seed the GMM.
        centers = find_initial_random_centers(train_data_reduced[class_index],
                                              k, l)
        centers_after_Kmeans, whcluster = gmm.computeKMC(
            train_data_reduced[class_index], centers, k)
        cluster_data = gmm.groupdata(train_data_reduced[class_index],
                                     centers_after_Kmeans, whcluster, k)

        # EM-fit a GMM on this class's training data.
        log_likelihood_all_iters, pi_final_each_k, mu_final_each_k, sigma_final_each_k = gmm.compute_GMM(
            centers_after_Kmeans, train_data_reduced[class_index],
            cluster_data, k)
        plot_iters_vs_loglikelihood(log_likelihood_all_iters, 3, k,
                                    class_index, l)
        pi_each_class_each_k[class_index] = pi_final_each_k
        mu_each_class_each_k[class_index] = mu_final_each_k
        sigma_each_class_each_k[class_index] = sigma_final_each_k

    # Evaluate on the test set (reduced with the SAME eigenvectors and mean).
    data_all_classes_test = load_bovw_test_set()
    test_data_reduced = perform_data_reduction(data_all_classes_test,
                                               selected_eigen_vectors, l, mean)
    confusion_matrix = gmm.compute_confusion_matrix(test_data_reduced,
                                                    pi_each_class_each_k,
                                                    mu_each_class_each_k,
                                                    sigma_each_class_each_k, k,
                                                    0)
    print("\nconfusion_matrix")
    gmm.print_matrix(confusion_matrix)

    performance_matrix = gmm.find_performance_matrix(confusion_matrix)
    print("\nperformance_matrix")
    gmm.print_matrix(performance_matrix)

    class_accuracy = gmm.find_accuracy(confusion_matrix)
    print("\nclass accuracy: ", class_accuracy)
Example no. 2
0
def main():
    """Pairwise Fisher discriminant (FDA) projection followed by per-class
    GMM classification.

    For each unordered pair of the three classes, the 2-D training data is
    projected onto the FDA direction (1-D), a k-component GMM is fitted to
    each class's projections, and all pairwise models are finally evaluated
    on the test set via a confusion matrix.
    """
    l = 1  # dimensionality after FDA projection (always 1-D)
    opt_type = 2    # data set option: 1 = LS, 2 = NLS, 3 = BoVW
    k = 4           # Number of clusters
    btw_classes = 4     # plot contour plots between the selected classes
    dimension_of_data = 2          # dimension of the input data (NOTE(review): unused below)
    # Load the requested data set (train + test).
    if(opt_type == 1):
        data_all_classes = loadcls.load_LS_trainingset()
        data_all_classes_test = loadcls.load_LS_test_set()
    elif(opt_type == 2):
        data_all_classes = loadcls.load_NLS_trainingset()
        data_all_classes_test = loadcls.load_NLS_test_set()
    elif(opt_type == 3):
        data_all_classes = loadcls.load_bovw_train_set()
        data_all_classes_test = loadcls.load_bovw_test_set()
     #arrange the information
#    elif (btw_classes == 4):
#        data_some_classes = data_all_classes
#        btw_num_of_classes = 3
#        class_names = ['C1','C2','C3']
#        selected_colors = [0, 1, 2]

    mean_two_classes, scatter_two_classes = [], []

    if( btw_classes == 4):
        # Build every unordered pair of classes: (0,1), (1,2), (0,2).
        data_cls0_cls1 = [data_all_classes[0], data_all_classes[1]]
        data_cls1_cls2 = [data_all_classes[1], data_all_classes[2]]
        data_cls0_cls2 = [data_all_classes[0], data_all_classes[2]]
#        data_cls0_cls1_test = [data_all_classes_test[0], data_all_classes_test[1]]
#        data_cls1_cls2_test = [data_all_classes_test[1], data_all_classes_test[2]]
#        data_cls0_cls2_test = [data_all_classes_test[0], data_all_classes_test[2]]

        cls01_cls12_cls02 = [data_cls0_cls1, data_cls1_cls2, data_cls0_cls2]
#        cls01_cls12_cls02_test = [data_cls0_cls1_test, data_cls1_cls2_test, data_cls0_cls2_test]
        # Accumulators indexed by class pair: projected data, GMM parameters
        # (pi, mu, sigma) and the projection vector w for each combination.
        proj_cls_all_combination = []
        pi_each_class_each_k_diff_combination, mu_each_class_each_k_diff_combination,sigma_each_class_each_k_diff_combination  = [], [], []
        W_diff_combinn = []
        for i in range(len(cls01_cls12_cls02)):
            data_some_classes_train = cls01_cls12_cls02[i]
            btw_num_of_classes = 2
            # FDA: within-class scatter Sw, between-class scatter Sb, and the
            # mean difference used for the closed-form projection direction.
            within_cls_scatterM, btw_cls_scatterM, mu1_minus_mu2 = find_Sb_and_Sw(data_some_classes_train, btw_num_of_classes)
            w, eigen_vector_max = find_project_vector(within_cls_scatterM, btw_cls_scatterM, mu1_minus_mu2)
            proj_tr_some_cls = find_projected_points(data_some_classes_train, w, eigen_vector_max)
            proj_cls_all_combination.append(proj_tr_some_cls)
            W_diff_combinn.append(w)             #eigen_vector_max is used for projection and w is not used.. we can interchange...
            train_data_reduced = proj_tr_some_cls

            # GMM parameters for the 2 classes of this pair.
            mu_each_class_each_k = np.zeros((2, k, l))
            pi_each_class_each_k = np.zeros((2, k))
            sigma_each_class_each_k = np.zeros((2, k, l, l))

            for class_index in range(len(train_data_reduced)):
                print ("class", class_index)
            # K-means on the reduced (projected) training data seeds the GMM.
                centers = find_initial_random_centers(train_data_reduced[class_index], k, l)
                centers_after_Kmeans, whcluster = gmm.computeKMC(train_data_reduced[class_index], centers, k)
                cluster_data = gmm.groupdata(train_data_reduced[class_index], centers_after_Kmeans, whcluster, k)

            # EM-fit a GMM on this class's projected training data.
                log_likelihood_all_iters, pi_final_each_k, mu_final_each_k, sigma_final_each_k = gmm.compute_GMM(centers_after_Kmeans, train_data_reduced[class_index], cluster_data, k)
#                plot_iters_vs_loglikelihood(log_likelihood_all_iters, 3, k, class_index, l)
                pi_each_class_each_k[class_index] = pi_final_each_k
                mu_each_class_each_k[class_index] = mu_final_each_k
                sigma_each_class_each_k[class_index] = sigma_final_each_k

            pi_each_class_each_k_diff_combination.append(pi_each_class_each_k)
            mu_each_class_each_k_diff_combination.append(mu_each_class_each_k)
            sigma_each_class_each_k_diff_combination.append(sigma_each_class_each_k)

        # Evaluate all pairwise models on the (unprojected) test set; the
        # confusion-matrix helper applies the stored projections W itself.
        conf_mat = compute_confusion_matrix(data_all_classes_test, pi_each_class_each_k_diff_combination, mu_each_class_each_k_diff_combination, sigma_each_class_each_k_diff_combination, W_diff_combinn, k)
        gmm.print_matrix(conf_mat)
        performance_matrix = gmm.find_performance_matrix(conf_mat)
        print("\nperformance_matrix")
        gmm.print_matrix(performance_matrix)
        class_accuracy = gmm.find_accuracy(conf_mat)
        print("\nclass accuracy: ", class_accuracy)

#        plot_decision_boundary(data_all_classes, pi_each_class_each_k_diff_combination, mu_each_class_each_k_diff_combination, sigma_each_class_each_k_diff_combination, W_diff_combinn, k)
#        plot_classes(proj_cls_all_combination[0][0], proj_cls_all_combination[0][1])
        # Visualise the first pair: raw scatter + its FDA projection.
        plot_scatter(cls01_cls12_cls02[0][0], cls01_cls12_cls02[0][1], W_diff_combinn[0],proj_cls_all_combination[0])



#            data_some_classes_test = cls01_cls12_cls02_test[i]
#            proj_test_some_cls = find_projected_points(data_some_classes_test, w, eigen_vector_max)
#            wh_class_arr = similar_to_confusion_matrix(proj_test_some_cls, pi_each_class_each_k, mu_each_class_each_k, sigma_each_class_each_k, k)
    print ("here")
Example no. 3
0
def kernel_fn(data_all_classes, data_all_classes_test, C, gamma):
    """Train and visualise linear- and RBF-kernel SVMs on 2-D, 3-class data.

    Args:
        data_all_classes: sequence of 3 (n_i, 2) training arrays, one per class.
        data_all_classes_test: same layout for the test set.
        C: SVM regularization parameter (honoured -- the previous version
           silently overwrote it with 1.0, ignoring the caller's value).
        gamma: RBF kernel width.

    For each kernel, prints the confusion matrix, the performance matrix
    and sklearn's classification report, and draws the decision regions.
    Delegates the remaining kernels to kernel_fn_another_two.
    """
    data_train = np.concatenate(
        (data_all_classes[0], data_all_classes[1], data_all_classes[2]),
        axis=0)
    data_test = np.concatenate(
        (data_all_classes_test[0], data_all_classes_test[1],
         data_all_classes_test[2]),
        axis=0)
    # Label vectors: every sample of class c gets label float(c).
    cls_train = np.concatenate(
        [np.full(len(data_all_classes[c]), float(c)) for c in range(3)])
    cls_test = np.concatenate(
        [np.full(len(data_all_classes_test[c]), float(c)) for c in range(3)])

    # Mesh over the training range for decision-region plotting.  Step is
    # 1/100 of the x-range; the previous ratio-based step (x_max / x_min)
    # blew up (or hit zero) when x_min was near 0.
    x_min, x_max = data_train[:, 0].min() - 1, data_train[:, 0].max() + 1
    y_min, y_max = data_train[:, 1].min() - 1, data_train[:, 1].max() + 1
    h = (x_max - x_min) / 100
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    X_plot = np.c_[xx.ravel(), yy.ravel()]

    # --- Linear kernel -----------------------------------------------------
    svc = svm.SVC(kernel='linear', C=C,
                  decision_function_shape='ovr').fit(data_train, cls_train)
    Z = svc.predict(X_plot).reshape(xx.shape)
    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    plt.contourf(xx, yy, Z, cmap=plt.cm.tab10, alpha=0.3)
    plt.scatter(data_train[:, 0],
                data_train[:, 1],
                c=cls_train,
                cmap=plt.cm.Set1)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.xlim(xx.min(), xx.max())
    plt.title('SVM: Linear Kernel')
    y_pred = svc.predict(data_test)
    conf_mat = confusion_matrix(cls_test, y_pred)  #svm call
    print(conf_mat)
    perf_mat = gmm.find_performance_matrix(conf_mat)
    print(perf_mat)
    print(classification_report(cls_test, y_pred))

    # --- RBF kernel --------------------------------------------------------
    #    parameters = [{'kernel': ['rbf'], 'gamma': [1e-4, 1e-3, 0.01, 0.1, 0.2, 0.5],'C': [1, 10, 100, 1000]}, {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    #    clf = GridSearchCV(svm.SVC(decision_function_shape='ovr'), parameters, cv=5)
    svc = svm.SVC(kernel='rbf',
                  C=C,
                  gamma=gamma,
                  decision_function_shape='ovr').fit(data_train, cls_train)
    Z = svc.predict(X_plot).reshape(xx.shape)
    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    plt.contourf(xx, yy, Z, cmap=plt.cm.tab10, alpha=0.3)
    plt.scatter(data_train[:, 0],
                data_train[:, 1],
                c=cls_train,
                cmap=plt.cm.Set1)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.xlim(xx.min(), xx.max())
    plt.title('SVM: RBF Kernel')
    y_pred = svc.predict(data_test)
    conf_mat = confusion_matrix(cls_test, y_pred)  #svm call
    print(conf_mat)
    perf_mat = gmm.find_performance_matrix(conf_mat)
    print(perf_mat)
    print(classification_report(cls_test, y_pred))
    kernel_fn_another_two(data_all_classes, data_all_classes_test, C, gamma)
Example no. 4
0
def supportvectors(data_all_classes, data_all_classes_test, C, gamma):
    """Train an RBF-kernel SVM, highlight its support vectors, and evaluate.

    Args:
        data_all_classes: sequence of 3 (n_i, 2) training arrays, one per class.
        data_all_classes_test: same layout for the test set.
        C: SVM regularization parameter (honoured -- the previous version
           silently overwrote it with 1.0, ignoring the caller's value).
        gamma: RBF kernel width.

    Plots the decision boundaries with the support vectors overdrawn, then
    prints the confusion matrix, performance matrix and classification report.
    """
    data_train = np.concatenate(
        (data_all_classes[0], data_all_classes[1], data_all_classes[2]),
        axis=0)
    data_test = np.concatenate(
        (data_all_classes_test[0], data_all_classes_test[1],
         data_all_classes_test[2]),
        axis=0)
    # Label vectors: every sample of class c gets label float(c).
    cls_train = np.concatenate(
        [np.full(len(data_all_classes[c]), float(c)) for c in range(3)])
    cls_test = np.concatenate(
        [np.full(len(data_all_classes_test[c]), float(c)) for c in range(3)])

    # Mesh over the training range for plotting.  Step is 1/100 of the
    # x-range; the previous ratio-based step (x_max / x_min) blew up (or
    # hit zero) when x_min was near 0.
    x_min, x_max = data_train[:, 0].min() - 1, data_train[:, 0].max() + 1
    y_min, y_max = data_train[:, 1].min() - 1, data_train[:, 1].max() + 1
    h = (x_max - x_min) / 100
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    X_plot = np.c_[xx.ravel(), yy.ravel()]

    svc = svm.SVC(kernel='rbf',
                  C=C,
                  gamma=gamma,
                  decision_function_shape='ovr').fit(data_train, cls_train)
    Z = svc.predict(X_plot).reshape(xx.shape)
    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    #    plt.contourf(xx, yy, Z, cmap=plt.cm.tab10, alpha=0.3)
    # NOTE(review): Z holds predicted class labels (0/1/2), so
    # levels=[-1, 0, 1] draws label-value contours, not SVM margins --
    # confirm this is the intended visualisation.
    plt.contour(xx,
                yy,
                Z,
                colors='k',
                levels=[-1, 0, 1],
                alpha=0.5,
                linestyles=['--', '-', '--'])
    # Overdraw the support vectors as large blue markers.
    plt.scatter(svc.support_vectors_[:, 0],
                svc.support_vectors_[:, 1],
                s=100,
                linewidth=1,
                facecolors='b')
    plt.scatter(data_train[:, 0],
                data_train[:, 1],
                c=cls_train,
                cmap=plt.cm.Set1)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.xlim(xx.min(), xx.max())
    plt.title('SVM: RBF Kernel')
    y_pred = svc.predict(data_test)
    conf_mat = confusion_matrix(cls_test, y_pred)  #svm call
    perf_mat = gmm.find_performance_matrix(conf_mat)
    print(conf_mat)
    print(perf_mat)
    print(classification_report(cls_test, y_pred))
Example no. 5
0
def _svm_labels(data_per_class):
    """Return a 1-D label vector: every sample of class c gets float(c)."""
    return np.concatenate(
        [np.full(len(cls_data), float(idx))
         for idx, cls_data in enumerate(data_per_class)])


def _fit_and_report(params, data_train, cls_train, data_test, cls_test):
    """Fit one svm.SVC configured by *params* and print its evaluation.

    Prints, in order: the confusion matrix, the performance matrix, and
    sklearn's classification report (same output as the original inline code).
    """
    svc = svm.SVC(decision_function_shape='ovr',
                  **params).fit(data_train, cls_train)
    y_pred = svc.predict(data_test)
    conf_mat = confusion_matrix(cls_test, y_pred)  #svm call
    perf_mat = gmm.find_performance_matrix(conf_mat)
    print(conf_mat)
    print(perf_mat)
    print(classification_report(cls_test, y_pred))


def bovw_predict(data_all_classes, data_all_classes_test, C, gamma):
    """Evaluate four SVM kernels (linear, rbf, poly, sigmoid) on BoVW data.

    Args:
        data_all_classes: sequence of 3 per-class training arrays.
        data_all_classes_test: same layout for the test set.
        C: SVM regularization parameter.
        gamma: kernel coefficient for rbf/poly/sigmoid.

    For each kernel, prints the confusion matrix, performance matrix and
    classification report.  (The previous version repeated the same
    fit/predict/report boilerplate four times and left an unused `clf`.)
    """
    data_train = np.concatenate(
        (data_all_classes[0], data_all_classes[1], data_all_classes[2]),
        axis=0)
    data_test = np.concatenate(
        (data_all_classes_test[0], data_all_classes_test[1],
         data_all_classes_test[2]),
        axis=0)
    cls_train = _svm_labels(data_all_classes)
    cls_test = _svm_labels(data_all_classes_test)

    # One entry per kernel, evaluated in the original order.
    for params in ({'kernel': 'linear', 'C': C},
                   {'kernel': 'rbf', 'C': C, 'gamma': gamma},
                   {'kernel': 'poly', 'degree': 4, 'C': C, 'gamma': gamma},
                   {'kernel': 'sigmoid', 'C': C, 'gamma': gamma}):
        _fit_and_report(params, data_train, cls_train, data_test, cls_test)