Example #1
def main(data_path_head, data_path_tail, args):
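    """Run the three best RPCA configurations from the grid-search report on
    a new dataset, saving each model and printing its score and confusion
    matrix."""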
    ### ********** LOAD DATA ********** ###
    data = load_raw_data(data_path_tail, data_path_head)

    ### ********** PREPROCESS DATA ********** ###
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])

    ### ********** THREE BEST CONFIGURATIONS FROM REPORT "GRID SEARCH" ******** ###
    rpca_testing_windows = [[225, 225], [225, 225], [125, 125]]
    rpca_testing_windows_types = ['rectangle', 'ellipse', 'rectangle']
    rpca_testing_overlaps = [0.5, 0.8, 0.75]
    rpca_testing_confidence = [1.5, 2, 2.5]

    ### ********** PERFORM RPCA ON NEW DATASET WITH DEFINED CONFIGURATIONS ********* ###
    for window, window_type, overlap, confidence in zip(
            rpca_testing_windows, rpca_testing_windows_types,
            rpca_testing_overlaps, rpca_testing_confidence):
        # Define Naming Strings
        save_path = args.save_path + "/RPCA_generalization_voting"
        string_overlap = "{:.2f}".format(overlap).replace(".", "")
        classifier_name = "RPCA_{}_{}x{}_ol{}".format(window_type, window[0],
                                                      window[1],
                                                      string_overlap)

        # Perform RPCA and save it
        rpca = RobustPCAGrid(window,
                             max_iter=2000,
                             overlap=overlap,
                             window_type=window_type,
                             predict_method='voting',
                             confidence=confidence,
                             name=classifier_name)
        rpca, rpca_prediction, rpca_score, rpca_err, rpca_confusion_matrix = classify(
            rpca,
            train_data=data[:, :3],
            train_lbls=data[:, -1],
            test_data=data[:, :3],
            test_lbls=data[:, -1],
            stats=args.stats,
            save=True,
            save_dir=save_path)

        # Create results matrix and print to stdout
        conf_mat = matrix_string(rpca_confusion_matrix)
        print("*** {} ***".format(classifier_name))
        print("\trpca Score: %.6f \n %s" % (rpca_score, conf_mat))
Example #2
def main(head, tail, classification_path):
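    """Plot a saved test prediction for a dataset, then plot the dataset with
    the samples predicted as noise removed, colored by their true labels."""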
    ### ********** LOAD DATA ********** ###
    data = load_raw_data(tail, head)

    # Open specified file
    test_prediction = np.loadtxt(classification_path)

    ScatterPlot3D(data[:, :3], labels=test_prediction, title="Test Prediction")
    filt_data = np.delete(data,
                          np.argwhere(test_prediction == 1)[:, 0],
                          axis=0)
    ScatterPlot3D(filt_data[:, :3],
                  labels=filt_data[:, -1],
                  title="Filtered Dataset With true labels")

    if sys.flags.interactive != 1:
        vispy.app.run()
Example #3
def main(head, tail):
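    """Plot the noise-free subset of a dataset alongside the full dataset
    (one dummy label is set to 1, presumably to keep the color scale
    two-class)."""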
    ### ********** LOAD DATA ********** ###
    data = load_raw_data(tail, head)

    ### ********** FILTER DATA ********** ###
    filt_data = np.delete(data[:, :3],
                          np.argwhere(data[:, -1] == 1)[:, 0],
                          axis=0)
    zeros = np.zeros(len(filt_data))
    zeros[-1] = 1

    ### ********** PLOTTING *********** ###
    ScatterPlot3D(filt_data, labels=zeros, title="True Samples Only")
    ScatterPlot3D(data, labels=data[:, -1], title="All Data")

    if sys.flags.interactive != 1:
        vispy.app.run()
Example #4
def main(head, tail, classification_path):
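    """Preprocess a dataset, remove the samples a saved prediction marks as
    noise, and plot a surface reconstruction of the remaining points."""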
    ### ********** LOAD DATA ********** ###
    data = load_raw_data(tail, head)
    ### ********** PREPROCESS DATA ********** ###
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])

    # Open specified file
    test_prediction = np.loadtxt(classification_path)

    filt_data = np.delete(data[:, :3],
                          np.argwhere(test_prediction == 1)[:, 0],
                          axis=0)
    reconstr = surface_reconstruction(filt_data, resolution=[500, 200])
    ScatterPlot3D(reconstr,
                  labels=np.zeros(len(reconstr)),
                  title="RPCA: Surface Estimation")

    if sys.flags.interactive != 1:
        vispy.app.run()
Example #5
def main(head, tail, rpca_path, s_idx, classification_path=None):
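    """Load a dill-pickled RobustPCAGrid model and plot its sparse component
    S, either in full (s_idx <= 0) or for a single window subset, colored by
    true and, if a classification file is given, classified labels."""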
    ### ********** LOAD DATA ********** ###
    data = load_raw_data(tail, head)
    if classification_path:
        # Open specified classification file
        test_prediction = np.loadtxt(classification_path)

    # Open specified RPCA model file
    with open(rpca_path, 'rb') as f:
        rpca = dill.load(f)
    if s_idx <= 0:
        ScatterPlot3D(rpca.get_full_S(),
                      labels=data[:, -1],
                      title="Full S of {} With True labels".format(rpca.name))
        if classification_path:
            ScatterPlot3D(
                rpca.get_full_S(),
                labels=test_prediction,
                title="Full S of {} With Classified labels".format(rpca.name))
    else:
        ScatterPlot3D(rpca.S_list[s_idx],
                      labels=data[rpca.sample_list[s_idx], -1],
                      title="Subset S with index {} of {} With True labels".
                      format(s_idx, rpca.name))
        if classification_path:
            # select the prediction entries belonging to this subset
            ScatterPlot3D(
                rpca.S_list[s_idx],
                labels=test_prediction[rpca.sample_list[s_idx]],
                title="Subset S with index {} of {} With Classified labels".
                format(s_idx, rpca.name))

    if sys.flags.interactive != 1:
        vispy.app.run()
Example #6
def main(data_path_head, data_path_tail, args):
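    """Load, preprocess, and split a dataset, then run whichever classifiers
    are enabled on the command line (kNN, Naive Bayes, linear/RBF SVM,
    Robust PCA) and print each score and confusion matrix."""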
    ### ********** LOAD DATA ********** ###
    print("-- Loading Data  --\n")
    data = load_raw_data(data_path_tail, data_path_head)
    print("Noise Ratio in data %f" %
          (np.count_nonzero(data[-1]) / data.shape[0]))

    ### ********** PREPROCESS DATA ********** ###
    print("\n-- Preprocessing Data  --\n")
    # preprocess true coordinates
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])
    # preprocess features
    data[:, 3:-2] = ScaleFeatures(data[:, 3:-2])

    ### ********** CREATE FEATURES ********** ###
    # dist = 1
    # dtp_feature = dist_to_plane(data[:,:3],dist)

    # k=[3,6,9,12,15]
    # kmean_dist_feature = knn_mean_dist(data[:,:3],[3,6,9,12,15])
    # kmean_z_dist_feature = knn_mean_z_dist(data[:,:3],[3,6,9,12,15])
    # kmaxd_feature= knn_max_dist(data[:,:3],k)

    # radius = [0.1, 0.2, 0.4, 0.8, 1]
    # #n_in_sphere_feature = samples_within_sphere(data[:,:3],radius)
    # #cent_z_sum_sphere_feature = centered_z_summation_within_sphere(data[:,0:3],radius)

    # # Write code to incorporate features in the dataset
    # data = data ### !!!
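    # A minimal sketch of the missing step (assuming each helper above returns
    # an array with one row per sample): stack the new feature columns between
    # the coordinates and the label column, e.g.
    #   data = np.column_stack((data[:, :3], dtp_feature, kmean_dist_feature,
    #                           kmean_z_dist_feature, data[:, -1]))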

    ### ********** TRAIN TEST SPLIT ********** ###
    chosen_features = np.array([1, 2, 3])
    features_incl_lbls = np.append(chosen_features, -1)
    train_data, train_lbls, test_data, test_lbls = split_data(
        data[:, features_incl_lbls], test_size=0.25)

    ### ********** Nearest Neighbors Classifier ********** ###
    if args.knn is True:
        print("-- Performing kNN  --")
        k_neighbors = 1
        knn = KNearestNeighbors(k=k_neighbors)
        knn, knn_prediction, knn_score, knn_err, knn_confusion_matrix = classify(
            knn,
            train_data,
            train_lbls,
            test_data,
            test_lbls,
            stats=args.stats)
        conf_mat = matrix_string(knn_confusion_matrix)
        print("\tkNN k=%d Score: %.6f \n %s" %
              (k_neighbors, knn_score, conf_mat))

    ### ********** Naive Bayes Classifier********** ###
    if args.nb is True:
        print("-- Performing Naive Bayes --")
        nb = NaiveBayes()
        nb, nb_prediction, nb_score, nb_err, nb_confusion_matrix = classify(
            nb,
            train_data,
            train_lbls,
            test_data,
            test_lbls,
            save=True,
            stats=args.stats)
        conf_mat = matrix_string(nb_confusion_matrix)
        print("\tnb Score: %.6f \n %s" % (nb_score, conf_mat))

    ### ********** Linear/Non-linear SVM Classifier********** ###
    if args.svm is True:
        print("-- Performing Linear SVM --")
        lin_svm = SupportVectorMachine(max_iter=200)
        lin_svm, lin_svm_pred, lin_svm_score, lin_svm_err, lin_confusion_matrix = classify(
            lin_svm,
            train_data,
            train_lbls,
            test_data,
            test_lbls,
            stats=args.stats)
        conf_mat = matrix_string(lin_confusion_matrix)
        print("\tLinear SVM Score: %.6f \n %s" % (lin_svm_score, conf_mat))

        print("-- Performing Kernel SVM --")
        kern_svm = SupportVectorMachine(kernel='rbf',
                                        max_iter=1000,
                                        gamma='auto',
                                        C=0.1,
                                        tol=1e-3)
        kern_svm, kern_svm_pred, kern_svm_score, kern_svm_err, kern_confusion_matrix = classify(
            kern_svm,
            train_data,
            train_lbls,
            test_data,
            test_lbls,
            stats=args.stats)
        conf_mat = matrix_string(kern_confusion_matrix)
        print("\tKernel SVM Score: %.6f \n %s" % (kern_svm_score, conf_mat))

    ### ********** Robust PCA ********** ###
    if args.rpca is True:
        print("-- Performing Robust PCA --")
        rpca = RobustPCAGrid([101, 101],
                             max_iter=2000,
                             overlap=0,
                             window_type='rectangle',
                             predict_method='voting')
        rpca, rpca_prediction, rpca_score, rpca_err, rpca_confusion_matrix = classify(
            rpca,
            train_data=data[:, :3],
            train_lbls=data[:, -1],
            test_data=data[:, :3],
            test_lbls=data[:, -1],
            stats=args.stats,
            save=False)
        conf_mat = matrix_string(rpca_confusion_matrix)
        print("\trpca Score: %.6f \n %s" % (rpca_score, conf_mat))
        if args.figsshow:
            ScatterPlot3D(data,
                          labels=rpca_prediction,
                          x_feat=0,
                          y_feat=1,
                          z_feat=2,
                          title="RPCA: Predictions")
            filt_data = np.delete(data,
                                  np.argwhere(rpca_prediction == 1)[:, 0],
                                  axis=0)
            ScatterPlot3D(filt_data,
                          labels=np.zeros(len(filt_data)),
                          title="RPCA: Noise Filtered")

            #reconstr = surface_reconstruction(filt_data, resolution=[500,500])
            #ScatterPlot3D(reconstr,labels=np.zeros(len(reconstr)), title="RPCA: Surface Estimation")

    ### ********** PLOTTING DATA ********** ###
    # ScatterPlot3D(data, labels=training_labels, x_feat=0, y_feat=1,
    #               z_feat=2, label_feat=-1, title="Scatterplot")
Example #7
def main(data_path_head, data_path_tail, args):
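    """Grid search for RPCA: for each confidence sigma, sweep window sizes
    and rectangle/ellipse overlaps, saving every fitted model and printing
    its score and confusion matrix."""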
    ### ********** LOAD DATA ********** ###
    print("-- Loading Data  --\n")
    data = load_raw_data(data_path_tail, data_path_head)

    ### ********** PREPROCESS DATA ********** ###
    print("\n-- Preprocessing Data  --\n")
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])

    rpca_testing_windows = [[25, 25], [75, 75], [125, 125], [175, 175],
                            [225, 225]]
    rpca_testing_rect_overlaps = [0, 0.25, 0.5, 0.75]
    rpca_testing_ellip_overlaps = [0.5, 0.6, 0.7, 0.8]
    rpca_testing_sigmas = [1.5, 2, 2.5]

    ### ********** Perform grid search of defined parameters with Robust PCA ********** ###
    for sigma in rpca_testing_sigmas:
        save_path = args.save_path + "_std{}".format(sigma).replace(".", "")
        for window in rpca_testing_windows:
            for overlap in rpca_testing_rect_overlaps:
                save_path_rectangle = save_path + "/RPCA_Rect_voting"
                string_overlap = "{:.2f}".format(overlap).replace(".", "")
                classifier_name = "RPCA_Rect_{}x{}_ol{}".format(
                    window[0], window[1], string_overlap)
                rpca = RobustPCAGrid(window,
                                     max_iter=2000,
                                     overlap=overlap,
                                     window_type='rectangle',
                                     predict_method='voting',
                                     confidence=sigma,  # sigma being swept
                                     name=classifier_name)
                rpca, rpca_prediction, rpca_score, rpca_err, rpca_confusion_matrix = classify(
                    rpca,
                    train_data=data[:, :3],
                    train_lbls=data[:, -1],
                    test_data=data[:, :3],
                    test_lbls=data[:, -1],
                    stats=args.stats,
                    save=True,
                    save_dir=save_path_rectangle)
                conf_mat = matrix_string(rpca_confusion_matrix)
                print("*** {} ***".format(classifier_name))
                print("\trpca Score: %.6f \n %s" % (rpca_score, conf_mat))

            for overlap in rpca_testing_ellip_overlaps:
                save_path_ellipse = save_path + "/RPCA_Ellipse_voting"
                string_overlap = "{:.2f}".format(overlap).replace(".", "")
                classifier_name = "RPCA_Ellipse_{}x{}_ol{}".format(
                    window[0], window[1], string_overlap)
                rpca = RobustPCAGrid(window,
                                     max_iter=2000,
                                     overlap=overlap,
                                     window_type='ellipse',
                                     predict_method='voting',
                                     confidence=sigma,  # sigma being swept
                                     name=classifier_name)
                rpca, rpca_prediction, rpca_score, rpca_err, rpca_confusion_matrix = classify(
                    rpca,
                    train_data=data[:, :3],
                    train_lbls=data[:, -1],
                    test_data=data[:, :3],
                    test_lbls=data[:, -1],
                    stats=args.stats,
                    save=True,
                    save_dir=save_path_ellipse)
                conf_mat = matrix_string(rpca_confusion_matrix)
                print("*** {} ***".format(classifier_name))
                print("\trpca Score: %.6f \n %s" % (rpca_score, conf_mat))
Example #8
def main(data_path_head, data_path_tail, args):
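    """Per-feature classifier sweep: for each derived EIVA feature, train kNN
    (over a range of k), Naive Bayes, and linear/RBF SVMs (over C, tol, and
    gamma grids), saving every model and printing score and confusion
    matrix."""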
    ### ********** LOAD DATA ********** ###
    print("-- Loading Data  --\n")
    data = load_raw_data(data_path_tail, data_path_head)

    ### ********** PREPROCESS DATA ********** ###
    print("\n-- Preprocessing Data  --\n")

    # preprocess true coordinates
    data[:, :3] = PcaRotation(data[:, :3])
    data[:, :3] = ScaleData(data[:, :3])

    # preprocess features
    data[:, 3:-1] = ScaleFeatures(data[:, 3:-1])

    ### ********** TRAIN TEST SPLIT ********** ###
    eiva_feature_names = ["x",
                          "y",
                          "z",
                          "Dist_to_neighbour",
                          "Dist_to_avg_surf_r80cm",
                          "Neighbours_in_sphere_r80cm",
                          "Z_sum_in_circ_r80cm",
                          "kNN_mean_Z_dist_n8",
                          "kNN_mean_dist_k8"]

    # start at 3 to skip the x, y, z coordinate columns
    for feature, feature_name in enumerate(eiva_feature_names[3:], start=3):
        feature_incl_labels = np.append(feature, -1)

        train_data, train_lbls, test_data, test_lbls = split_data(
            data[:, feature_incl_labels], test_size=0.25)

        ### ********** Nearest Neighbors Classifier ********** ###
        if args.knn is True:
            print("-- Performing kNN  --")
            for k_neighbors in 2**np.arange(9):
                save_path = args.save_path + "/kNN"
                classifier_name = ("kNN_k{}_".format(k_neighbors) +
                                   "Feature{}={}".format(feature, feature_name))
                knn = KNearestNeighbors(k=k_neighbors, name=classifier_name)
                knn, knn_prediction, knn_score, knn_err, knn_confusion_matrix = classify(
                    knn,
                    train_data,
                    train_lbls,
                    test_data,
                    test_lbls,
                    stats=args.stats,
                    save=True,
                    save_dir=save_path)
                conf_mat = matrix_string(knn_confusion_matrix)
                print("\tkNN k=%d Score: %.6f \n %s" %
                      (k_neighbors, knn_score, conf_mat))

        ### ********** Naive Bayes Classifier ********** ###
        if args.nb is True:
            print("-- Performing Naive Bayes --")
            save_path = args.save_path + "/NB"
            classifier_name = "NB_" + "Feature{}={}_".format(feature, feature_name)
            nb = NaiveBayes(name=classifier_name)
            nb, nb_prediction, nb_score, nb_err, nb_confusion_matrix = classify(
                nb,
                train_data,
                train_lbls,
                test_data,
                test_lbls,
                stats=args.stats,
                save=True,
                save_dir=save_path)
            conf_mat = matrix_string(nb_confusion_matrix)
            print("\tnb Score: %.6f \n %s" % (nb_score, conf_mat))

        ### ********** Linear/Non-linear SVM Classifier ********** ###
        if args.svm is True:
            for C in (2.**np.arange(-5, 16, 10)).tolist():
                print("C = %s" % C)
                for tol in (10.**np.arange(-5, 0, 2)).tolist():
                    print("-- Performing Linear SVM --")
                    save_path = args.save_path + "/linSVM"
                    classifier_name = ("LinSVM_C{}_tol{}_".format(C, tol) +
                                       "Feature{}={}_".format(feature, feature_name))
                    lin_svm = SupportVectorMachine(max_iter=200,
                                                   C=C,
                                                   tol=tol,
                                                   name=classifier_name)
                    lin_svm, lin_svm_pred, lin_svm_score, lin_svm_err, lin_confusion_matrix = classify(
                        lin_svm,
                        train_data,
                        train_lbls,
                        test_data,
                        test_lbls,
                        stats=args.stats,
                        save=True,
                        save_dir=save_path)
                    conf_mat = matrix_string(lin_confusion_matrix)
                    print("\tLinear SVM Score: %.6f \n %s" %
                          (lin_svm_score, conf_mat))
                    for gamma in (2.**np.arange(-10, 0, 3)).tolist():
                        print("-- Performing Kernel SVM --")
                        save_path = args.save_path + "/RbfSVM"
                        classifier_name = (
                            "RbfSVM_C{}_tol{}_gamma{}".format(C, tol, gamma) +
                            "Feature{}={}_".format(feature, feature_name))
                        kern_svm = SupportVectorMachine(kernel='rbf',
                                                        max_iter=200,
                                                        gamma=gamma,
                                                        C=C,
                                                        tol=tol,
                                                        name=classifier_name)
                        kern_svm, kern_svm_pred, kern_svm_score, kern_svm_err, kern_confusion_matrix = classify(
                            kern_svm,
                            train_data,
                            train_lbls,
                            test_data,
                            test_lbls,
                            stats=args.stats,
                            save=True,
                            save_dir=save_path)
                        conf_mat = matrix_string(kern_confusion_matrix)
                        print("\tKernel SVM Score: %.6f \n %s" %
                              (kern_svm_score, conf_mat))
Example #9
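# Stand-alone script: load the data file given on the command line, fit a
# grid RPCA, and plot the predicted labels.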
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='''Test of Robust PCA''')
    parser.add_argument(
        'path',
        help='Input path and name of file containing the data to be plotted')

    #  Parse Arguments
    args = parser.parse_args()

    # Path is a data file
    if os.path.exists(args.path):
        print("Test")
        # Get file name and path from argument
        head, tail = os.path.split(args.path)

        # Read data from file
        data = load_raw_data(tail, head)
        ### ********** PREPROCESS DATA ********** ###
        print("\n-- Preprocessing Data  --\n")
        # preprocess true coordinates
        data[:, :3] = PcaRotation(data[:, :3])
        data[:, :3] = ScaleData(data[:, :3])

        # Grid RPCA
        rpca = RobustPCAGrid([101, 101],
                             max_iter=1000,
                             overlap=0.5,
                             window_type='rectangle',
                             predict_method='voting')  # alternatively [50, 50]
        rpca.fit(data[:, :3])
        labels = rpca.predict(None)
        ScatterPlot3D(data[:, :3], labels=labels, title="Predicted labels")