Example No. 1
import numpy as np

# project-local helpers assumed available: KDTree, mode_with_random_tie_breaking
def knn_classification(k, dist_func, X_train, Y_train, X_predict):
    (m_examples, n_dimensions) = X_train.shape

    # use kd tree structure for knn searching
    labelled_points = np.append(X_train,
                                Y_train.reshape(m_examples, 1),
                                axis=1)
    t = KDTree.build_tree(labelled_points, n_dimensions)

    # store results in the predictions vector
    Y_predict = np.empty(X_predict.shape[0])

    # record the number of points visited across all searches, for
    # benchmark/comparison purposes (accumulated below but never returned)
    total_points_searched = 0

    # perform a knn search for each test point
    for i, x in enumerate(X_predict):
        (labelled_nearest_neighbors, _, search_space_size) = \
                KDTree.knn_search(t, x, k, n_dimensions, dist_func)

        # nearest neighbor labels are the last column
        nearest_neighbors_labels = np.array(labelled_nearest_neighbors)[:, -1]
        Y_predict[i] = mode_with_random_tie_breaking(nearest_neighbors_labels)
        total_points_searched += search_space_size

    return Y_predict
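Both snippets depend on a mode_with_random_tie_breaking helper that is not shown here. A minimal sketch of such a helper, assuming it takes a sequence of labels and returns the most frequent one with ties broken uniformly at random:

import random
from collections import Counter

def mode_with_random_tie_breaking(labels):
    # count occurrences and keep every label tied for the highest count
    counts = Counter(labels)
    top_count = max(counts.values())
    tied = [label for label, count in counts.items() if count == top_count]
    # break ties uniformly at random so no class is systematically favored
    return random.choice(tied)

A typical dist_func for these classifiers would be the Euclidean distance, e.g. lambda a, b: np.linalg.norm(np.asarray(a) - np.asarray(b)).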
Example No. 2
import numpy as np
from collections import Counter

# project-local helpers assumed available: KDTree, mode_with_random_tie_breaking
def one_vs_all_knn_classification(k, dist_func, X_train, Y_train, X_predict):
    (m_examples, n_dimensions) = X_train.shape

    # use kd tree structure for knn searching
    train_indices = (np.arange(0, m_examples)).reshape(m_examples, 1)
    indexed_points = np.append(X_train, train_indices, axis=1)
    t = KDTree.build_tree(indexed_points, n_dimensions)

    # store results in the predictions vector
    Y_predict = np.empty(X_predict.shape[0])

    # validate labels once, before the query loop:
    # http://en.wikipedia.org/wiki/Multiclass_classification
    # the one-vs-all strategy below assumes zero-based, integer labels and
    # that each class has at least one representative in the training set
    possible_labels = set(Y_train)
    zero_based_indexed_integer_labels = range(len(possible_labels))
    assert possible_labels.issubset(zero_based_indexed_integer_labels), \
           "accept only zero-based indexed, integer labels"

    # perform a knn search for each test point
    for i, x in enumerate(X_predict):
        indexed_nearest_neighbors = \
                KDTree.knn_search(t, x, k, n_dimensions, dist_func)[0]

        # neighbor indices are the last column
        nearest_neighbors_indices = \
                np.array(indexed_nearest_neighbors)[:, -1].astype(int)

        # the predicted label is the one whose binary classifier collects
        # the most votes, so store the votes in a table
        classifier_votes_tab = {c: 0 for c in zero_based_indexed_integer_labels}
        for c in zero_based_indexed_integer_labels:
            # one-vs-all: class c against the rest
            Y_c = np.zeros(m_examples)
            Y_c[Y_train == c] = 1
            classifier_votes_tab[c] = int(Y_c[nearest_neighbors_indices].sum())

        # flatten the vote table into repeated labels and take the mode
        flattened_table = list(Counter(classifier_votes_tab).elements())
        Y_predict[i] = mode_with_random_tie_breaking(flattened_table)

    return Y_predict
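The flattening step above relies on collections.Counter.elements(), which repeats each key as many times as its count, so the vote table becomes a multiset whose mode is the winning class. A small illustration with hypothetical vote counts:

from collections import Counter

votes = {0: 1, 1: 4, 2: 3}  # hypothetical per-class vote counts for k = 8
flattened = list(Counter(votes).elements())
print(flattened)  # [0, 1, 1, 1, 1, 2, 2, 2] -> mode is class 1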
Example No. 3
import os
import time

import numpy as np

# project-local helpers assumed available: OCTree, KDTree, KNNResultSet,
# RadiusNNResultSet, read_velodyne_bin
def main(data_dir):
    # kNN & RNN configuration:
    leaf_size = 32
    min_extent = 0.0001
    k = 8
    radius = 1

    # KITTI Velodyne scans: one point-cloud file per iteration
    filenames = os.listdir(data_dir)
    iteration_num = len(filenames)

    print("OCTree --------------")
    construction_time_sum = 0
    knn_time_sum = 0
    radius_time_sum = 0
    brute_time_sum = 0
    for i in range(iteration_num):
        filename = os.path.join(data_dir, filenames[i])
        point_cloud = read_velodyne_bin(filename)

        # build tree:
        begin_t = time.time()
        octree = OCTree(point_cloud=point_cloud,
                        leaf_size=leaf_size,
                        min_extent=min_extent)
        construction_time_sum += time.time() - begin_t

        query = point_cloud[0, :]

        # kNN query:
        begin_t = time.time()
        knn_result_set = KNNResultSet(capacity=k)
        octree.knn_search(query, knn_result_set)
        knn_time_sum += time.time() - begin_t

        # RNN query:
        begin_t = time.time()
        rnn_result_set = RadiusNNResultSet(radius=radius)
        octree.rnn_fast_search(query, rnn_result_set)
        radius_time_sum += time.time() - begin_t

        # brute force: sort all points by distance to the query
        begin_t = time.time()
        dists = np.linalg.norm(point_cloud - query, axis=1)
        nn_idx = np.argsort(dists)
        nn_dist = dists[nn_idx]  # kept only so the full pipeline is timed
        brute_time_sum += time.time() - begin_t

    print("Octree: build %.3f, knn %.3f, radius %.3f, brute %.3f" %
          (construction_time_sum * 1000 / iteration_num, knn_time_sum * 1000 /
           iteration_num, radius_time_sum * 1000 / iteration_num,
           brute_time_sum * 1000 / iteration_num))

    print("KDTree --------------")
    construction_time_sum = 0
    knn_time_sum = 0
    radius_time_sum = 0
    brute_time_sum = 0
    for i in range(iteration_num):
        filename = os.path.join(data_dir, filenames[i])
        point_cloud = read_velodyne_bin(filename)

        # build tree:
        begin_t = time.time()
        kd_tree = KDTree(point_cloud=point_cloud, leaf_size=leaf_size)
        construction_time_sum += time.time() - begin_t

        query = point_cloud[0, :]

        # kNN query:
        begin_t = time.time()
        knn_result_set = KNNResultSet(capacity=k)
        kd_tree.knn_search(query, knn_result_set)
        knn_time_sum += time.time() - begin_t

        # RNN query:
        begin_t = time.time()
        rnn_result_set = RadiusNNResultSet(radius=radius)
        kd_tree.rnn_search(query, rnn_result_set)
        radius_time_sum += time.time() - begin_t

        # brute force: sort all points by distance to the query
        begin_t = time.time()
        dists = np.linalg.norm(point_cloud - query, axis=1)
        nn_idx = np.argsort(dists)
        nn_dist = dists[nn_idx]  # kept only so the full pipeline is timed
        brute_time_sum += time.time() - begin_t

    print("Kdtree: build %.3f, knn %.3f, radius %.3f, brute %.3f" %
          (construction_time_sum * 1000 / iteration_num, knn_time_sum * 1000 /
           iteration_num, radius_time_sum * 1000 / iteration_num,
           brute_time_sum * 1000 / iteration_num))
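The benchmark reads KITTI Velodyne scans through a read_velodyne_bin helper that is not shown. A minimal sketch, assuming the standard KITTI layout of flat float32 records of (x, y, z, reflectance) and that only the xyz coordinates feed the spatial indices:

import numpy as np

def read_velodyne_bin(path):
    # each KITTI Velodyne record is four float32 values: x, y, z, reflectance
    scan = np.fromfile(path, dtype=np.float32).reshape(-1, 4)
    # keep only the xyz coordinates for tree construction and queries
    return scan[:, :3]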