Example #1
0
def hnswlibTok(X,eps,min_Pts):
    """Find the eps-neighbourhood of every sample and the set of core samples.

    Despite the name, no HNSW index is involved: neighbours are found with an
    exact KD-tree radius search.

    Args:
        X: Samples accepted by ``KDTree``, one sample per row.
        eps: Neighbourhood radius. A sample at distance exactly ``eps`` is
            still counted as a neighbour.
        min_Pts: Minimum neighbourhood size (the sample itself included) for
            a sample to be a core object.

    Returns:
        A tuple ``(neighbor_list, omega_list)``:
        ``neighbor_list[i]`` is the set of row indices lying within ``eps``
        of row ``i`` (row ``i`` itself included), and ``omega_list`` is the
        set of row indices whose neighbourhood holds at least ``min_Pts``
        members.
    """
    tree = KDTree(X, leaf_size=50)

    # A radius query returns only the points inside eps, so there is no need
    # to rank all n samples for every row and then cut the list by distance.
    neighbor_arrays = tree.query_radius(X, r=eps)
    neighbor_list = [set(indices.tolist()) for indices in neighbor_arrays]

    # Core samples are identified by their own row number rather than by the
    # first search hit: with duplicated coordinates the first hit may be a
    # different row that merely sits at distance zero.
    omega_list = {
        row
        for row, neighbors in enumerate(neighbor_list)
        if len(neighbors) >= min_Pts
    }

    return neighbor_list,omega_list
Example #2
0
 def metric(self, X, Y, n_features=None, dist_func=euclidean):
     """Aggregate nearest-neighbour distances between two point sets.

     Both inputs are reshaped to ``(-1, n_features)``. A KD-tree is built on
     the input with the smaller ``len()`` (``X`` when the lengths are equal);
     every row of the other input is matched to its nearest tree point, and
     the resulting distances are reduced with ``self.linkage``.

     Args:
         X: First point set; must support ``len()`` and ``reshape``.
         Y: Second point set; must support ``len()`` and ``reshape``.
         n_features: Number of coordinates per point. When ``None`` it is
             taken from the trailing axis of ``X``, which must then be at
             least 2-D.
         dist_func: Not used by this implementation; kept so existing
             callers that pass it keep working.

     Returns:
         Whatever ``self.linkage`` returns for the nearest-neighbour
         distances.

     Raises:
         ValueError: If ``n_features`` is ``None`` and ``X`` has fewer than
             two dimensions, so the point width cannot be inferred.
     """
     if n_features is None:
         # reshape(-1, None) is a TypeError, so a usable width is required.
         if X.ndim < 2:
             raise ValueError(
                 "n_features is required when X is not at least 2-D")
         n_features = X.shape[-1]

     # The longer input supplies the query points; the shorter one is indexed.
     queries, reference = (X, Y) if len(X) > len(Y) else (Y, X)
     queries = queries.reshape(-1, n_features)
     reference = reference.reshape(-1, n_features)

     kdtree = KDTree(reference)
     nearest_dist, _ = kdtree.query(queries)
     return self.linkage(nearest_dist)
Example #3
0
def get_bags_of_words(image_paths):
    '''
    Compute a bag-of-visual-words histogram for every image.

    The vocabulary is read from 'vocab.npy' in the current working directory
    and is treated as one visual word per row. For each image, SIFT
    descriptors are extracted from its grayscale version, each descriptor is
    assigned to its nearest vocabulary word with a KD-tree lookup, and the
    number of descriptors assigned to every word is counted.

    The descriptor settings must match the ones used to build the
    vocabulary, otherwise descriptors and words live in different feature
    spaces and the nearest-word lookup is meaningless.

    Inputs:
        image_paths: A Python list of strings, where each string is a complete
                     path to one image on the disk.

    Outputs:
        An nxd numpy array of integer counts, where n is the number of images
        in image_paths and d is the number of vocabulary words. Row i is the
        histogram of image_paths[i]; an image without any detected keypoints
        gets an all-zero row.
    '''

    vocab = np.load('vocab.npy')
    print('Loaded vocab from file.')

    # One histogram bin per vocabulary word (not per image).
    vocab_size = len(vocab)
    tree = KDTree(vocab)
    sift = cv2.xfeatures2d.SIFT_create()

    histograms = np.zeros((len(image_paths), vocab_size), dtype=int)
    for row, image_path in enumerate(tqdm(image_paths, desc='SIFT')):
        image = cv2.imread(image_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, descriptors = sift.detectAndCompute(gray, None)

        # No keypoints means no descriptors to assign; keep the zero row.
        if descriptors is None or len(descriptors) == 0:
            continue

        _, nearest_ind = tree.query(descriptors, k=1)
        # Flatten so the result works whether the query returns one column
        # per neighbour or a flat vector, then count hits per word.
        histograms[row] = np.bincount(np.asarray(nearest_ind).ravel(),
                                      minlength=vocab_size)
    return histograms
Example #4
0
def median_smallest_distance(points, tree=None):
    """Estimate the typical spacing ("grid size") of a point dataset.

    Returns the median, over a fixed-seed random sample of at most 100
    points, of the distance reported in the second column of a 2-nearest
    neighbour query.

    Args:
        points: Sequence of point coordinates, converted to a numpy array.
        tree: Optional pre-built spatial index exposing
            ``query(sample, k=2, return_distance=True)``. When omitted, the
            points are de-duplicated and a ``KDTree`` is built on them.
            NOTE(review): a caller-supplied tree is presumably built on the
            same ``points``; this is not checked here.

    Returns:
        The median of the second-nearest-hit distances for the sample.
    """
    points = numpy.array(points)
    if tree is None:
        # De-duplicate through a set of tuples (numpy.unique along axis 0
        # was found too slow), otherwise repeated points would report a
        # zero distance to their "neighbour".
        unique_rows = {tuple(row) for row in points}
        points = numpy.array(list(unique_rows))
        tree = KDTree(points)

    # Fixed seed keeps the sampled subset reproducible between calls.
    rng = numpy.random.RandomState(89)
    n_samples = min(len(points), 100)
    chosen = rng.choice(len(points), n_samples, replace=False)

    distances, _ = tree.query(points[chosen], k=2, return_distance=True)

    # Column 0 is expected to be each sampled point matching itself, so the
    # closest other point is taken from column 1.
    closest_other = distances[:, 1]
    return numpy.median(closest_other)