Python NearestNeighbors.search Examples

Programming Language: Python

Namespace/Package Name: sklearn.neighbors

Class/Type: NearestNeighbors

Method/Function: search

Examples at hotexamples.com: 5

Python NearestNeighbors.search - 5 examples found. These are the top-rated real-world Python examples of sklearn.neighbors.NearestNeighbors.search, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

NearestNeighbors(30)

set_params(30)

radius_neighbors_graph(30)

radius_neighbors(30)

kneighbors_graph(30)

kneighbors(30)

fit(30)

predict(12)

sum(9)

compute_neighbors(7)

search(5)

query(5)

__init__(4)

distances(3)

search_by_vector(3)

knnQueryBatch(3)

get_params(3)

add_data(3)

build(3)

createIndex(3)

addDataPointBatch(3)

transform(2)

add(2)

setQueryTimeParams(2)

ravel(2)

predict_proba(1)

_random_state(1)

decision_function(1)

score(1)

nonzero(1)

_graph_mode(1)

_fitid(1)

_cluster_mode(1)

train(1)

_n_clusters(1)

nn(1)

nn_index(1)

closest_neighbor(1)

fit_predict(1)

fit_transform(1)

flatten(1)

getNearestDist(1)

getNearestDistToMature(1)

get_feature_names(1)

kneighborgs(1)

find_nnf(1)

astype(1)

kneighbours(1)

append(1)

metric(1)

Example #1

Show file

File: data_helper_dense.py Project: Ramay7/GNNs

def find_edges(input, test, K):
    """Return kNN edges from every row of ``test`` to rows of ``input``.

    The backend is selected by the module-level ``kNN_type`` switch:
    1 = scikit-learn ball tree, 2 = SciPy KD-tree, 3 = n2 HNSW index,
    4 = PySparNN cluster index.  Any other value raises
    ``NotImplementedError``.

    ``K + 1`` neighbours are requested per query row, and every pair whose
    two indices coincide is dropped (presumably because a row is its own
    nearest neighbour when ``test`` and ``input`` are the same data).

    Returns a list of ``(test_row, input_row)`` integer tuples.  Progress
    and cumulative timings are printed to stdout.
    """
    print(f"building kNN classifier ... ", end=" ")
    started = time.time()
    n_query = K + 1

    # Backends 1-3 are fed dense matrices.
    if kNN_type <= 3:
        input = input.todense()
        test = test.todense()

    # --- build the index (imports stay lazy: only the chosen backend is needed)
    if kNN_type == 1:
        from sklearn.neighbors import NearestNeighbors
        knn = NearestNeighbors(n_neighbors=n_query, algorithm='ball_tree')
        knn = knn.fit(input)
    elif kNN_type == 2:
        from scipy import spatial
        knn = spatial.KDTree(input)
    elif kNN_type == 3:
        from n2 import HnswIndex
        # distance_type in ['angular', 'L2']
        knn = HnswIndex(input.shape[1], distance_type)
        for row in tqdm(range(input.shape[0])):
            knn.add_data(input[row, :])
        knn.build(n_threads=10)
    elif kNN_type == 4:
        import pysparnn.cluster_index as ci
        knn = ci.MultiClusterIndex(input, range(input.shape[0]))
    else:
        raise NotImplementedError
    print(f"time={time.time()-started:.3f}s")

    # --- query the index
    print("finding indices ... ", end=" ")
    if kNN_type == 1:
        neighbours = knn.kneighbors(test)[1]
    elif kNN_type == 2:
        neighbours = knn.query(test, k=n_query)[1]
    elif kNN_type == 3:
        neighbours = [
            knn.search_by_vector(test[row, :], k=n_query)
            for row in tqdm(range(test.shape[0]))
        ]
    else:
        neighbours = knn.search(test, k=n_query, k_clusters=100,
                                return_distance=False)
    print(f"time={time.time()-started:.3f}s")

    # --- flatten to an edge list, skipping pairs with identical indices
    edge_list = [
        (row, col)
        for row, cols in enumerate(neighbours)
        for col in map(int, cols)
        if row != col
    ]
    print(f"done! .... time={time.time()-started:.3f}s")
    return edge_list

Example #2

Show file

File: feature_graphs.py Project: vreshniak/feature-driven-exemplar-inpainting

class NNF():
    """Nearest-neighbour field between target and source patches of an image.

    The search backend is chosen by the module-level ``_NN_algorithm``
    setting.  "FLANN", "Sklearn" and "FAISS" build an index over the
    flattened, weight-scaled source patches; "PatchMatch" delegates
    everything to ``pm.PatchMatch``.
    """

    def __init__(self,
                 image,
                 target_mask,
                 source_mask,
                 patch_size=(11, 11),
                 patch_weight=None,
                 num_neighbors=1):
        """Build the search structure for ``image``.

        Parameters
        ----------
        image : array with three axes (height, width, channels).
        target_mask, source_mask : masks selecting the pixels to be
            matched and the pixels allowed as matches.
        patch_size : patch height and width.
        patch_weight : per-pixel patch weights; all ones when ``None``.
        num_neighbors : number of neighbours returned per target patch.
        """
        im_h, im_w, im_ch = image.shape

        # Fix: the weight used to be stored only in the default case, so an
        # explicitly supplied ``patch_weight`` left the attribute undefined.
        if patch_weight is None:
            patch_weight = np.ones(patch_size, dtype=_im_dtype)
        self.patch_weight = patch_weight

        self.patch_size = patch_size
        self.num_neighb = num_neighbors

        print("Build NNF index: ", end=" ")
        start = time.time()

        if _NN_algorithm != "PatchMatch":
            self.source_ind = op.masked_indices(source_mask)
            self.target_ind = op.masked_indices(target_mask)

            # convert array indices to patch indices
            # NOTE(review): the same pad (from patch_size[0]) is used for both
            # axes, so this presumably expects square, odd-sized patches.
            pad = patch_size[0] // 2
            ind_y, ind_x = np.divmod(self.source_ind, im_w)
            self.source_ind = (ind_x - pad) + (ind_y - pad) * (im_w - 2 * pad)

            # one row per source patch, scaled by sqrt(weight) per channel
            source_point_cloud = extract_patches_2d(
                image, patch_size=patch_size)[self.source_ind].reshape(
                    (self.source_ind.size, -1)) \
                * np.repeat(np.sqrt(self.patch_weight), im_ch)

            # need this because of FLANN bug (?) with memory release
            self.target_point_cloud = np.zeros(
                (self.target_ind.size, source_point_cloud.shape[-1]),
                dtype=_im_dtype)

        if _NN_algorithm == "FLANN":
            self.nn = flann.FLANN()
            self.nn.build_index(source_point_cloud,
                                algorithm="kdtree",
                                trees=1)  #, log_level = "info")
        elif _NN_algorithm == "Sklearn":
            self.nn = NearestNeighbors(
                n_neighbors=num_neighbors,
                algorithm='kd_tree',
                metric='minkowski',
                n_jobs=-1)  #,metric_params={'w':self.patch_weight})
            self.nn.fit(X=source_point_cloud)
        elif _NN_algorithm == "FAISS":
            self.nn = faiss.IndexHNSWFlat(source_point_cloud.shape[1], 50)
            self.nn.add(source_point_cloud)

        if _NN_algorithm == "PatchMatch":
            self.nn = pm.PatchMatch(target_mask,
                                    source_mask,
                                    patch_size=patch_size,
                                    lambdas=np.ones_like(image,
                                                         dtype=_im_dtype))

        print('%f sec' % (time.time() - start))

    def calculate_nnf(self, image, init_guess=None):
        """Query the index with the target patches of ``image``.

        ``init_guess`` is forwarded to the PatchMatch backend only.

        Returns ``(ind_nnf, dist_nnf)``, both reshaped to
        ``(-1, num_neighbors)``.  For the index-based backends the arrays
        have one row per pixel; rows outside the target region stay zero.
        """
        im_h, im_w, im_ch = image.shape

        print("Query NNF index: ", end=" ")
        start = time.time()

        if _NN_algorithm != "PatchMatch":
            ind_nnf = np.zeros((im_h * im_w, self.num_neighb), dtype='int32')
            dist_nnf = np.zeros((im_h * im_w, self.num_neighb))

            # convert array indices to patch indices
            pad = self.patch_size[0] // 2
            ind_y, ind_x = np.divmod(self.target_ind, im_w)
            ind = (ind_x - pad) + (ind_y - pad) * (im_w - 2 * pad)

            # need this because of FLANN bug (?) with memory release
            np.copyto(self.target_point_cloud,
                      extract_patches_2d(
                          image, patch_size=self.patch_size)[ind].reshape(
                              (self.target_ind.size, -1)),
                      casting='same_kind',
                      where=True)
            self.target_point_cloud *= np.repeat(np.sqrt(self.patch_weight),
                                                 im_ch)

        # note that "ind" are patch indices, not array indices
        if _NN_algorithm == "FLANN":
            ind, dist = self.nn.nn_index(self.target_point_cloud,
                                         self.num_neighb)
        elif _NN_algorithm == "Sklearn":
            dist, ind = self.nn.kneighbors(X=self.target_point_cloud,
                                           return_distance=True)
        elif _NN_algorithm == "FAISS":
            dist, ind = self.nn.search(self.target_point_cloud,
                                       self.num_neighb)

        if _NN_algorithm != "PatchMatch":
            ind = ind.reshape((ind.shape[0], self.num_neighb))
            dist = dist.reshape((dist.shape[0], self.num_neighb))

            # convert patch indices to array indices
            ind = self.source_ind[ind.ravel()]
            ind_y, ind_x = np.divmod(ind, im_w - 2 * pad)
            ind = (ind_x + pad) + (ind_y + pad) * im_w

            ind = np.reshape(ind, (-1, self.num_neighb))
            # A single assignment fills every neighbour column; the previous
            # per-neighbour loop repeated this same whole-array write.
            ind_nnf[self.target_ind, :] = ind
            dist_nnf[self.target_ind, :] = dist
        elif _NN_algorithm == "PatchMatch":
            ind_nnf, dist_nnf = self.nn.find_nnf(image, init_guess=init_guess)

        print('%f sec' % (time.time() - start))

        ind_nnf = ind_nnf.reshape((-1, self.num_neighb))
        dist_nnf = dist_nnf.reshape((-1, self.num_neighb))

        return ind_nnf, dist_nnf

Example #3

Show file

File: data_helper_sparse.py Project: Ramay7/GNNs

def find_edges(input, test, K):
    """Return kNN edges from every row of ``test`` to rows of ``input``.

    The backend is selected by the module-level ``kNN_type`` switch:
    1 = scikit-learn ball tree, 2 = SciPy KD-tree, 3 = n2 HNSW index,
    4 = PySparNN cluster index, 5 = nmslib HNSW over sparse vectors.
    Any other value raises ``NotImplementedError``.

    ``K + 1`` neighbours are requested per query row; each result row is
    asserted to contain exactly ``K + 1`` entries, and pairs whose two
    indices coincide are dropped.

    Returns a list of ``(test_row, input_row)`` integer tuples.  Progress
    and cumulative timings are printed to stdout.
    """
    print(f"\tbuilding kNN classifier ... ", end=" ")
    t0 = time.time()
    k_query = K + 1

    # Only the scikit-learn and SciPy backends are fed dense matrices.
    if kNN_type in [1, 2]:
        input = input.todense()
        test = test.todense()

    # --- build the index (imports stay lazy: only the chosen backend is needed)
    if kNN_type == 1:
        from sklearn.neighbors import NearestNeighbors
        knn = NearestNeighbors(n_neighbors=k_query, algorithm='ball_tree')
        knn = knn.fit(input)
    elif kNN_type == 2:
        from scipy import spatial
        knn = spatial.KDTree(input)
    elif kNN_type == 3:
        from n2 import HnswIndex
        # distance_type in ['angular', 'L2']
        knn = HnswIndex(input.shape[1], distance_type)
        for row in tqdm(range(input.shape[0])):
            knn.add_data(input[row, :])
        knn.build(n_threads=20)
    elif kNN_type == 4:
        import pysparnn.cluster_index as ci
        knn = ci.MultiClusterIndex(input, range(input.shape[0]))
    elif kNN_type == 5:
        import nmslib
        index_time_params = {
            'M': 30,
            'indexThreadQty': num_threads,
            'efConstruction': 100,
            'post': 0,
        }
        # Available sparse spaces: 'l2_sparse', 'cosinesimil_sparse'
        # https://github.com/nmslib/nmslib/blob/master/manual/spaces.md
        knn = nmslib.init(method='hnsw',
                          space='l2_sparse',
                          data_type=nmslib.DataType.SPARSE_VECTOR)

        # (A disabled diagnostic that counted all-zero input rows lived here.)
        knn.addDataPointBatch(input)
        knn.createIndex(index_time_params, print_progress=True)

        # Setting query-time parameters
        query_time_params = {'efSearch': 100}
        print('Setting query-time parameters', query_time_params, end=" ")
        knn.setQueryTimeParams(query_time_params)
    else:
        raise NotImplementedError
    print(f"time={time.time()-t0:.3f}s")

    # --- query the index
    print("\tfinding indices ... ", end=" ")
    if kNN_type == 1:
        neighbours = knn.kneighbors(test)[1]
    elif kNN_type == 2:
        neighbours = knn.query(test, k=k_query)[1]
    elif kNN_type == 3:
        neighbours = [
            knn.search_by_vector(test[row, :], k=k_query)
            for row in tqdm(range(test.shape[0]))
        ]
    elif kNN_type == 4:
        neighbours = knn.search(test, k=k_query, k_clusters=100,
                                return_distance=False)
    elif kNN_type == 5:
        # (A disabled diagnostic that counted all-zero test rows lived here.)
        # Each batch result is indexed at [0] to keep only the ids.
        neighbours = [
            hit[0]
            for hit in knn.knnQueryBatch(test, k=k_query,
                                         num_threads=num_threads)
        ]
    else:
        raise NotImplementedError

    print(f"time={time.time()-t0:.3f}s")

    # --- flatten to an edge list, skipping pairs with identical indices
    edge_list = []
    for row, cols in enumerate(neighbours):
        assert len(cols) == k_query, f"index1={row} len(per)={len(cols)} != K={K}"
        edge_list.extend(
            (row, col) for col in map(int, cols) if row != col)
    print(f"\tget edges done! .... time={time.time()-t0:.3f}s")
    return edge_list

Example #4

Show file

class CustomKMeans(object):
    """K-means clustering with a pluggable nearest-centre search.

    ``method`` selects how samples are assigned to centres:

    * ``'kmeans'`` / ``'kdtree'`` -- ``pyflann.FLANN`` with the matching
      parameter set from ``self.params``;
    * ``'exact'`` -- scikit-learn ``NearestNeighbors`` (KD-tree, p=2);
    * ``'exact-gpu'`` -- a ``faiss.IndexFlatL2`` moved to the GPU.

    After ``fit`` the results are available as ``centers_``, ``labels_``
    and ``sqdist_``; per-iteration timings and the cost are collected in
    ``stats_`` and a textual log in ``log_``.
    """

    # Fix: the annotations below used ``np.float``, an alias removed in
    # NumPy 1.24.  Annotations are evaluated when the ``def`` executes, so
    # the class could not even be defined; builtin ``float`` is equivalent.
    def __init__(
            self,
            n_centers: int = 2,
            method: str = 'kmeans',
            max_iter: int = 30,
            max_iter_no_progress: int = 10,
            tol_progress: float = 1e-3,
            random_state: Optional[int] = 17,
            nn_km_branching: int = 32,  # branching for k-means tree
            nn_km_iter: int = 20,  # number of iterations per k-means step
            nn_kd_trees: int = 32,  # number of randomized trees to use
            nn_checks: int = 75,  # number of leaves to check in the search
            nn_autotune: float = -1,  # auto-tuning of nn parameters
            apply_fix: bool = False,
            save_log: bool = True,
            gpu_idx: int = 0,
            verbose: bool = False):
        """Store the hyperparameters and create the search backend.

        ``method`` must be one of 'kmeans', 'kdtree', 'exact', 'exact-gpu'
        (checked with an ``assert``).  ``max_iter_no_progress`` is the
        number of iterations whose cost improvement may stay below
        ``tol_progress`` before fitting stops.  ``apply_fix`` enables the
        re-check of label changes for the FLANN methods.  ``save_log``
        controls whether ``fit`` writes the log to disk.
        """

        assert method in {'kmeans', 'kdtree', 'exact', 'exact-gpu'}

        # FLANN hyperparameters
        # The whole list of FLANN parameters is available here:
        # https://github.com/mariusmuja/flann/blob/master/src/cpp/flann/flann.h
        self.params = {
            'kdtree': {
                'algorithm': 'kdtree',
                'num_neighbors': 1,
                'trees': nn_kd_trees,
                'checks': nn_checks,
                'target_precision': nn_autotune
            },
            'kmeans': {
                'algorithm': 'kmeans',
                'num_neighbors': 1,
                'branching': nn_km_branching,
                'iterations': nn_km_iter,
                'checks': nn_checks,
                'target_precision': nn_autotune
            },
            'exact': {
                'algorithm': 'exact'
            },
            'exact-gpu': {
                'algorithm': 'exact-gpu'
            }
        }

        # GPU parameters
        self.gpu = None
        self.gpu_idx = gpu_idx

        # Nearest neighbors search method set up
        if method in {'kmeans', 'kdtree'}:
            self.nn_search = pyflann.FLANN()
        elif method == 'exact':
            self.nn_search = NearestNeighbors(n_neighbors=1,
                                              algorithm='kd_tree',
                                              leaf_size=nn_checks,
                                              metric='minkowski',
                                              p=2,
                                              n_jobs=1)
        else:
            # The GPU index itself is (re)built inside ``fit``.
            self.nn_search = None
            self.gpu = faiss.StandardGpuResources()

        self.n_centers = n_centers
        self.method = method
        self.max_iter = max_iter
        self.max_iter_no_progress = max_iter_no_progress
        self.tol_progress = tol_progress
        self.random_state = random_state
        self.verbose = verbose

        self._dim = None
        self._n_samples = None
        self.centers_ = None
        self.labels_ = None
        self.sqdist_ = None
        self.stats_ = None
        # Identifier built from the centre count and the method's parameters;
        # also used as the log file name.
        self.session_id = f'n_centers_{self.n_centers}-' + \
            '-'.join(
                [f'{param}_{val}' for param, val
                 in self.params[self.method].items()]
            )
        self.apply_fix = apply_fix
        self.log_ = []
        self.save_log = save_log

    def __reset_stats(self):
        """Start a fresh ``stats_`` record for a new fit."""
        self.stats_ = {
            'measure': [],  # the cost function
            'evaluation': [],  # time to evaluate labels
            'assignment': [],  # time to re-assign labels
            'n_centers': self.n_centers,
            'apply_fix': self.apply_fix,
            **self.params[self.method]
        }

    def __update_stats(self, measure: float, time_eval: float,
                       time_assign: float):
        """Append one iteration's cost and timings to ``stats_``."""
        self.stats_['measure'].append(measure)
        self.stats_['evaluation'].append(time_eval)
        self.stats_['assignment'].append(time_assign)

    def fit(self, data: np.ndarray) -> None:
        """Cluster ``data`` (2-D, one sample per row) in place on ``self``.

        The data is cast to float32, centres are drawn uniformly between
        the global minimum and maximum of the data, and labelling and
        centre updates alternate for at most ``max_iter`` iterations.
        Fitting stops early after ``max_iter_no_progress`` consecutive
        iterations whose mean cost improved by less than ``tol_progress``.
        When ``save_log`` is true the log is written below
        ``../output/logs/<class name>/``.
        """
        data = data.astype(np.float32)
        self._n_samples, self._dim = data.shape

        np.random.seed(self.random_state)

        # Initialize centers (we use uniform random initialization)
        self.centers_ = np.random.uniform(np.min(data),
                                          np.max(data),
                                          size=self.n_centers *
                                          self._dim).reshape(
                                              self.n_centers,
                                              self._dim).astype(data.dtype)

        # Start fitting
        self.__reset_stats()
        self.log_.append(
            f'Session id: {self.session_id} '
            f'at {datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")}')
        progress = self.max_iter_no_progress
        for it in range(self.max_iter):

            tic_it = time()

            # Evaluate labels and squared distances
            if self.method in {'kmeans', 'kdtree'}:
                labels_, sqdist = self.nn_search.nn(self.centers_, data,
                                                    **self.params[self.method])
                if self.apply_fix and it > 0:
                    # Accept a label change only if the newly reported
                    # distance is not worse than the distance to the
                    # sample's current centre.
                    reassigned = np.where(labels_ != self.labels_)[0]
                    # NOTE(review): when nothing was reassigned, sqdist_ keeps
                    # the previous iteration's values -- confirm intended.
                    if len(reassigned):
                        sqdist_checked = np.linalg.norm(
                            data[reassigned, :] -
                            self.centers_[self.labels_[reassigned], :],
                            axis=1)**2
                        correct = sqdist[reassigned] <= sqdist_checked.ravel()
                        incorrect = ~correct
                        to_assign = reassigned[correct]
                        to_leave = reassigned[incorrect]
                        if len(to_assign):
                            self.labels_[to_assign] = labels_[to_assign]
                        self.sqdist_ = sqdist
                        if len(to_leave):
                            self.sqdist_[to_leave] = sqdist_checked[incorrect]
                else:
                    self.labels_, self.sqdist_ = labels_, sqdist
            elif self.method == 'exact':
                self.nn_search.fit(self.centers_)
                self.sqdist_, self.labels_ = self.nn_search.kneighbors(
                    X=data, return_distance=True)
                # kneighbors distances are squared here before being stored
                self.sqdist_, self.labels_ = \
                    self.sqdist_.ravel()**2, self.labels_.ravel()
            else:
                # A new flat L2 index over the current centres is built each
                # iteration and moved to the GPU.
                index_flat = faiss.IndexFlatL2(self._dim)
                self.nn_search = faiss.index_cpu_to_gpu(
                    self.gpu, self.gpu_idx, index_flat)
                self.nn_search.add(self.centers_)
                self.sqdist_, self.labels_ = self.nn_search.search(data, 1)
                self.sqdist_, self.labels_ = \
                    self.sqdist_.ravel(), self.labels_.ravel()

            toc = time()
            t1 = toc - tic_it

            # Update centers
            tic = time()
            for label in range(self.n_centers):
                idx = np.where(self.labels_ == label)[0]
                if len(idx):
                    self.centers_[label] = data[idx].mean(axis=0)
                else:
                    # If the cluster is empty, move its center anyway
                    self.centers_[label] = self.centers_.mean(axis=0)
            toc_it = time()
            t2 = toc_it - tic

            # Print progress
            self.log_.append(f'\t--> iteration {it} '
                             f'has been finished -- {toc_it - tic_it:.3e}s')
            if self.verbose:
                print(self.log_[-1])

            # Check convergence
            # p is the mean squared distance of samples to their centres
            p = np.bincount(self.labels_, weights=self.sqdist_).sum()
            p /= self._n_samples
            self.__update_stats(p, t1, t2)
            if it >= 1 and self.stats_['measure'][-2] - \
                    self.stats_['measure'][-1] < self.tol_progress:
                progress -= 1
                if not progress:
                    print(f'\nIteration {it}: '
                          f'no progress during '
                          f'last {self.max_iter_no_progress} iterations')
                    break
            else:
                progress = self.max_iter_no_progress

        # Save the log
        self.log_.append(self.time_report())
        if self.save_log:
            path = Path(f'../output/logs/'
                        f'{self.__class__.__name__}/').resolve()
            path.mkdir(parents=True, exist_ok=True)
            with open(f'{str(path)}/{self.session_id}.txt', 'w') as file:
                for line in self.log_:
                    file.write(f'{line}\n')

    def fit_predict(self, data: np.ndarray) -> np.ndarray:
        """Fit on ``data`` and return the resulting ``labels_``."""

        self.fit(data)

        return self.labels_

    def time_report(self) -> str:
        """Return mean/std of the per-iteration timings as text.

        Returns a fixed placeholder message when no fit has been run.
        """
        if self.stats_ is None:
            rep = 'No statistics are available.'
        else:
            t_eval = np.asarray(self.stats_['evaluation'])
            t_assign = np.asarray(self.stats_['assignment'])
            rep = f'\nEvaluation time per iteration:\n' \
                f'\tAVG. = {t_eval.mean()}s\n' \
                f'\tSTD. = {t_eval.std()}s\n' \
                f'Assignment time per iteration:\n' \
                f'\tAVG. = {t_assign.mean()}s\n\tSTD. = {t_assign.std()}s'
        return rep

Example #5

Show file

def find_edges(input, test, K, cluster_ids, query_ids):
    """Return kNN edges between query rows and cluster rows, as external ids.

    An index is built over ``input`` and queried with every row of
    ``test``.  The backend is selected by the module-level ``kNN_type``
    switch: 1 = scikit-learn ball tree, 2 = SciPy KD-tree, 3 = n2 HNSW
    index, 4 = PySparNN cluster index, 5 = nmslib HNSW over sparse
    vectors.  Any other value raises ``NotImplementedError``.

    Parameters
    ----------
    input : matrix indexed by the search structure (one row per item).
    test : matrix of query rows.
    K : neighbour count; backends 1-4 request ``K + 1`` neighbours,
        backend 5 requests ``K``.
    cluster_ids : maps a row position in ``input`` to its external id.
    query_ids : maps a row position in ``test`` to its external id.

    Returns
    -------
    list of ``(query_id, cluster_id)`` tuples.  Pairs whose row positions
    coincide are skipped.  Progress and cumulative timings are printed.
    """
    print(f"\tbuilding kNN classifier ... ", end=" ")
    st_time = time.time()

    # Only the scikit-learn and SciPy backends are fed dense matrices.
    if kNN_type in [1, 2]:
        input, test = input.todense(), test.todense()

    # Imports stay local so that only the selected backend must be installed.
    if kNN_type == 1:
        from sklearn.neighbors import NearestNeighbors
        tree = NearestNeighbors(n_neighbors=K + 1, algorithm='ball_tree').fit(input)
    elif kNN_type == 2:
        from scipy import spatial
        tree = spatial.KDTree(input)
    elif kNN_type == 3:
        from n2 import HnswIndex
        tree = HnswIndex(input.shape[1], distance_type) # distance_type in ['angular', 'L2']
        for index in tqdm(range(input.shape[0])):
            tree.add_data(input[index, :])
        tree.build(n_threads=20)
    elif kNN_type == 4:
        import pysparnn.cluster_index as ci
        input_num = input.shape[0]
        tree = ci.MultiClusterIndex(input, range(input_num))
    elif kNN_type == 5:
        import nmslib
        # num_threads is reused by the query step for this backend.
        M, efC, num_threads = 30, 100, 10
        index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC, 'post': 0}
        space_name = 'cosinesimil_sparse'
        data_type = nmslib.DataType.SPARSE_VECTOR
        tree = nmslib.init(method='hnsw', space=space_name, data_type=data_type)

        print(f"type(input) = {type(input)} type(test)={type(test)}", end=" ")

        tree.addDataPointBatch(input)

        tree.createIndex(index_time_params)
        # Setting query-time parameters
        efS = 100
        query_time_params = {'efSearch': efS}
        print('Setting query-time parameters', query_time_params)
        tree.setQueryTimeParams(query_time_params)

    else:
        raise NotImplementedError
    print(f"time={time.time()-st_time:.3f}s")


    print("\tfinding indices ... ", end=" ")
    if kNN_type == 1:
        _, indices = tree.kneighbors(test)
    elif kNN_type == 2:
        _, indices = tree.query(test, k=K + 1)
    elif kNN_type == 3:
        indices = []
        for i in tqdm(range(test.shape[0])):
            indices.append(tree.search_by_vector(test[i, :], k=K + 1))
    elif kNN_type == 4:
        indices = tree.search(test, k=K+1, k_clusters=100, return_distance=False)
    elif kNN_type == 5:
        # NOTE(review): this backend asks for K neighbours while the others
        # ask for K + 1 -- confirm which is intended.
        indices_ = tree.knnQueryBatch(test, k=K, num_threads=num_threads)
        # Each batch result is indexed at [0] to keep only the ids.
        indices = [i[0] for i in indices_]
        del indices_
    else:
        raise NotImplementedError

    print(f"time={time.time()-st_time:.3f}s")


    edge_list = []
    for index1, per in enumerate(indices):
        for index2 in per:
            index2 = int(index2)
            if index1 != index2:
                # Fix: this used the undefined name ``center_ids``; the
                # neighbour position must be translated via ``cluster_ids``.
                edge_list.append((query_ids[index1], cluster_ids[index2]))
    print(f"\tdone! .... time={time.time()-st_time:.3f}s")
    return edge_list