def generate_pairs(patches, constants):
    """Generate pairs for normalized patches."""
    k_nearest = constants.K_NEAREST
    num_patches = constants.NUM_QUERY_PATCHES
    scaled_imgs = len(patches)
    pairs = []
    query_database = []
    candidate_database = []
    index_database = []
    length_database = []
    for k in range(scaled_imgs):
        qp = [
            patch.norm_patch for patch in patches[k] if 7 <= patch.bucket <= 9
        ]
        qi = [
            index for index, patch in enumerate(patches[k])
            if 7 <= patch.bucket <= 9
        ]
        # Randomly subsample the query patches to keep the query set small and speed up the search
        if len(qi) > num_patches:
            np.random.seed(0)
            selection = np.random.choice(np.arange(len(qi)), num_patches,
                                         replace=False).tolist()
            selection.sort()
            query_patches = [qp[i] for i in selection]
            query_indices = [qi[i] for i in selection]
        else:
            query_patches = qp
            query_indices = qi
        query_database.append(np.vstack([query_patches]))
        index_database.append(query_indices)
        length_database.append(len(query_indices))
        candidate_database.append(
            np.vstack([[
                patch.norm_patch for i, patch in enumerate(patches[k])
                if 0 <= patch.bucket <= 5
            ]]))
    p1 = np.concatenate(candidate_database)
    kdt = KDTree(p1, leaf_size=30, metric='euclidean')
    # Find the list of nearest neighbours for each query patch.
    # `total` offsets the queried patch indices for each scaled image.
    total = 0
    for k in range(scaled_imgs):
        nn = kdt.query(query_database[k], k=k_nearest,
                       return_distance=False, sort_results=False)
        q = [total + index_database[k][i] for i in range(length_database[k])]
        for i in range(len(nn)):
            for j in range(k_nearest):
                pairs.append([q[i], nn[i][j]])
        total += len(patches[k])
    return pairs
def plot_nb_dists(X, nearest_neighbor, metric='euclidean', ylim=None):
    """Plot the sorted distance to the `nearest_neighbor`th neighbor.

    Args:
        X (list of lists): list with data tuples
        nearest_neighbor (int or list of int): which nearest neighbor(s) to plot
        metric (string): name of the distance metric to use
        ylim (float): optional upper limit for the y axis
    """
    tree = KDTree(X, leaf_size=2, metric=metric)
    if not isinstance(nearest_neighbor, list):
        nearest_neighbor = [nearest_neighbor]
    max_nn = max(nearest_neighbor)
    dist, _ = tree.query(X, k=max_nn + 1)

    plt.figure()
    for nnb in nearest_neighbor:
        col = dist[:, nnb]
        col.sort()
        plt.plot(col, label="{}th nearest neighbor".format(nnb))
    #plt.ylim(0, min(250, max(dist[:, max_nn])))
    plt.ylabel("Distance to k nearest neighbor")
    plt.xlabel("Points sorted according to distance of k nearest neighbor")
    plt.ylim(0, ylim)
    plt.grid()
    plt.legend()
    plt.show()
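# A minimal usage sketch for plot_nb_dists above (the data and parameter values
# here are illustrative, not from the original code). The sorted k-distance
# curve it produces is commonly used to pick a density threshold such as
# DBSCAN's `eps` by looking for the "elbow" in the plot.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KDTree

rng = np.random.RandomState(0)
X_demo = rng.random_sample((200, 2))               # 200 random 2-D points
plot_nb_dists(X_demo, nearest_neighbor=[1, 4, 9], ylim=0.3)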
def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
    # X, Y and brute_force_neighbors are defined at module level in the test file
    kdt = KDTree(X, leaf_size=1, metric=metric, **kwargs)
    dist1, ind1 = kdt.query(Y, k, dualtree=dualtree,
                            breadth_first=breadth_first)
    dist2, ind2 = brute_force_neighbors(X, Y, k, metric, **kwargs)

    # don't check indices here: if there are any duplicate distances,
    # the indices may not match. Distances should not have this problem.
    assert_allclose(dist1, dist2)
def __call__(self, x, ma):
    h = F.tanh(self.l0(x))
    #h = F.tanh(self.l1(h))
    #h = F.tanh(self.l2(h))

    # kd_tree lookup: for each action, find the k nearest stored embeddings
    # and compute an inverse-squared-distance weighted average of their Q values
    q_train = []    # for train [variable, variable]
    ind_list = []   # for train
    dist_list = []  # for train
    for j in range(len(ma.maq)):  # loop over n_actions
        h_list = ma.mah[j]
        lp = len(h_list)
        leaf_size = lp + (lp // 2)  # keep leaf_size an integer
        tree = KDTree(h_list, leaf_size=leaf_size)
        h_ = h.data
        if lp < 50:
            k = lp
        else:
            k = 50
        dist, ind = tree.query(h_, k=k)
        count = 0
        for ii in ind[0]:
            mahi = np.zeros((1, 4), dtype=np.float32)
            mahi[0] = ma.mah[j][ii]
            hi = chainer.Variable(cuda.to_cpu(mahi))
            # weight = 1 / (squared distance + epsilon)
            wi = F.expand_dims(
                1 / (F.batch_l2_norm_squared((h - hi)) + 0.001), 1)
            if count == 0:
                w = wi
                maqi = np.zeros((1, 1), dtype=np.float32)
                maqi[0] = ma.maq[j][ii]
                q = chainer.Variable(cuda.to_cpu(maqi))
                qq = wi * q
                count += 1
            else:
                w += wi
                maqi = np.zeros((1, 1), dtype=np.float32)
                maqi[0] = ma.maq[j][ii]
                q = chainer.Variable(cuda.to_cpu(maqi))
                qq += wi * q
        qq /= w
        q_train.append(qq)
        ind_list.append(ind)
        dist_list.append(dist)
        self.q_list[0][j] = qq.data[0][0]
    qa = chainer.Variable(cuda.to_cpu(self.q_list))
    return chainerrl.action_value.DiscreteActionValue(
        qa), q_train, ind_list, dist_list, h.data
def test_kd_tree_pickle(protocol):
    import pickle
    rng = check_random_state(0)
    X = rng.random_sample((10, 3))
    kdt1 = KDTree(X, leaf_size=1)
    dist1, ind1 = kdt1.query(X)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(kdt1, protocol=protocol)
        kdt2 = pickle.loads(s)
        dist2, ind2 = kdt2.query(X)
        assert_array_almost_equal(ind1, ind2)
        assert_array_almost_equal(dist1, dist2)

    check_pickle_protocol(protocol)
def test_kd_tree_pickle():
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    kdt1 = KDTree(X, leaf_size=1)
    dist1, ind1 = kdt1.query(X)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(kdt1, protocol=protocol)
        kdt2 = pickle.loads(s)
        dist2, ind2 = kdt2.query(X)
        assert_array_almost_equal(ind1, ind2)
        assert_array_almost_equal(dist1, dist2)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
def test_kd_tree_pickle():
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    kdt1 = KDTree(X, leaf_size=1)
    dist1, ind1 = kdt1.query(X)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(kdt1, protocol=protocol)
        kdt2 = pickle.loads(s)
        dist2, ind2 = kdt2.query(X)
        assert_allclose(ind1, ind2)
        assert_allclose(dist1, dist2)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
def __call__(self, x, ma):
    h = F.tanh(self.l0(x))
    h = F.tanh(self.l1(h))
    h = F.tanh(self.l2(h))

    # kd_tree lookup: for each action, find the k nearest stored embeddings
    # and compute an inverse-distance weighted average of their Q values
    q_train = []    # for train [variable, variable]
    ind_list = []   # for train
    dist_list = []  # for train
    for j in range(len(ma.maq)):  # loop over n_actions
        h_list = ma.mah[j]
        lp = len(h_list)
        leaf_size = lp + (lp // 2)  # keep leaf_size an integer
        tree = KDTree(h_list, leaf_size=leaf_size)
        h_ = h.data
        if lp < 50:
            k = lp
        else:
            k = 50
        dist, ind = tree.query(h_, k=k)
        mahi = ma.mah[j][ind[0]]
        hi = chainer.Variable(cuda.to_cpu(mahi))
        tiled_h = chainer.Variable(np.tile(h.data, (len(ind[0]), 1)))
        # weight = 1 / sqrt(squared distance + epsilon)
        wi = F.expand_dims(
            1 / (F.sqrt(F.sum((tiled_h - hi) * (tiled_h - hi), axis=1) + 1e-3)),
            1)
        w = F.sum(wi, axis=0)
        maqi = ma.maq[j][ind[0]]
        q = chainer.Variable(cuda.to_cpu(maqi))
        qq = F.expand_dims(F.sum(wi * q, axis=0) / w, 1)
        q_train.append(qq)
        ind_list.append(ind)
        dist_list.append(dist)
        self.q_list[0][j] = qq.data
    if self.use_gpu:
        qa = chainer.Variable(cuda.to_cpu(self.q_list))
    else:
        qa = self.q_list
    return qa, q_train, ind_list, dist_list, h.data
def generate_pairs_raw(patches, constants):
    """Generate raw pairs without patch normalization."""
    # Convert the list of raw patches into a numpy array
    patch_database = []
    patch_database.append(
        np.vstack([np.reshape(patch.raw_patch, [-1]) for patch in patches[0]]))

    # Query 2 nearest neighbours per patch: the query patch itself is in the
    # database (a duplicate), so k=2 is needed to also get a distinct neighbour
    nearest = []
    p1 = np.concatenate(patch_database[0:])
    kdt = KDTree(p1, leaf_size=30, metric='euclidean')
    nn = kdt.query(patch_database[0], k=2, return_distance=False,
                   sort_results=False)
    nearest.append(nn)
    return np.concatenate(nearest)
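# Illustrative sketch (not part of the original code): when the query points
# are the same points the tree was built on, each point's single nearest
# neighbour is itself at distance 0, which is why the function above asks for
# k=2 to obtain the first distinct neighbour.
import numpy as np
from sklearn.neighbors import KDTree

pts = np.random.RandomState(0).random_sample((5, 3))
tree = KDTree(pts, leaf_size=30, metric='euclidean')
dist, ind = tree.query(pts, k=2)
assert np.allclose(dist[:, 0], 0.0)                     # self-match at distance zero
assert np.array_equal(ind[:, 0], np.arange(len(pts)))   # each point finds itself first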
def write(self, h, v):
    keys = np.array(self.memory_keys, dtype=np.float32)
    values = np.array(self.memory_values, dtype=np.float32)
    if len(self.memory_keys) > 0:
        # Exact key match: move the stored value towards v with learning rate lr
        tree = KDTree(keys, leaf_size=50)
        distance, index = tree.query(np.array([h], dtype=np.float32))
        if distance[0][0] == 0:
            index = index[0][0]
            self.memory_values[index] += self.lr * (v - self.memory_values[index])
            return
    if len(self.memory_values) < self.capacity:
        # Memory not full: append the new key/value pair
        self.ages[len(self.memory_values) - 1] = 0
        self.memory_keys.append(h)
        self.memory_values.append(v)
    else:
        # Memory full: overwrite the slot selected by np.argmin(self.ages)
        index = np.argmin(self.ages)
        self.memory_keys[index] = h
        self.memory_values[index] = v
        self.ages[index] = 0
def lookup(self, h):
    if len(self.memory_values) == 0:
        return (np.zeros((len(h), 1, len(h[0])), dtype=np.float32),
                np.zeros((len(h), 1), dtype=np.float32))
    keys = np.array(self.memory_keys, dtype=np.float32)
    values = np.array(self.memory_values, dtype=np.float32)
    size = keys.shape[0]
    if size < self.p:
        k = size
    else:
        k = self.p
    queried_keys = np.zeros((len(h), k, len(h[0])), dtype=np.float32)
    queried_values = np.zeros((len(h), k), dtype=np.float32)
    for i, encoded_state in enumerate(h):
        # The keys do not change inside this loop, so the tree is rebuilt
        # with the same data for every encoded state
        tree = KDTree(keys, leaf_size=50)
        distances, indices = tree.query(
            np.array([encoded_state], dtype=np.float32), k=k)
        queried_keys[i] = keys[indices]
        queried_values[i] = values[indices][-1]  # [-1] selects the single row of the (1, k) result
        # Age all entries, then refresh the entries that were just returned
        self.ages += 1
        self.ages[indices] = 0
    return queried_keys, queried_values
import pickle

import cv2
import numpy as np
from sklearn.neighbors import KDTree

from VLADlib.Descriptors import *

# load the visual words and build a KD-tree over them
pathVD = "visualWords/visualWords.pickle"
with open(pathVD, 'rb') as f:
    vocab = pickle.load(f)
training = np.asarray([i.toarray()[0].tolist() for i in vocab])
tree = KDTree(training, leaf_size=2)

image = 'dataset/3.jpg'
im = cv2.imread(image)

# initialise the BoVW model
# (BagOfVisualWords is assumed to be provided elsewhere in the project)
pathVD = 'visualDictionary/visualDictionary2ORB.pickle'
with open(pathVD, 'rb') as g:
    visualDictionary = pickle.load(g)
bovw = BagOfVisualWords(visualDictionary.cluster_centers_)

# compute descriptors
kp, descriptor = describeORB(im)

# represent the image as a BoW histogram and query the 3 nearest visual words
hist = bovw.describe(descriptor)
query = np.asarray(hist.toarray()[0].tolist())
print("Query = ", query)
dist, ind = tree.query(query.reshape(1, -1), k=3)
print(ind)
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np

import get_data2 as gd

headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts()

# Build one row per record by collecting each column in turn
rows_lol = []
for i in range(len(gd.get_data_slice(headers[0], dicts))):
    rows_lol.append([])

for i in range(len(headers)):
    # Both branches are currently identical
    if i == 1 or i == 4:
        column = gd.get_data_slice_numbers(headers[i], dicts)
    else:
        column = gd.get_data_slice_numbers(headers[i], dicts)
    for j in range(len(gd.get_data_slice(headers[0], dicts))):
        rows_lol[j].append(column[j])

X = np.array(rows_lol)

#nbrs = NearestNeighbors(n_neighbors=5, algorithm='kd_tree', metric='jaccard').fit(X)
kdt = KDTree(X, leaf_size=30, metric='euclidean')
kdt.query(X, k=3, return_distance=False)
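# A hedged sketch (not from the original script): the commented-out
# NearestNeighbors line above suggests the estimator API was also considered.
# The call below is a roughly equivalent estimator-based query, and it also
# returns the distances alongside the indices. `X` is the array built in the
# script above.
from sklearn.neighbors import NearestNeighbors

nbrs = NearestNeighbors(n_neighbors=3, algorithm='kd_tree', metric='euclidean').fit(X)
distances, indices = nbrs.kneighbors(X)   # neighbours should match kdt.query(X, k=3) up to tie-breaking
print(indices[:5], distances[:5])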