def NCM(o, Z_T):
    """
    Score ``o`` by the summed distance to its nearest neighbours.

    ``o`` and ``Z_T`` are stacked column-wise, and every column of the result
    becomes one point of a BallTree. The first point (the first column of the
    stacked matrix) is then queried against that tree.

    NOTE(review): ``self`` is not a parameter here; it must be supplied by an
    enclosing scope (``self.k`` is read from it) -- confirm against the caller.

    :param o: column(s) placed first in the stacked matrix
    :param Z_T: remaining columns of the stacked matrix
    :return: sum of the distances returned for the ``self.k + 1`` nearest points
    """
    points = np.c_[o, Z_T].T
    neighbour_tree = BallTree(points, leaf_size=3)
    # The queried point is itself stored in the tree, hence k + 1 neighbours.
    distances, _ = neighbour_tree.query(points[:1], k=self.k + 1)
    return distances.sum()
def test_query_haversine():
    """Check that BallTree haversine queries agree with the brute-force search."""
    random_state = check_random_state(0)
    points = 2 * np.pi * random_state.random_sample((40, 2))

    tree = BallTree(points, leaf_size=1, metric="haversine")
    tree_result = tree.query(points, k=5)
    brute_result = brute_force_neighbors(points, points, k=5,
                                         metric="haversine")

    # Compare distances first, then indices.
    for actual, expected in zip(tree_result, brute_result):
        assert_array_almost_equal(actual, expected)
def knn_error_score(L, x_train, y_train, x_test, y_test, k, tree_size=15):
    """
    Measure the training and testing errors of a kNN classifier implemented
    using BallTree.

    :param L: linear transformation applied to both sets as ``x @ L.T`` (for
        example PCA or LDA); pass an empty sequence to skip the projection
    :param x_train: training vectors (each row is an instance)
    :param y_train: training labels (1-D array, one label per training row)
    :param x_test: test vectors (each row is an instance)
    :param y_test: test labels (1-D array, one label per test row)
    :param k: number of nearest neighbors (positive integer)
    :param tree_size: leaf size handed to the BallTree
    :return: training and testing error in the k-NN problem, as two floats.
    :raises AssertionError: if the label arrays are not 1-D, the sample and
        label counts disagree, or ``k`` is not a positive integer
    """
    # Both label vectors must be 1-D. (Written as a single condition: in
    # ``assert a, b`` the second expression is only the failure message.)
    assert y_train.ndim == 1 and y_test.ndim == 1
    assert x_train.shape[0] == len(y_train)
    assert x_test.shape[0] == len(y_test)
    assert isinstance(k, (int, np.int32, np.int64)) and k > 0

    if len(L) != 0:
        # L is the initial linear projection, for example PCA or LDA
        x_train = x_train @ L.T
        x_test = x_test @ L.T

    tree = BallTree(x_train, leaf_size=tree_size, metric='euclidean')

    # Smallest label over both sets; forwarded to LSKnn2.
    MM = np.append(y_train, y_test).min()
    NTr, NTe = x_train.shape[0], x_test.shape[0]

    # iTe: indices (into the training set) of the k nearest training points
    # of every test point.
    iTe = tree.query(x_test, k=k, return_distance=False)
    # NOTE(review): LSKnn2 is defined elsewhere; presumably it turns the
    # neighbours' labels into predicted labels -- confirm its output shape.
    lTe2 = LSKnn2(y_train[iTe], k, MM)
    test_error = np.sum(lTe2 != np.repeat(y_test, k, axis=0), axis=1) / NTe

    # Training points are stored in the tree themselves, so ask for one extra
    # neighbour and drop the first column (expected to be the point itself).
    iTr = tree.query(x_train, k=k + 1, return_distance=False)
    iTr = iTr[:, 1:]
    lTr2 = LSKnn2(y_train[iTr], k, MM)
    training_error = np.sum(lTr2 != np.repeat(y_train, k, axis=0), axis=1) / NTr

    # Squeeze to 0-d before converting: float() on a size-1 array with
    # ndim > 0 is deprecated since NumPy 1.25.
    return float(np.squeeze(training_error)), float(np.squeeze(test_error))
def find_target_neighbors(X, labels, K, n_classes):
    """
    Find, for every sample, its K nearest neighbours within the same class.

    For each class id in ``range(n_classes)`` a BallTree is built on that
    class's samples only and queried with those same samples.

    :param X: 2-D array of samples, one row per sample
    :param labels: array of class ids aligned with the rows of ``X``
    :param K: number of same-class neighbours to return per sample
    :param n_classes: class ids ``0 .. n_classes - 1`` are scanned
    :return: integer array of shape ``(len(X), K)`` holding row indices into
        ``X``; rows whose label is outside ``range(n_classes)`` stay zero
    """
    n_samples, _ = X.shape
    targets_ind = np.zeros((n_samples, K), dtype=int)
    for i in range(n_classes):
        jj, = np.where(labels == i)  # Samples of the class i
        if jj.size == 0:
            # No sample carries this label: nothing to fill in, and a
            # BallTree cannot be built on an empty set.
            continue
        Xu = X[jj]
        kdt = BallTree(Xu, leaf_size=50, metric='euclidean')
        # K + 1 neighbours because each query point is also in the tree; the
        # first column (expected to be the point itself) is dropped below.
        # NOTE(review): a class with fewer than K + 1 samples cannot satisfy
        # this query -- BallTree.query is expected to raise in that case.
        targets = kdt.query(Xu, k=K + 1, return_distance=False)
        # Map class-local neighbour positions back to row indices of X.
        targets_ind[jj] = jj[targets[:, 1:]]
    return targets_ind
def get_closest_locations(data, query_lon, query_lat, query_cat=None,
                          query_subcat=None, num_locs=10):
    """
    Return the indices of the entries of ``data`` closest to a query point.

    Entries are optionally filtered by category and sub-category (a
    case-insensitive substring match against ``entry["mapping"]``), then
    ranked by haversine distance to ``(query_lat, query_lon)``.

    :param data: sequence of dicts, each with a ``"mapping"`` dict holding
        ``"longitude"`` and ``"latitude"`` (and ``"top_category"`` /
        ``"sub_category"`` when the matching filter is used)
    :param query_lon: query longitude in degrees
    :param query_lat: query latitude in degrees
    :param query_cat: if given, keep only entries whose top category
        contains this text
    :param query_subcat: if given, keep only entries whose sub-category
        contains this text
    :param num_locs: maximum number of locations to return
    :return: list of indices into ``data``, nearest first; empty when no
        entry passes the filters or ``num_locs`` is not positive
    """
    # Normalise the filter strings once instead of once per entry.
    wanted_cat = None if query_cat is None else query_cat.lower().strip()
    wanted_subcat = (None if query_subcat is None
                     else query_subcat.lower().strip())

    bt_lons = []
    bt_lats = []
    bt_indices = []
    for n, entry in enumerate(data):
        mapping = entry["mapping"]
        # Skip (do not stop at) entries that fail a filter, so that matching
        # entries located after a non-matching one are still considered.
        if (wanted_cat is not None
                and wanted_cat not in mapping["top_category"].lower().strip()):
            continue
        if (wanted_subcat is not None
                and wanted_subcat
                not in mapping["sub_category"].lower().strip()):
            continue
        bt_lons.append(float(mapping["longitude"]))
        bt_lats.append(float(mapping["latitude"]))
        bt_indices.append(n)

    num_locs = min(num_locs, len(bt_indices))
    if num_locs <= 0:
        return []

    # The haversine metric expects (latitude, longitude) pairs in radians.
    candidates = np.deg2rad(np.column_stack((bt_lats, bt_lons)))
    bt = BallTree(candidates, metric='haversine')
    query_point = np.deg2rad(np.c_[query_lat, query_lon])
    indices = bt.query(query_point, k=num_locs, return_distance=False)

    # Translate positions among the filtered candidates back to positions in
    # the original ``data`` sequence.
    return np.asarray(bt_indices)[indices[0]].tolist()
def test_ball_tree_query_metrics(metric):
    """
    Check BallTree query distances against brute force for ``metric``.

    Boolean metrics get 0/1-valued data, discrete metrics get data rounded
    from the range [0, 4].
    """
    random_state = check_random_state(0)
    n_neighbors = 5

    # Draw the tree points first and the query points second so the random
    # stream is consumed in a fixed order.
    if metric in BOOLEAN_METRICS:
        tree_points = random_state.random_sample((40, 10)).round(0)
        query_points = random_state.random_sample((10, 10)).round(0)
    elif metric in DISCRETE_METRICS:
        tree_points = (4 * random_state.random_sample((40, 10))).round(0)
        query_points = (4 * random_state.random_sample((10, 10))).round(0)

    tree = BallTree(tree_points, leaf_size=1, metric=metric)
    tree_dist, _ = tree.query(query_points, n_neighbors)
    brute_dist, _ = brute_force_neighbors(tree_points, query_points,
                                          n_neighbors, metric)

    assert_array_almost_equal(tree_dist, brute_dist)
def find_impostors(pred, labels, n_classes, no_potential_impo):
    """
    Find, for every sample, its nearest neighbours from the other classes.

    For each class id in ``range(n_classes)`` a BallTree is built on all
    samples that do NOT carry that label and queried with the samples that do.

    :param pred: array of samples, one row per sample (presumably the
        projected / predicted points -- confirm against the caller)
    :param labels: array of class ids aligned with the rows of ``pred``
    :param n_classes: class ids ``0 .. n_classes - 1`` are scanned
    :param no_potential_impo: number of differently-labelled neighbours to
        return per sample
    :return: integer array of shape ``(len(pred), no_potential_impo)`` holding
        row indices into ``pred``; rows whose label is outside
        ``range(n_classes)`` stay zero
    """
    N = len(pred)
    active = np.zeros((N, no_potential_impo), dtype=int)
    for i in range(n_classes):
        ii, = np.where(labels == i)
        if ii.size == 0:
            # No sample carries this label, so there is nothing to query and
            # nothing to fill in.
            continue
        pi = pred[ii]
        jj, = np.where(labels != i)
        pj = pred[jj]
        # Find the nearest neighbors using a BallTree
        kdt = BallTree(pj, leaf_size=50, metric='euclidean')
        hardest_examples = kdt.query(pi, k=no_potential_impo,
                                     return_distance=False)
        # Map positions among the other-class samples back to row indices.
        active[ii] = jj[hardest_examples]
    return active