Example #1
import numpy as np
from sklearn.neighbors import BallTree


def NCM(o, Z_T, k):
    # Stack the query point o (a column vector) with the reference columns
    # Z_T, so that the rows of Y.T are samples and the first row is o itself.
    Y = np.c_[o, Z_T]
    tree = BallTree(Y.T, leaf_size=3)
    # Query k + 1 neighbors because the nearest neighbor of o is o itself at
    # distance zero; ind holds the neighbor indices, dist the distances.
    dist, ind = tree.query(Y.T[:1], k=k + 1)
    return dist.sum()
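A minimal usage sketch, assuming o is a single sample stored as a column vector and Z_T holds reference samples as columns; note that k is passed explicitly here, whereas the original snippet read it from a self.k attribute of an enclosing class:

rng = np.random.default_rng(0)
Z_T = rng.normal(size=(3, 20))   # 20 reference samples as columns, 3 features each
o = rng.normal(size=(3, 1))      # one query sample as a column vector
score = NCM(o, Z_T, k=5)         # summed distance to the 5 nearest neighbors
print(score)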
Example #2
import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.neighbors import BallTree
from sklearn.utils import check_random_state


def test_query_haversine():
    rng = check_random_state(0)
    # Random (latitude, longitude) pairs in radians
    X = 2 * np.pi * rng.random_sample((40, 2))
    bt = BallTree(X, leaf_size=1, metric="haversine")
    dist1, ind1 = bt.query(X, k=5)
    # brute_force_neighbors is a helper defined in the test module
    dist2, ind2 = brute_force_neighbors(X, X, k=5, metric="haversine")

    assert_array_almost_equal(dist1, dist2)
    assert_array_almost_equal(ind1, ind2)
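The test relies on a brute_force_neighbors helper that is not shown. A sketch of what such a helper could look like; the name and signature follow the call above, but the implementation is an assumption:

import numpy as np
from sklearn.metrics import DistanceMetric


def brute_force_neighbors(X, Y, k, metric):
    # Exhaustively compute the full pairwise distance matrix, then take
    # the k smallest entries per query row, sorted by distance.
    D = DistanceMetric.get_metric(metric).pairwise(Y, X)
    ind = np.argsort(D, axis=1)[:, :k]
    dist = np.take_along_axis(D, ind, axis=1)
    return dist, ind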
Example #3
import numpy as np
from sklearn.neighbors import BallTree


def knn_error_score(L, x_train, y_train, x_test, y_test, k, tree_size=15):
    """
    Measures the training and testing errors of a kNN classifier implemented using BallTree.
    :param L: linear transformation
    :param x_train: training vectors (each row is an instance)
    :param y_train: training labels (1-D array)
    :param x_test: test vectors
    :param y_test: test labels
    :param k: number of nearest neighbors
    :param tree_size: leaf size of the BallTree
    :return: training and testing error of the k-NN classifier.
    """
    assert y_train.ndim == 1 and y_test.ndim == 1
    assert x_train.shape[0] == len(y_train)
    assert x_test.shape[0] == len(y_test)
    assert isinstance(k, (int, np.int32, np.int64)) and k > 0

    if len(L) != 0:
        # L is the initial linear projection, for example PCA or LDA
        x_train = x_train @ L.T
        x_test = x_test @ L.T

    tree = BallTree(x_train, leaf_size=tree_size, metric='euclidean')

    MM = np.append(y_train, y_test).min()
    NTr, NTe = x_train.shape[0], x_test.shape[0]

    # Use the tree to find each test point's nearest training points
    # iTe: indices into the training set of each test point's neighbors
    dists, iTe = tree.query(x_test, k=k, return_distance=True)

    # Labels of each test point's neighbors in the training set
    # (LSKnn2 is a voting helper defined elsewhere in the project)
    lTe2 = LSKnn2(y_train[iTe], k, MM)
    # Compute the error for each k
    test_error = np.sum(lTe2 != np.repeat(y_test, k, axis=0), axis=1) / NTe

    # Query k + 1 neighbors of each training point and drop the first
    # column, which is the point itself at distance zero
    dists, iTr = tree.query(x_train, k=k + 1, return_distance=True)
    iTr = iTr[:, 1:]
    lTr2 = LSKnn2(y_train[iTr], k, MM)
    training_error = np.sum(lTr2 != np.repeat(y_train, k, axis=0), axis=1) / NTr

    return float(training_error), float(test_error)
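The drop-the-first-column trick above is the standard way to get leave-one-out neighbors from a BallTree, since every point's nearest neighbor is itself. A self-contained sketch of a 1-NN training-error computation built the same way, on toy data (values illustrative only):

import numpy as np
from sklearn.neighbors import BallTree

rng = np.random.default_rng(0)
x_train = rng.normal(size=(100, 5))
y_train = rng.integers(0, 3, size=100)

tree = BallTree(x_train, leaf_size=15, metric='euclidean')
# Ask for 2 neighbors and discard column 0: each point's nearest
# neighbor is itself at distance zero.
ind = tree.query(x_train, k=2, return_distance=False)
nn_labels = y_train[ind[:, 1]]
training_error = np.mean(nn_labels != y_train)
print(training_error)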
Example #4
import numpy as np
from sklearn.neighbors import BallTree


def find_target_neighbors(X, labels, K, n_classes):
    N, D = X.shape
    targets_ind = np.zeros((N, K), dtype=int)
    for i in range(n_classes):
        jj, = np.where(labels == i)
        # Samples of class i
        Xu = X[jj]
        kdt = BallTree(Xu, leaf_size=50, metric='euclidean')
        # Query K + 1 neighbors and drop the first one (the point itself)
        targets = kdt.query(Xu, k=K + 1, return_distance=False)
        # Map within-class neighbor indices back to indices into X
        targets_ind[jj] = jj[targets[:, 1:]]

    return targets_ind
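A minimal usage sketch with made-up data: for every sample, it returns the indices of its K nearest neighbors that share its label.

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(60, 4))
labels = np.repeat([0, 1, 2], 20)   # 20 samples per class

targets = find_target_neighbors(X, labels, K=5, n_classes=3)
print(targets.shape)  # (60, 5)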
Example #5
import numpy as np
import pandas as pd
from sklearn.neighbors import BallTree


def get_closest_locations(data,
                          query_lon,
                          query_lat,
                          query_cat=None,
                          query_subcat=None,
                          num_locs=10):
    bt_lons = []
    bt_lats = []
    bt_indices = []

    for n, entry in enumerate(data):
        valid = True
        if query_cat is not None and (query_cat.lower().strip()
                not in entry["mapping"]["top_category"].lower().strip()):
            valid = False
        if query_subcat is not None and (query_subcat.lower().strip()
                not in entry["mapping"]["sub_category"].lower().strip()):
            valid = False

        # Skip entries that fail the category filters
        if not valid:
            continue

        lon = float(entry["mapping"]["longitude"])
        lat = float(entry["mapping"]["latitude"])
        bt_lons.append(lon)
        bt_lats.append(lat)
        bt_indices.append(n)

    bt_lons = np.array(bt_lons)
    bt_lats = np.array(bt_lats)
    bt_indices = np.array(bt_indices)

    num_locs = min(num_locs, len(bt_indices))
    if num_locs == 0:
        return []

    records = pd.DataFrame(data={
        'lon': bt_lons,
        'lat': bt_lats,
        'index': bt_indices
    })

    # The haversine metric expects (lat, lon) pairs in radians
    bt = BallTree(np.deg2rad(records[['lat', 'lon']].values),
                  metric='haversine')
    distances, indices = bt.query(np.deg2rad(np.c_[query_lat, query_lon]),
                                  num_locs)

    data_indices = bt_indices[indices[0]].tolist()

    return data_indices
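A small usage sketch, assuming each entry of data carries a "mapping" dict with the fields the function reads; the records below are made up:

data = [
    {"mapping": {"top_category": "Restaurants", "sub_category": "Pizza",
                 "longitude": "2.3522", "latitude": "48.8566"}},
    {"mapping": {"top_category": "Museums", "sub_category": "Art",
                 "longitude": "2.3376", "latitude": "48.8606"}},
]
closest = get_closest_locations(data, query_lon=2.35, query_lat=48.85,
                                query_cat="restaurants", num_locs=1)
print(closest)  # indices into data, nearest first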
Example #6
import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.neighbors import BallTree
from sklearn.utils import check_random_state


def test_ball_tree_query_metrics(metric):
    rng = check_random_state(0)
    # BOOLEAN_METRICS and DISCRETE_METRICS are lists of metric names
    # defined at module level in the test file
    if metric in BOOLEAN_METRICS:
        X = rng.random_sample((40, 10)).round(0)
        Y = rng.random_sample((10, 10)).round(0)
    elif metric in DISCRETE_METRICS:
        X = (4 * rng.random_sample((40, 10))).round(0)
        Y = (4 * rng.random_sample((10, 10))).round(0)

    k = 5

    bt = BallTree(X, leaf_size=1, metric=metric)
    dist1, ind1 = bt.query(Y, k)
    dist2, ind2 = brute_force_neighbors(X, Y, k, metric)
    assert_array_almost_equal(dist1, dist2)
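The test is written to be parametrized over the two metric lists, which are not shown. A sketch of how those module-level pieces could fit together; the exact metric names are an assumption modeled on scikit-learn's own BallTree tests, not taken from this document:

import pytest

# Assumed metric groupings (modeled on scikit-learn's test suite)
BOOLEAN_METRICS = ["matching", "jaccard", "dice", "rogerstanimoto",
                   "russellrao", "sokalmichener", "sokalsneath"]
DISCRETE_METRICS = ["hamming", "canberra", "braycurtis"]


@pytest.mark.parametrize("metric", BOOLEAN_METRICS + DISCRETE_METRICS)
def test_ball_tree_query_metrics(metric):
    ...  # body as in Example #6 above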
Example #7
import numpy as np
from sklearn.neighbors import BallTree


def find_impostors(pred, labels, n_classes, no_potential_impo):
    N = len(pred)
    active = np.zeros((N, no_potential_impo), dtype=int)
    for i in range(n_classes):
        ii, = np.where(labels == i)
        pi = pred[ii]
        jj, = np.where(labels != i)
        pj = pred[jj]
        # Find the nearest differently-labeled neighbors using a BallTree
        kdt = BallTree(pj, leaf_size=50, metric='euclidean')
        hardest_examples = kdt.query(pi,
                                     k=no_potential_impo,
                                     return_distance=False)
        # Map indices within pj back to indices into pred
        active[ii] = jj[hardest_examples]

    return active
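A usage sketch on toy embeddings (values illustrative only): for each sample, the function returns the indices of its nearest samples from other classes, i.e. the candidate impostors.

import numpy as np

rng = np.random.default_rng(0)
pred = rng.normal(size=(30, 4))     # e.g. embedded samples
labels = np.repeat([0, 1, 2], 10)

active = find_impostors(pred, labels, n_classes=3, no_potential_impo=3)
print(active.shape)  # (30, 3)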