Ejemplo n.º 1
0
    def _check_p_distance_vs_KDT(self, p):
        """Check BallTree Minkowski-``p`` distances against cKDTree.

        Both trees are built on ``self.X`` and queried with ``self.X`` for
        the 5 nearest neighbors; only the distance arrays are compared.
        """
        points = self.X
        ball_tree = BallTree(points, leaf_size=10, metric='minkowski', p=p)
        kd_tree = cKDTree(points, leafsize=10)

        # Neighbor indices are returned too but are not part of the check.
        ball_dist, _ = ball_tree.query(points, k=5)
        kd_dist, _ = kd_tree.query(points, k=5, p=p)

        assert_array_almost_equal(ball_dist, kd_dist)
Ejemplo n.º 2
0
    def _check_p_distance_vs_KDT(self, p):
        """Assert that BallTree and cKDTree agree on 5-NN distances.

        The BallTree uses the 'minkowski' metric with exponent ``p``; the
        cKDTree receives the same ``p`` at query time.
        """
        n_neighbors = 5
        ball_tree = BallTree(self.X, leaf_size=10, metric='minkowski', p=p)
        kd_tree = cKDTree(self.X, leafsize=10)

        dist_ball, _ind_ball = ball_tree.query(self.X, k=n_neighbors)
        dist_kd, _ind_kd = kd_tree.query(self.X, k=n_neighbors, p=p)

        assert_array_almost_equal(dist_ball, dist_kd)
Ejemplo n.º 3
0
def bench_ball_tree(N=2000, D=3, k=15, leaf_size=30):
    """Benchmark the new BallTree against the sklearn BallTree.

    Builds both trees on the same random ``(N, D)`` data, times a
    ``k``-neighbors query on the sklearn tree and on the new tree for every
    combination of ``dualtree`` and ``breadth_first``, then prints the
    timings, the values reported by ``get_n_calls()`` and whether the
    results agree.

    Parameters
    ----------
    N : int
        Number of random points.
    D : int
        Number of columns of the random data.
    k : int
        Number of neighbors requested per query point.
    leaf_size : int
        Leaf size passed to both tree constructors.
    """
    print("Ball Tree")
    X = np.random.random((N, D)).astype(DTYPE)

    t0 = time()
    btskl = skBallTree(X, leaf_size=leaf_size)
    t1 = time()
    bt = BallTree(X, leaf_size=leaf_size)
    t2 = time()

    print("Build:")
    print("  sklearn : %.2g sec" % (t1 - t0))
    print("  new     : %.2g sec" % (t2 - t1))

    t0 = time()
    Dskl, Iskl = btskl.query(X, k)
    t1 = time()

    dist = [Dskl]
    ind = [Iskl]
    times = [t1 - t0]
    labels = ['sklearn']
    counts = [-1]  # placeholder: no call count is collected for sklearn

    for dualtree in (False, True):
        for breadth_first in (False, True):
            bt.reset_n_calls()
            t0 = time()
            # Distinct names so the ``D`` parameter is not shadowed.
            dist_i, ind_i = bt.query(X, k, dualtree=dualtree,
                                     breadth_first=breadth_first)
            t1 = time()
            dist.append(dist_i)
            ind.append(ind_i)
            times.append(t1 - t0)
            counts.append(bt.get_n_calls())

            label = 'dual/' if dualtree else 'single/'
            label += 'breadthfirst' if breadth_first else 'depthfirst'
            labels.append(label)

    print("Query:")
    for lab, t, c in zip(labels, times, counts):
        print("  %s : %.2g sec (%i calls)" % (lab, t, c))
    # A bare ``print`` is a no-op expression when print is a function;
    # print an explicit empty string to get the intended blank line.
    print("")
    # Index ``i - 1`` wraps to the last entry for ``i == 0``, so every result
    # is compared with its predecessor in a cycle.
    print(" distances match: %s"
          % ', '.join(['%s' % np.allclose(dist[i - 1], dist[i])
                       for i in range(len(dist))]))
    print(" indices match: %s"
          % ', '.join(['%s' % np.allclose(ind[i - 1], ind[i])
                       for i in range(len(ind))]))
Ejemplo n.º 4
0
    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Compare BallTree k-NN distances with the brute-force reference.

        The tree is built on ``X`` and queried with ``Y``, both taken from
        the enclosing scope.
        """
        tree = BallTree(X, leaf_size=1, metric=metric, **kwargs)
        tree_dist, _ = tree.query(Y, k, dualtree=dualtree,
                                  breadth_first=breadth_first)
        brute_dist, _ = brute_force_neighbors(X, Y, k, metric, **kwargs)

        # Indices are deliberately not compared: with duplicate distances
        # the two methods may order ties differently, while the distances
        # themselves must still agree.
        assert_allclose(tree_dist, brute_dist)
Ejemplo n.º 5
0
    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Assert BallTree neighbor distances equal the brute-force ones.

        ``X`` (tree data) and ``Y`` (query points) come from the enclosing
        scope; ``kwargs`` is forwarded to both the tree and the reference.
        """
        query_options = dict(dualtree=dualtree, breadth_first=breadth_first)

        ball_tree = BallTree(X, leaf_size=1, metric=metric, **kwargs)
        dist_tree, _ind_tree = ball_tree.query(Y, k, **query_options)
        dist_ref, _ind_ref = brute_force_neighbors(X, Y, k, metric, **kwargs)

        # Only distances are checked: ties between equal distances can make
        # the index arrays differ even when the search is correct.
        assert_allclose(dist_tree, dist_ref)
Ejemplo n.º 6
0
def test_ball_tree_query():
    """BallTree and cKDTree must return the same k-NN distances.

    Checked for k in (2, 4, 6) on 100 random 5-column points.
    """
    X = np.random.random(size=(100, 5))

    for n_neighbors in (2, 4, 6):
        ball_tree = BallTree(X)
        kd_tree = cKDTree(X)

        # Each query returns (distances, indices); only distances are used.
        ball_dist, _ = ball_tree.query(X, k=n_neighbors)
        kd_dist, _ = kd_tree.query(X, k=n_neighbors)

        assert_array_almost_equal(ball_dist, kd_dist)
Ejemplo n.º 7
0
def test_ball_tree_p_distance():
    """BallTree Minkowski distances must match cKDTree for several ``p``.

    Checked for p in (1, 2, 3, 4, inf) with 5 neighbors per point.
    """
    X = np.random.random(size=(100, 5))

    for exponent in (1, 2, 3, 4, np.inf):
        ball_tree = BallTree(X, leaf_size=10, metric="minkowski", p=exponent)
        kd_tree = cKDTree(X, leafsize=10)

        ball_dist, _ = ball_tree.query(X, k=5)
        kd_dist, _ = kd_tree.query(X, k=5, p=exponent)

        assert_array_almost_equal(ball_dist, kd_dist)
Ejemplo n.º 8
0
def test_ball_tree_query():
    """Compare BallTree and cKDTree nearest-neighbor distances.

    Fresh trees are built for each k in (2, 4, 6) over the same random data.
    """
    X = np.random.random(size=(100, 5))

    for k in (2, 4, 6):
        trees = (BallTree(X), cKDTree(X))

        # Element 0 of each query result is the distance array.
        dist_bt, dist_kd = (tree.query(X, k=k)[0] for tree in trees)

        assert_array_almost_equal(dist_bt, dist_kd)
Ejemplo n.º 9
0
def test_ball_tree_p_distance():
    """Check BallTree 'minkowski' distances against cKDTree per exponent.

    The BallTree takes ``p`` at construction, the cKDTree at query time.
    """
    n_neighbors = 5
    X = np.random.random(size=(100, 5))

    for p in (1, 2, 3, 4, np.inf):
        ball_tree = BallTree(X, leaf_size=10, metric='minkowski', p=p)
        kd_tree = cKDTree(X, leafsize=10)

        dist_ball, _ind_ball = ball_tree.query(X, k=n_neighbors)
        dist_kd, _ind_kd = kd_tree.query(X, k=n_neighbors, p=p)

        assert_array_almost_equal(dist_ball, dist_kd)
Ejemplo n.º 10
0
def test_ball_tree_query_radius_count(n_samples=100, n_features=10):
    """``query_radius(count_only=True)`` must match brute-force counts.

    The radius is the mean of the pairwise distance matrix, and the
    reference count is the number of entries per row within that radius.
    """
    # Random data rescaled from [0, 1) to [-1, 1).
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1

    pairwise = DistanceMetric().pdist(X, squareform=True)
    radius = np.mean(pairwise)

    tree_counts = BallTree(X).query_radius(X, radius, count_only=True)
    brute_counts = (pairwise <= radius).sum(1)

    assert_array_almost_equal(tree_counts, brute_counts)
Ejemplo n.º 11
0
    def _check_metrics_float(self, k, metric, kwargs):
        """Check BallTree k-NN distances on ``self.X`` for one metric.

        The reference is built from the full pairwise distance matrix of
        ``DistanceMetric(metric, **kwargs)``: the ``k`` smallest entries of
        each row.
        """
        tree = BallTree(self.X, metric=metric, **kwargs)
        tree_dist, _ = tree.query(self.X, k=k)

        pairwise = DistanceMetric(metric=metric, **kwargs).pdist(
            self.X, squareform=True)

        # Column vector of row numbers so each row picks its own k columns.
        rows = np.arange(self.X.shape[0])[:, None]
        nearest_cols = np.argsort(pairwise, 1)[:, :k]
        ref_dist = pairwise[rows, nearest_cols]

        # Indices are not compared: a tie for nearest neighbor could make
        # them differ.  Matching distances show the search succeeded.
        assert_array_almost_equal(tree_dist, ref_dist)
Ejemplo n.º 12
0
def test_ball_tree_query_radius_count(n_samples=100, n_features=10):
    """Compare BallTree radius counts with counts from pairwise distances.

    Uses the mean pairwise distance as the query radius.
    """
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1

    metric = DistanceMetric()
    dist_matrix = metric.pdist(X, squareform=True)
    mean_dist = np.mean(dist_matrix)

    tree = BallTree(X)
    counted_by_tree = tree.query_radius(X, mean_dist, count_only=True)

    # Brute force: entries in each row no farther than the radius.
    counted_by_hand = (dist_matrix <= mean_dist).sum(1)

    assert_array_almost_equal(counted_by_tree, counted_by_hand)
Ejemplo n.º 13
0
    def _check_metrics_bool(self, k, metric, kwargs):
        """Check BallTree k-NN distances from ``self.Ybool`` to ``self.Xbool``.

        The reference takes the ``k`` smallest entries of each row of the
        cross-distance matrix from ``DistanceMetric(metric, **kwargs)``.
        """
        tree = BallTree(self.Xbool, metric=metric, **kwargs)
        tree_dist, _ = tree.query(self.Ybool, k=k)

        cross = DistanceMetric(metric=metric, **kwargs).cdist(
            self.Ybool, self.Xbool)

        rows = np.arange(self.Ybool.shape[0])[:, None]
        nearest_cols = np.argsort(cross, 1)[:, :k]
        ref_dist = cross[rows, nearest_cols]

        # Indices are not compared: ties for nearest neighbors are very
        # common here and would fail the test, while the distances are
        # correct either way.
        assert_array_almost_equal(tree_dist, ref_dist)
Ejemplo n.º 14
0
    def _check_metrics_bool(self, k, metric, kwargs):
        """Validate BallTree distances for ``metric`` on the boolean data.

        The tree is built on ``self.Xbool`` and queried with ``self.Ybool``;
        expected distances come from sorting each row of ``cdist``.
        """
        queries, data = self.Ybool, self.Xbool

        ball_tree = BallTree(data, metric=metric, **kwargs)
        dist_tree, _ind_tree = ball_tree.query(queries, k=k)

        dist_metric = DistanceMetric(metric=metric, **kwargs)
        all_dist = dist_metric.cdist(queries, data)

        order = np.argsort(all_dist, 1)[:, :k]
        row_ids = np.arange(queries.shape[0])[:, None]
        dist_expected = all_dist[row_ids, order]

        # Neighbor indices are skipped on purpose: ties are frequent and
        # would fail the test even though the distances are right.
        assert_array_almost_equal(dist_tree, dist_expected)
Ejemplo n.º 15
0
    def _check_metrics_float(self, k, metric, kwargs):
        """Validate BallTree k-NN distances for ``metric`` on ``self.X``.

        Expected distances are the ``k`` smallest values in each row of the
        square pairwise matrix returned by ``DistanceMetric.pdist``.
        """
        data = self.X

        ball_tree = BallTree(data, metric=metric, **kwargs)
        dist_tree, _ind_tree = ball_tree.query(data, k=k)

        dist_metric = DistanceMetric(metric=metric, **kwargs)
        all_dist = dist_metric.pdist(data, squareform=True)

        order = np.argsort(all_dist, 1)[:, :k]
        row_ids = np.arange(data.shape[0])[:, None]
        dist_expected = all_dist[row_ids, order]

        # Neighbor indices are skipped on purpose: a tie for the nearest
        # neighbor may reorder them.  The distances alone show whether the
        # search worked.
        assert_array_almost_equal(dist_tree, dist_expected)
Ejemplo n.º 16
0
    def test_query_radius_count(self):
        """``query_radius(count_only=True)`` must match brute-force counts.

        The radius is the mean pairwise distance of the rescaled data.
        """
        # center the data
        centered = 2 * self.X - 1

        pairwise = DistanceMetric().pdist(centered, squareform=True)
        radius = np.mean(pairwise)

        tree_counts = BallTree(centered).query_radius(centered, radius,
                                                      count_only=True)
        brute_counts = (pairwise <= radius).sum(1)

        assert_array_almost_equal(tree_counts, brute_counts)
Ejemplo n.º 17
0
    def test_query_radius_count(self):
        """Compare BallTree radius counts with counts from pairwise distances."""
        # center the data
        points = 2 * self.X - 1

        metric = DistanceMetric()
        dist_matrix = metric.pdist(points, squareform=True)
        mean_dist = np.mean(dist_matrix)

        tree = BallTree(points)
        counted_by_tree = tree.query_radius(points, mean_dist,
                                            count_only=True)

        # Brute force: entries in each row no farther than the radius.
        counted_by_hand = (dist_matrix <= mean_dist).sum(1)

        assert_array_almost_equal(counted_by_tree, counted_by_hand)
Ejemplo n.º 18
0
def test_ball_tree_pickle():
    """Check that a BallTree returns the same query results after pickling.

    Yields one check per pickle protocol in (0, 1, 2); each check
    round-trips the tree through ``pickle`` and compares its query output
    with that of the original tree.
    """
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    bt1 = BallTree(X, leaf_size=1)
    # query() returns (distances, indices) in that order; the locals are
    # named to match.
    dist1, ind1 = bt1.query(X)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(bt1, protocol=protocol)
        bt2 = pickle.loads(s)
        dist2, ind2 = bt2.query(X)
        assert_allclose(dist1, dist2)
        assert_allclose(ind1, ind2)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
Ejemplo n.º 19
0
    def test_query_radius_indices(self, n_queries=20):
        """Check the indices returned by ``query_radius`` against brute force.

        The first ``n_queries`` rows of the rescaled data are used as query
        points and the mean of their distances to all points as the radius.
        """
        # center the data
        X = 2 * self.X - 1

        dm = DistanceMetric()
        D = dm.cdist(X[:n_queries], X)
        r = np.mean(D)

        bt = BallTree(X)
        ind = bt.query_radius(X[:n_queries], r, return_distance=False)
        # Broadcast the column numbers over every row so that boolean
        # masking below yields, row by row, the in-radius column indices.
        ind2 = np.zeros(D.shape) + np.arange(D.shape[1])

        # np.concatenate needs a real sequence; ``map`` returns a lazy
        # iterator on Python 3, so build the list explicitly.
        ind = np.concatenate([np.sort(row) for row in ind])
        ind2 = ind2[D <= r]

        assert_array_almost_equal(ind, ind2)
Ejemplo n.º 20
0
def test_ball_tree_pickle():
    """Verify that pickling a BallTree preserves its query results.

    A generator test: for each pickle protocol in (0, 1, 2) it yields a
    check that serialises the tree, restores it and compares the restored
    tree's query output with the original's.
    """
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    bt1 = BallTree(X, leaf_size=1)
    # The first element returned by query() is the distance array and the
    # second the index array, so name them accordingly.
    dist1, ind1 = bt1.query(X)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(bt1, protocol=protocol)
        bt2 = pickle.loads(s)
        dist2, ind2 = bt2.query(X)
        assert_allclose(dist1, dist2)
        assert_allclose(ind1, ind2)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
Ejemplo n.º 21
0
def test_ball_tree_KDE(n_samples=100, n_features=3):
    """Check ``BallTree.kernel_density`` against ``compute_kernel_slow``.

    A generator test: for every kernel, bandwidth ``h``, tolerance pair
    and traversal option it yields a check that the tree's density
    estimate agrees with the reference within ``atol``/``rtol``.
    """
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    bt = BallTree(X, leaf_size=10)

    for kernel in [
            'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear',
            'cosine'
    ]:
        for h in [0.001, 0.01, 0.1]:
            dens_true = compute_kernel_slow(Y, X, kernel, h)

            # Bind the current reference as a default argument.  A plain
            # closure would look ``dens_true`` up when the check runs, and
            # a runner that collects every yielded check before executing
            # any would then compare against the last reference computed.
            def check_results(kernel, h, atol, rtol, dualtree, breadth_first,
                              dens_true=dens_true):
                dens = bt.kernel_density(Y,
                                         h,
                                         atol=atol,
                                         rtol=rtol,
                                         kernel=kernel,
                                         dualtree=dualtree,
                                         breadth_first=breadth_first)
                assert_allclose(dens, dens_true, atol=atol, rtol=rtol)

            for rtol in [0, 1E-5]:
                for atol in [1E-10, 1E-5, 0.1]:
                    for dualtree in (True, False):
                        # The dual-tree case is only exercised with rtol == 0.
                        if dualtree and rtol > 0:
                            continue
                        for breadth_first in (True, False):
                            yield (check_results, kernel, h, atol, rtol,
                                   dualtree, breadth_first)
Ejemplo n.º 22
0
    def test_query_radius_indices(self, n_queries=20):
        """Compare ``query_radius`` indices with a brute-force selection.

        Query points are the first ``n_queries`` rows of the rescaled data;
        the radius is the mean of their distances to all points.
        """
        # center the data
        X = 2 * self.X - 1

        dm = DistanceMetric()
        D = dm.cdist(X[:n_queries], X)
        r = np.mean(D)

        bt = BallTree(X)
        ind = bt.query_radius(X[:n_queries], r, return_distance=False)
        # Every row holds the column numbers 0..n-1, so masking with
        # ``D <= r`` lists the in-radius indices row by row.
        ind2 = np.zeros(D.shape) + np.arange(D.shape[1])

        # Pass a list rather than ``map(...)``: on Python 3 ``map`` is a
        # lazy iterator, which np.concatenate does not accept.
        ind = np.concatenate([np.sort(row) for row in ind])
        ind2 = ind2[D <= r]

        assert_array_almost_equal(ind, ind2)
Ejemplo n.º 23
0
def test_ball_tree_query_radius_distance(n_samples=100, n_features=10):
    """Distances returned by ``query_radius`` must match direct computation.

    The query point is the origin; 100 radii are tried, evenly spaced
    between the norms of the first and the last data point.
    """
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    origin = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    tree = BallTree(X, leaf_size=5)
    norms = np.sqrt(((X - origin)**2).sum(1))

    for radius in np.linspace(norms[0], norms[-1], 100):
        found, found_dist = tree.query_radius(origin, radius + eps,
                                              return_distance=True)

        # Results come back one entry per query point; there is only one.
        neighbors = found[0]
        reported = found_dist[0]

        recomputed = np.sqrt(((origin - X[neighbors])**2).sum(1))

        assert_array_almost_equal(recomputed, reported)
Ejemplo n.º 24
0
def test_ball_tree_query_radius_distance(n_samples=100, n_features=10):
    """Check the distances reported by ``BallTree.query_radius``.

    For a range of radii around the origin, the reported distances are
    recomputed directly from the returned indices and compared.
    """
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1e-15  # roundoff error can cause test to fail
    ball_tree = BallTree(X, leaf_size=5)
    dist_to_query = np.sqrt(((X - query_pt) ** 2).sum(1))

    radii = np.linspace(dist_to_query[0], dist_to_query[-1], 100)
    for r in radii:
        result = ball_tree.query_radius(query_pt, r + eps,
                                        return_distance=True)
        ind_all, dist_all = result

        # Single query point, so take the first (only) entry of each.
        ind_first, dist_first = ind_all[0], dist_all[0]

        expected = np.sqrt(((query_pt - X[ind_first]) ** 2).sum(1))

        assert_array_almost_equal(expected, dist_first)
Ejemplo n.º 25
0
def test_ball_tree_query_radius(n_samples=100, n_features=10):
    """Indices from ``query_radius`` must match a brute-force selection.

    The query point is the origin; 100 radii are tried, evenly spaced
    between the norms of the first and the last data point.
    """
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    origin = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    tree = BallTree(X, leaf_size=5)
    norms = np.sqrt(((X - origin) ** 2).sum(1))

    for radius in np.linspace(norms[0], norms[-1], 100):
        from_tree = tree.query_radius(origin, radius + eps)[0]
        from_brute = np.where(norms <= radius + eps)[0]

        # Sort both sides so the comparison ignores return order.
        assert_allclose(np.sort(from_brute), np.sort(from_tree))
Ejemplo n.º 26
0
def test_ball_tree_query_radius(n_samples=100, n_features=10):
    """Compare ``BallTree.query_radius`` indices with direct thresholding.

    For each radius, the points whose norm is within ``r + eps`` of the
    origin are expected to be exactly those returned by the tree.
    """
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    ball_tree = BallTree(X, leaf_size=5)
    dist_to_query = np.sqrt(((X - query_pt)**2).sum(1))

    radii = np.linspace(dist_to_query[0], dist_to_query[-1], 100)
    for r in radii:
        limit = r + eps
        tree_ind = ball_tree.query_radius(query_pt, limit)[0]
        brute_ind = np.where(dist_to_query <= limit)[0]

        # Order is not part of the contract being tested.
        tree_ind.sort()
        brute_ind.sort()

        assert_allclose(brute_ind, tree_ind)
Ejemplo n.º 27
0
def test_ball_tree_query_radius_indices(n_samples=100, n_features=10):
    """Per-query indices from ``query_radius`` must match brute force.

    The first 10 rows are the query points and the mean of their distances
    to all points is the radius.
    """
    n_queries = 10
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    queries = X[:n_queries]

    cross = DistanceMetric().cdist(queries, X)
    radius = np.mean(cross)

    found = BallTree(X).query_radius(queries, radius, return_distance=False)

    for q in range(n_queries):
        from_tree = found[q]
        from_brute = np.where(cross[q] <= radius)[0]

        # Sort in place so the comparison ignores return order.
        from_tree.sort()
        from_brute.sort()

        assert_array_almost_equal(from_tree, from_brute)
Ejemplo n.º 28
0
def test_ball_tree_query_radius_indices(n_samples=100, n_features=10):
    """Check ``BallTree.query_radius`` indices one query point at a time.

    Expected indices are the columns of the cross-distance matrix row that
    do not exceed the radius (the mean cross distance).
    """
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1

    metric = DistanceMetric()
    dist_matrix = metric.cdist(X[:10], X)
    mean_dist = np.mean(dist_matrix)

    ball_tree = BallTree(X)
    ind = ball_tree.query_radius(X[:10], mean_dist, return_distance=False)

    for i in range(10):
        tree_ind = ind[i]
        (brute_ind,) = np.where(dist_matrix[i] <= mean_dist)

        tree_ind.sort()
        brute_ind.sort()

        assert_array_almost_equal(tree_ind, brute_ind)
Ejemplo n.º 29
0
def main():
    """Run ``check`` for both setups, then compare the two implementations.

    After the two ``check`` calls, a radius query around the first of 1000
    random 3-column points is run with ``BallTree`` and ``BruteForce`` and
    the result of ``==`` between the two answers is printed.
    """
    statement = 'nbrs.radius_neighbors(p, radius)'
    check(statement, setup_str_bt)
    check(statement, setup_str_bf)

    n_points, n_dims = 1000, 3
    points = np.random.rand(n_points, n_dims)
    query = points[0]
    query_radius = 0.4

    from_ball_tree = BallTree(points).radius_neighbors(query, query_radius)
    from_brute_force = BruteForce(points).radius_neighbors(query,
                                                           query_radius)
    print(from_ball_tree == from_brute_force)
Ejemplo n.º 30
0
def bench_ball_tree(N=2000, D=3, k=15, leaf_size=30):
    """Time the new BallTree against the sklearn BallTree and print results.

    Both trees are built on the same random ``(N, D)`` data.  A
    ``k``-neighbors query is timed on the sklearn tree and, for each
    combination of ``dualtree`` and ``breadth_first``, on the new tree.
    Build times, query times, the values reported by ``get_n_calls()`` and
    pairwise agreement of the results are printed.

    Parameters
    ----------
    N : int
        Number of random points.
    D : int
        Number of columns of the random data.
    k : int
        Number of neighbors requested per query point.
    leaf_size : int
        Leaf size passed to both tree constructors.
    """
    print("Ball Tree")
    X = np.random.random((N, D)).astype(DTYPE)

    t0 = time()
    btskl = skBallTree(X, leaf_size=leaf_size)
    t1 = time()
    bt = BallTree(X, leaf_size=leaf_size)
    t2 = time()

    print("Build:")
    print("  sklearn : %.2g sec" % (t1 - t0))
    print("  new     : %.2g sec" % (t2 - t1))

    t0 = time()
    Dskl, Iskl = btskl.query(X, k)
    t1 = time()

    dist = [Dskl]
    ind = [Iskl]
    times = [t1 - t0]
    labels = ['sklearn']
    counts = [-1]  # placeholder: no call count is collected for sklearn

    for dualtree in (False, True):
        for breadth_first in (False, True):
            bt.reset_n_calls()
            t0 = time()
            # Named so as not to shadow the ``D`` parameter.
            dist_i, ind_i = bt.query(X,
                                     k,
                                     dualtree=dualtree,
                                     breadth_first=breadth_first)
            t1 = time()
            dist.append(dist_i)
            ind.append(ind_i)
            times.append(t1 - t0)
            counts.append(bt.get_n_calls())

            label = 'dual/' if dualtree else 'single/'
            label += 'breadthfirst' if breadth_first else 'depthfirst'
            labels.append(label)

    print("Query:")
    for lab, t, c in zip(labels, times, counts):
        print("  %s : %.2g sec (%i calls)" % (lab, t, c))
    # A bare ``print`` does nothing when print is a function; emit the
    # intended blank line explicitly.
    print("")
    # ``i - 1`` wraps around to the last entry for ``i == 0``, so each
    # result is compared with its predecessor in a cycle.
    print(
        " distances match: %s" % ', '.join([
            '%s' % np.allclose(dist[i - 1], dist[i]) for i in range(len(dist))
        ]))
    print(
        " indices match: %s" % ', '.join(
            ['%s' % np.allclose(ind[i - 1], ind[i]) for i in range(len(ind))]))
Ejemplo n.º 31
0
def test_ball_tree_two_point(n_samples=100, n_features=3):
    """Check ``two_point_correlation`` against brute-force pair counts.

    For each of 10 radii in [0, 1], the expected count is the number of
    (Y, X) pairs whose euclidean distance does not exceed the radius.
    Yields one check for each ``dualtree`` setting.
    """
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    radii = np.linspace(0, 1, 10)
    tree = BallTree(X, leaf_size=10)

    euclidean = DistanceMetric.get_metric("euclidean")
    pair_dist = euclidean.pairwise(Y, X)
    counts_true = [(pair_dist <= radius).sum() for radius in radii]

    def check_two_point(r, dualtree):
        counted = tree.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_allclose(counted, counts_true)

    for use_dualtree in (True, False):
        yield check_two_point, radii, use_dualtree
Ejemplo n.º 32
0
    def test_query_radius_distance(self):
        """Yield radius-distance checks for 10 radii around one query point.

        Each yielded tuple hands ``self._check_query_radius_distance`` the
        data, the tree, the query point, the sorted reference distances,
        one radius and the roundoff tolerance.
        """
        # center the data
        centered = 2 * self.X - 1

        # choose a query point near the origin
        query = 0.01 * centered[:1]

        eps = 1E-15  # roundoff error can cause test to fail
        tree = BallTree(centered, leaf_size=5)

        # compute reference distances
        reference = DistanceMetric().cdist(query, centered)[0]
        reference.sort()

        nearest, farthest = reference[0], reference[-1]
        for radius in np.linspace(nearest, farthest, 10):
            yield (self._check_query_radius_distance, centered, tree, query,
                   reference, radius, eps)
Ejemplo n.º 33
0
    def __init__(self, training_data_size_ratio: float, k: int = 5):
        """Load and shuffle the dataset, split it, and build per-class trees.

        Args:
            training_data_size_ratio: Fraction of the shuffled rows used as
                training data; the remaining rows become the test data.
            k: Stored on the instance as ``self.k``.
        """
        self.k = k

        # Shuffle in place so that the split below is random.
        self.dataset = prepare_data().values
        np.random.shuffle(self.dataset)
        split_at = int(len(self.dataset) * training_data_size_ratio)

        # The labels are the unique values found in the last column.
        self.classes = set(self.dataset[:, -1])
        label_col = self.dataset.shape[1] - 1

        train_rows = self.dataset[:split_at]
        self.training_data = Classifier.create_classes(
            train_rows, label_col, self.classes)
        test_rows = self.dataset[split_at:]
        self.test_data = Classifier.create_classes(
            test_rows, label_col, self.classes)

        # One tree per class found in the training data.
        self.training_trees = {
            class_: BallTree(data, euclid_metric)
            for class_, data in self.training_data.items()
        }
 def active_select(self):
     """Return the index of the training point with the most nearby queries.

     A ball tree is built over ``self.Q``.  For every row of ``self.Xtrain``
     the per-point state (``dist_list``, ``min_dist``, ``upper_b``) is reset
     and ``self.max_query`` is called; the row whose resulting
     ``dist_list`` is longest wins.  Ties keep the earliest row, and row 0
     is returned when every list is empty.
     """
     # Ball tree over the query variables, indexed by row number.
     query_ids = np.array(range(self.Q.shape[0]))
     query_tree = BallTree(self.Q, self.leaf_size, query_ids)

     best_count = 0  # largest number of in-bound queries seen so far
     best_row = 0  # training row that achieved best_count

     for row, point in enumerate(self.Xtrain):
         # Reset the per-point search state.  Per the original notes,
         # max_query collects in dist_list the query distances lying
         # within self.delta of the minimum distance found.
         self.dist_list = []
         self.min_dist = float('inf')
         self.upper_b = self.min_dist + self.delta
         self.max_query(point, query_tree, depth=0)

         # Number of queries within the bound for this point.
         n_in_bound = len(self.dist_list)
         if n_in_bound > best_count:
             best_count, best_row = n_in_bound, row
     return best_row
Ejemplo n.º 35
0
 def test_query_knn(self):
     """Yield k-NN checks of BallTree against cKDTree.

     One check is yielded for every k in (1, 2, 4, 8, 16) combined with
     both ``dualtree`` settings; both trees are built once on ``self.X``.
     """
     ball_tree = BallTree(self.X)
     kd_tree = cKDTree(self.X)
     for n_neighbors in (1, 2, 4, 8, 16):
         for use_dualtree in (True, False):
             yield (self._check_query_knn, ball_tree, kd_tree, n_neighbors,
                    use_dualtree)
Ejemplo n.º 36
0
from time import time
import numpy as np
from ball_tree import BallTree

# Benchmark script: time BallTree construction, k-neighbors queries (with
# and without dualtree) and radius queries on 10000 random 3-column points.
X = np.random.random((10000, 3))

t0 = time()
BT = BallTree(X, 30)
t1 = time()
# Parenthesised single-argument print works as both a Python 2 statement
# and a Python 3 function call.
print("construction: %.2g sec" % (t1 - t0))

for k in [1, 2, 4, 8]:
    for dual in (False, True):
        t0 = time()
        BT.query(X, k, dualtree=dual)
        t1 = time()
        dual_str = ' (dual)' if dual else ''
        print("query %i in [%i, %i]%s: %.3g sec" % (k, X.shape[0],
                                                    X.shape[1],
                                                    dual_str,
                                                    t1 - t0))

# Radius queries use only the first 1000 points as query points.
for r in 0.1, 0.3, 0.5:
    t0 = time()
    BT.query_radius(X[:1000], r)
    t1 = time()
    print("query r<%.1f in [%i, %i]: %.3g sec" % (r, X.shape[0], X.shape[1],
                                                  t1 - t0))
Ejemplo n.º 37
0
from time import time
import numpy as np
from ball_tree import BallTree

# Benchmark script: build a BallTree on 10000 random 3-column points and
# print the time taken by construction, k-neighbors queries (single and
# dual tree) and radius queries.
X = np.random.random((10000, 3))

t0 = time()
BT = BallTree(X, 30)
t1 = time()
# print is called with a single parenthesised argument so the script runs
# on both Python 2 and Python 3.
print("construction: %.2g sec" % (t1 - t0))

for k in [1, 2, 4, 8]:
    for dual in (False, True):
        t0 = time()
        BT.query(X, k, dualtree=dual)
        t1 = time()
        dual_str = ' (dual)' if dual else ''
        print("query %i in [%i, %i]%s: %.3g sec" % (k, X.shape[0], X.shape[1],
                                                    dual_str, t1 - t0))

# Radius queries use only the first 1000 points as query points.
for r in 0.1, 0.3, 0.5:
    t0 = time()
    BT.query_radius(X[:1000], r)
    t1 = time()
    print("query r<%.1f in [%i, %i]: %.3g sec" % (r, X.shape[0], X.shape[1],
                                                  t1 - t0))
Ejemplo n.º 38
0
 def test_pickle(self):
     """Yield one pickle round-trip check per protocol in (0, 1, 2).

     Each yielded tuple passes ``self._check_pickle`` the protocol, the
     tree built on ``self.X`` and the two arrays from its query, in the
     order in which ``query`` returned them.
     """
     bt1 = BallTree(self.X, leaf_size=1)
     # query() returns (distances, indices); the locals are named to match.
     # They are passed on positionally, so the callee sees the same values
     # in the same positions as before.
     dist1, ind1 = bt1.query(self.X)
     for protocol in (0, 1, 2):
         yield (self._check_pickle, protocol, bt1, dist1, ind1)
Ejemplo n.º 39
0
def bench_KDE(N=1000, D=3, h=0.5, leaf_size=30):
    """Benchmark ``BallTree.kernel_density`` against a brute-force estimate.

    For each bandwidth in (0.001, 0.01, 0.1) a gaussian density is computed
    by brute force and by the tree using single/dual tree traversal, each
    depth first and breadth first.  Timings, the values reported by
    ``get_n_calls()`` and agreement with the brute-force result are printed.

    Parameters
    ----------
    N : int
        Number of random points.
    D : int
        Number of columns of the random data.
    h : float
        NOTE(review): currently unused -- the loop below rebinds ``h`` to
        its own fixed list of bandwidths.  Kept for interface compatibility.
    leaf_size : int
        Leaf size passed to the BallTree constructor.
    """
    X = np.random.random((N, D))
    bt = BallTree(X, leaf_size=leaf_size)
    kernel = 'gaussian'

    print("Kernel Density:")
    atol = 1E-5
    rtol = 1E-5

    for h in [0.001, 0.01, 0.1]:
        t0 = time()
        dens_true = np.exp(-0.5 * ((X[:, None, :]
                                    - X) ** 2).sum(-1) / h ** 2).sum(-1)
        dens_true /= h * np.sqrt(2 * np.pi)
        t1 = time()

        # The runs below are ordered depth first, then breadth first, so
        # that each timing matches the label it is printed under.
        bt.reset_n_calls()
        t2 = time()
        dens1 = bt.kernel_density(X, h, atol=atol, rtol=rtol, kernel=kernel,
                                  dualtree=False, breadth_first=False)
        t3 = time()
        n1 = bt.get_n_calls()

        bt.reset_n_calls()
        t4 = time()
        dens2 = bt.kernel_density(X, h, atol=atol, rtol=rtol, kernel=kernel,
                                  dualtree=False, breadth_first=True)
        t5 = time()
        n2 = bt.get_n_calls()

        bt.reset_n_calls()
        t6 = time()
        dens3 = bt.kernel_density(X, h, atol=atol, kernel=kernel,
                                  dualtree=True, breadth_first=False)
        t7 = time()
        n3 = bt.get_n_calls()

        bt.reset_n_calls()
        t8 = time()
        dens4 = bt.kernel_density(X, h, atol=atol, kernel=kernel,
                                  dualtree=True, breadth_first=True)
        t9 = time()
        n4 = bt.get_n_calls()

        # Single-argument parenthesised prints behave the same as a
        # Python 2 statement and a Python 3 function call.
        print(" h = %.3f" % h)
        print("   brute force: %.2g sec (%i calls)" % (t1 - t0, N * N))
        print("   single tree (depth first): %.2g sec (%i calls)"
              % (t3 - t2, n1))
        print("   single tree (breadth first): %.2g sec (%i calls)"
              % (t5 - t4, n2))
        print("   dual tree: (depth first) %.2g sec (%i calls)"
              % (t7 - t6, n3))
        print("   dual tree: (breadth first) %.2g sec (%i calls)"
              % (t9 - t8, n4))
        matches = (np.allclose(dens_true, dens1, atol=atol, rtol=rtol),
                   np.allclose(dens_true, dens2, atol=atol, rtol=rtol),
                   np.allclose(dens_true, dens3, atol=atol),
                   np.allclose(dens_true, dens4, atol=atol))
        # Wrap the tuple so that ``%s`` formats it as a single value.
        print("   distances match: %s" % (matches,))
Ejemplo n.º 40
0
def bench_KDE(N=1000, D=3, h=0.5, leaf_size=30):
    """Time ``BallTree.kernel_density`` against a brute-force computation.

    For each bandwidth in (0.001, 0.01, 0.1), a gaussian density is
    computed by brute force and by the tree in four configurations
    (single/dual tree, depth/breadth first).  Timings, the values reported
    by ``get_n_calls()`` and agreement with brute force are printed.

    Parameters
    ----------
    N : int
        Number of random points.
    D : int
        Number of columns of the random data.
    h : float
        NOTE(review): currently unused -- the loop below rebinds ``h`` to
        its own fixed list of bandwidths.  Kept for interface compatibility.
    leaf_size : int
        Leaf size passed to the BallTree constructor.
    """
    X = np.random.random((N, D))
    bt = BallTree(X, leaf_size=leaf_size)
    kernel = 'gaussian'

    print("Kernel Density:")
    atol = 1E-5
    rtol = 1E-5

    for h in [0.001, 0.01, 0.1]:
        t0 = time()
        dens_true = np.exp(-0.5 * ((X[:, None, :] - X)**2).sum(-1) /
                           h**2).sum(-1)
        dens_true /= h * np.sqrt(2 * np.pi)
        t1 = time()

        # Runs are ordered depth first, then breadth first, so that every
        # timing is printed under the label describing the run it measured.
        bt.reset_n_calls()
        t2 = time()
        dens1 = bt.kernel_density(X,
                                  h,
                                  atol=atol,
                                  rtol=rtol,
                                  kernel=kernel,
                                  dualtree=False,
                                  breadth_first=False)
        t3 = time()
        n1 = bt.get_n_calls()

        bt.reset_n_calls()
        t4 = time()
        dens2 = bt.kernel_density(X,
                                  h,
                                  atol=atol,
                                  rtol=rtol,
                                  kernel=kernel,
                                  dualtree=False,
                                  breadth_first=True)
        t5 = time()
        n2 = bt.get_n_calls()

        bt.reset_n_calls()
        t6 = time()
        dens3 = bt.kernel_density(X,
                                  h,
                                  atol=atol,
                                  kernel=kernel,
                                  dualtree=True,
                                  breadth_first=False)
        t7 = time()
        n3 = bt.get_n_calls()

        bt.reset_n_calls()
        t8 = time()
        dens4 = bt.kernel_density(X,
                                  h,
                                  atol=atol,
                                  kernel=kernel,
                                  dualtree=True,
                                  breadth_first=True)
        t9 = time()
        n4 = bt.get_n_calls()

        # Single-argument parenthesised prints behave the same as a
        # Python 2 statement and a Python 3 function call.
        print(" h = %.3f" % h)
        print("   brute force: %.2g sec (%i calls)" % (t1 - t0, N * N))
        print("   single tree (depth first): %.2g sec (%i calls)" %
              (t3 - t2, n1))
        print("   single tree (breadth first): %.2g sec (%i calls)" %
              (t5 - t4, n2))
        print("   dual tree: (depth first) %.2g sec (%i calls)" %
              (t7 - t6, n3))
        print("   dual tree: (breadth first) %.2g sec (%i calls)" %
              (t9 - t8, n4))
        matches = (np.allclose(dens_true, dens1, atol=atol, rtol=rtol),
                   np.allclose(dens_true, dens2, atol=atol, rtol=rtol),
                   np.allclose(dens_true, dens3, atol=atol),
                   np.allclose(dens_true, dens4, atol=atol))
        # Wrap the tuple so that ``%s`` formats it as a single value.
        print("   distances match: %s" % (matches,))
Ejemplo n.º 41
0
class min_max_l2distance(learner):
    #curr_minmax=float('inf')
    #curr_winner=0

    def __init__(self, fp, leaf_size):
        """Initialise the base learner and reset the min-max search state.

        Parameters
        ----------
        fp :
            Forwarded unchanged to ``learner.__init__``.
        leaf_size :
            Stored on the instance; ``create_ball_tree`` hands it to
            ``BallTree``.
        """
        learner.__init__(self, fp)

        # Configuration of this strategy.
        self.active_method = 'minmax_l2distance'
        self.leaf_size = leaf_size

        # Best candidate found so far by the tree search.
        self.curr_minmax = float('inf')
        self.curr_winner = 0
        self.curr_id = 0

        # Bookkeeping.
        self.max_min_dist = 0
        self.max_min_point = 0
        self.complexity = 0
        # NOTE(review): ``self.fsave`` is not assigned in this method;
        # presumably ``learner.__init__`` sets it -- confirm.
        self.fcomplexity = self.fsave + 'complex'
    def create_ball_tree(self):  # done
        """Build ``self.tree`` from the training matrix.

        The tree is constructed from ``self.Xtrain`` and ``self.leaf_size``
        together with an array holding the row numbers
        ``0 .. n_rows - 1`` of ``self.Xtrain``.
        """
        # np.arange builds the index array directly (no intermediate Python
        # sequence) and keeps an integer dtype even when there are no rows.
        row_ids = np.arange(self.Xtrain.shape[0])
        self.tree = BallTree(self.Xtrain, self.leaf_size, row_ids)

    def show_ball_tree_n_points(self):
        """Plot tree depths 0 to 3 of ``self.tree`` on a 2x2 grid.

        Every panel is drawn by ``self.tree.draw_circle`` and titled with
        the depth it shows; the finished figure is displayed with
        ``plt.show()``.
        """
        fig = plt.figure(figsize=(5, 5))
        fig.subplots_adjust(wspace=0.1,
                            hspace=0.15,
                            left=0.1,
                            right=0.9,
                            bottom=0.05,
                            top=0.9)

        for level in range(4):
            # Subplot positions are 1-based while tree depths are 0-based,
            # so depth ``level`` goes into panel ``level + 1``.
            ax = fig.add_subplot(2, 2, level + 1, xticks=[], yticks=[])
            self.tree.draw_circle(ax, depth=level)
            ax.set_title('level %i' % level)

        # suptitle() adds a title to the entire figure
        fig.suptitle('Ball-tree Example')
        plt.show()

    def load_data(self):
        """Load the data through the base class, then index it.

        Runs ``learner.load_data`` first and builds the ball tree
        afterwards via ``create_ball_tree``.
        """
        learner.load_data(self)
        # The tree is built from whatever the base-class loader prepared.
        self.create_ball_tree()

    def create_query_ball(self):  # done
        """Return centre and radius of a ball around the rows of ``self.Q``.

        The centre is the column-wise mean of ``self.Q``.  The radius is the
        largest 2-norm of ``row - centre`` over all rows of ``self.Q`` and
        stays at ``0`` when no row yields a norm greater than zero.

        Returns
        -------
        q_center, q_radius
        """
        q_center = np.array(self.Q.mean(0))

        row_count = self.Q.shape[0]
        offsets = [
            LA.norm(self.Q.getrow(row).toarray() - q_center, 2)
            for row in range(row_count)
        ]

        # Seeding with 0 reproduces the running-maximum start value.
        q_radius = max([0] + offsets)
        return q_center, q_radius

    def get_bounds(self, q_center, q_radius, BT):
        """Compute distance bounds between the query ball and the ball ``BT``.

        With ``d`` the norm of ``BT.loc - q_center``:

        * ``min_dist``    = ``d - (BT.radius + q_radius)``
        * ``max_dist``    = ``d + (BT.radius + q_radius)``
        * ``maxmin_dist`` = the same value as ``min_dist``
        * ``minmax_dist`` = ``d - BT.radius``, floored at 0

        Returns
        -------
        min_dist, max_dist, maxmin_dist, minmax_dist
        """
        center_gap = LA.norm(BT.loc - q_center)
        radii_sum = BT.radius + q_radius

        lower = center_gap - radii_sum
        upper = center_gap + radii_sum
        minmax_bound = max(0, center_gap - BT.radius)

        # The max-min bound currently just reuses the plain lower bound.
        return lower, upper, lower, minmax_bound

    def prune_child_level1(self, q_center, q_radius, BT):  # left
        """Decide whether one child of ``BT`` can be discarded.

        The bounds of both children are obtained from ``get_bounds``.  A
        child is flagged when its ``maxmin`` bound exceeds the ``minmax``
        bound of its sibling; child 1 is tested first.

        Returns
        -------
        (c1_prune, c2_prune)
            ``(1, 0)`` to prune child 1, ``(0, 1)`` to prune child 2 and
            ``(0, 0)`` when neither can be pruned.
        """
        _, _, lower1, upper1 = self.get_bounds(q_center, q_radius, BT.child1)
        _, _, lower2, upper2 = self.get_bounds(q_center, q_radius, BT.child2)

        if lower1 > upper2:
            # Child 1 cannot beat child 2.
            verdict = (1, 0)
        elif lower2 > upper1:
            # Child 2 cannot beat child 1.
            verdict = (0, 1)
        else:
            # Nothing to prune!
            verdict = (0, 0)
        return verdict

    def brute_force_l2_norm(self, X, idx):
        """Exhaustively find the row of ``X`` whose farthest query is nearest.

        For every row ``x`` of ``X`` the largest L2 distance to any row of
        ``self.Q`` is computed; the row with the smallest such distance wins.

        Parameters
        ----------
        X :
            Rows to examine.  Must be iterable row by row, expose ``shape``
            and support ``(x - q).toarray()`` (presumably a scipy sparse
            matrix -- confirm against the caller).
        idx :
            Identifiers paired with the rows of ``X`` through ``zip``.

        Returns
        -------
        minmax_eu, minmax_x, minmax_id, complexity
            The winning distance, the winning row, its identifier and the
            number of row/query pairs (``X.shape[0] * self.Q.shape[0]``).
            When ``X`` has no rows the result is
            ``(inf, None, None, 0)``.  When ``self.Q`` has no rows every
            candidate ties at ``-inf`` and the first row is returned.
        """
        minmax_eu = float("inf")
        # Defined up front so that an empty ``X`` yields a well-formed
        # result instead of an UnboundLocalError.
        minmax_x = None
        minmax_id = None

        for x, row_id in zip(X, idx):
            # Distance from x to the query that is farthest away from it.
            max_eu = float("-inf")
            for q in self.Q:
                eu = LA.norm((x - q).toarray())
                if eu > max_eu:
                    max_eu = eu

            if max_eu < minmax_eu:
                minmax_eu = max_eu
                # Record the current row itself so that row and identifier
                # always belong together.
                minmax_x = x
                minmax_id = row_id

        complexity = X.shape[0] * self.Q.shape[0]
        return minmax_eu, minmax_x, minmax_id, complexity

    def minmaxdist(self, BT, q_center, q_radius, depth):
        """Recursively search the tree rooted at ``BT`` for the min-max winner.

        The best result found so far lives in ``self.curr_minmax``,
        ``self.curr_winner`` and ``self.curr_id``; ``self.complexity``
        accumulates the cost reported by ``brute_force_l2_norm``.

        * Leaf (``BT.child1 is None``): if the ``minmax`` bound of the ball
          is below ``self.curr_minmax`` the leaf is searched exhaustively
          and the current best is updated when the leaf result is smaller.
        * Internal node: ``prune_child_level1`` decides which children
          remain.  When both remain, the child with the smaller ``minmax``
          bound is visited first.  A child is only entered while its bound
          is below ``self.curr_minmax``, which is re-read after each
          descent.

        Parameters
        ----------
        BT :
            Current tree node (``child1``, ``child2``; leaves also provide
            ``data`` and ``idx``).
        q_center, q_radius :
            Query ball, passed through to ``get_bounds``.
        depth : int
            Depth of ``BT``; only used in the progress output.
        """
        # Leaf node
        if BT.child1 is None:
            print('I am in child node at depth %d' % (depth))

            _, _, _, minmax_d = self.get_bounds(q_center, q_radius, BT)
            print('Current actual minmax = {0}, Ball minmax = {1}'.format(
                self.curr_minmax, minmax_d))

            if minmax_d < self.curr_minmax:
                # The ball may still contain a better point: check it
                # exhaustively.
                win_dist, win_x, win_id, curr_complexity = \
                    self.brute_force_l2_norm(BT.data, BT.idx)
                self.complexity += curr_complexity
                if win_dist < self.curr_minmax:
                    self.curr_minmax = win_dist
                    self.curr_winner = win_x
                    self.curr_id = win_id
            return

        # Internal node: bounds of BOTH children.
        _, _, _, c1_minmax_d = self.get_bounds(q_center, q_radius, BT.child1)
        _, _, _, c2_minmax_d = self.get_bounds(q_center, q_radius, BT.child2)

        # Work out what nodes to prune and what to leave!
        c1_prune, c2_prune = self.prune_child_level1(q_center, q_radius, BT)
        print('---- pruning flags after level 1 = ({0},{1})'.format(
            c1_prune, c2_prune))

        first = (1, BT.child1, c1_minmax_d)
        second = (2, BT.child2, c2_minmax_d)
        if c1_prune == 0 and c2_prune == 1:
            to_visit = [first]
        elif c2_prune == 0 and c1_prune == 1:
            to_visit = [second]
        elif c1_prune == 0 and c2_prune == 0:
            # No child pruned: start with the more promising one.
            if c1_minmax_d < c2_minmax_d:
                to_visit = [first, second]
            else:
                to_visit = [second, first]
        else:
            to_visit = []

        for number, child, bound in to_visit:
            # curr_minmax is read here, i.e. after any earlier descent, so
            # the second child benefits from what the first one found.
            print('Current minmax = {0} where child {1} minmax bound = {2}'
                  .format(self.curr_minmax, number, bound))
            if bound < self.curr_minmax:
                self.minmaxdist(child, q_center, q_radius, depth + 1)

    def write_complexity_ratio(self):
        """Append a search-cost summary line to ``self.fcomplexity``.

        The line reports ``self.complexity`` against the product of the row
        counts of ``self.Xtrain`` and ``self.Q``.
        """
        n_train = self.Xtrain.shape[0]
        n_query = self.Q.shape[0]
        total = n_train * n_query

        line = ('search complexity ' + str(self.complexity) +
                ' out of total ' + str(total) + ' nodes\n')
        with open(self.fcomplexity, 'a') as out:
            out.write(line)

    def active_select(self):
        """Run the tree search for the current query set.

        Resets ``self.complexity``, builds the query ball with
        ``create_query_ball`` and starts ``minmaxdist`` at the root of
        ``self.tree`` with depth 0.

        Returns
        -------
        The value of ``self.curr_id`` after the search.
        """
        # NOTE(review): only ``complexity`` is reset here; ``curr_minmax``
        # and ``curr_id`` keep whatever an earlier call left behind --
        # confirm that this carry-over is intended.
        self.complexity = 0

        center, radius = self.create_query_ball()
        self.minmaxdist(self.tree, center, radius, 0)

        return self.curr_id

    def check_recursion(self):
        """Count ``self.ck_rec`` down to zero, printing every value.

        Each positive value is printed and decremented; the final value is
        printed with a trailing blank.  The countdown is a loop, so large
        counters cannot exhaust the interpreter's recursion limit, and a
        counter that is already at or below zero is printed once and left
        unchanged.
        """
        while self.ck_rec > 0:
            print('ck rec %d' % (self.ck_rec))
            self.ck_rec -= 1
        print('ck rec %d ' % (self.ck_rec))

    """Doubt
    Should we return only the nearest points? How to deal with repetition?
    """
    # print the ball at level 4
    # whenever getting dist for a leaf  draw the circle with red
    """
Ejemplo n.º 42
0
 def fit(self, X, Y=()):
     """Store the labels and build a ``BallTree`` over ``X``.

     Parameters
     ----------
     X :
         Data handed to ``BallTree``.
     Y :
         Labels; converted to an integer array.

     Returns
     -------
     self
     """
     # Y must be integral because it is used as an index later on.  The
     # builtin ``int`` replaces the ``np.int`` alias, which recent NumPy
     # releases no longer provide.
     self.Y = np.asanyarray(Y, dtype=int)
     self.ball_tree = BallTree(X, self.window_size)
     return self
Ejemplo n.º 43
0
class Neighbors(BaseEstimator, ClassifierMixin):
    """Classifier implementing k-Nearest Neighbor Algorithm.

    Parameters
    ----------
    n_neighbors : int
        default number of neighbors.
    window_size : int
        Window size passed to BallTree

    Notes
    -----
    The training data and labels are supplied through ``fit``: the data
    is handed to ``BallTree`` and the labels are stored as an integer
    array (only integer labels are supported).

    http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm

    Examples
    --------
    >>> samples = [[0.,0.,1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
    >>> labels = [0,0,1,1]
    >>> from scikits.learn.neighbors import Neighbors
    >>> neigh = Neighbors(n_neighbors=3)
    >>> neigh.fit(samples, labels)
    Neighbors(n_neighbors=3, window_size=1)
    >>> print neigh.predict([[0,0,0]])
    [ 0.]
    """

    def __init__(self, n_neighbors=5, window_size=1):
        """Internally uses the ball tree datastructure and algorithm for fast
        neighbors lookups on high dimensional datasets.
        """
        self.n_neighbors = n_neighbors
        self.window_size = window_size

    def fit(self, X, Y=()):
        """Store the labels and build the ball tree over ``X``.

        Parameters
        ----------
        X : array-like
            Data handed to ``BallTree``.
        Y : array-like of int
            Labels; converted to an integer array.

        Returns
        -------
        self
        """
        # Y must be integral because it is used as an index later on.  The
        # builtin ``int`` replaces the ``np.int`` alias, which recent NumPy
        # releases no longer provide.
        self.Y = np.asanyarray(Y, dtype=int)
        self.ball_tree = BallTree(X, self.window_size)
        return self

    def kneighbors(self, data, n_neighbors=None):
        """Finds the K-neighbors of a point.

        Parameters
        ----------
        data : array-like
            The new point (or several points, one per row).
        n_neighbors : int
            Number of neighbors to get (default is the value
            passed to the constructor).

        Returns
        -------
        dist : array
            Array representing the lengths to point.
        ind : array
            Array representing the indices of the nearest points in the
            population matrix.

        Examples
        --------
        In the following example, we construct a Neighbors class from an
        array representing our data set and ask who's the closest point to
        [1,1,1]

        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> labels = [0, 0, 1]
        >>> from scikits.learn.neighbors import Neighbors
        >>> neigh = Neighbors(n_neighbors=1)
        >>> neigh.fit(samples, labels)
        Neighbors(n_neighbors=1, window_size=1)
        >>> print neigh.kneighbors([1., 1., 1.])
        (array(0.5), array(2))

        As you can see, it returns [0.5], and [2], which means that the
        element is at distance 0.5 and is the third element of samples
        (indexes start at 0). You can also query for multiple points:

        >>> print neigh.kneighbors([[0., 1., 0.], [1., 0., 1.]])
        (array([ 0.5       ,  1.11803399]), array([1, 2]))

        """
        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        return self.ball_tree.query(data, k=n_neighbors)

    def predict(self, T, n_neighbors=None):
        """Predict the class labels for the provided data.

        Parameters
        ----------
        T : array
            A 2-D array representing the test point.
        n_neighbors : int
            Number of neighbors to get (default is the value
            passed to the constructor).

        Returns
        -------
        labels: array
            List of class labels (one for each data sample).

        Examples
        --------
        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> labels = [0, 0, 1]
        >>> from scikits.learn.neighbors import Neighbors
        >>> neigh = Neighbors(n_neighbors=1)
        >>> neigh.fit(samples, labels)
        Neighbors(n_neighbors=1, window_size=1)
        >>> print neigh.predict([.2, .1, .2])
        0
        >>> print neigh.predict([[0., -1., 0.], [3., 2., 0.]])
        [0 1]
        """
        T = np.asanyarray(T)
        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        return _predict_from_BallTree(self.ball_tree, self.Y, T, n_neighbors)
Ejemplo n.º 44
0
 def check_neighbors(metric):
     """Check BallTree neighbor distances against brute force for ``metric``.

     ``X``, ``Y`` and ``k`` come from the enclosing scope.  Only the
     distances are compared; the returned indices are ignored.
     """
     tree = BallTree(X, leaf_size=1, metric=metric)
     tree_dist, _ = tree.query(Y, k)
     brute_dist, _ = brute_force_neighbors(X, Y, k, metric)
     assert_allclose(tree_dist, brute_dist)
Ejemplo n.º 45
0
def kneighbors_graph(X, n_neighbors, weight=None, ball_tree=None,
                     window_size=1):
    """Computes the (weighted) graph of k-Neighbors

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Coordinates of samples. One sample per row.

    n_neighbors : int
        Number of neighbors for each sample.

    weight : None (default), "distance" or "barycenter"
        Weights to apply on graph edges. If weight is None
        then no weighting is applied (1 for each edge).
        If weight equals "distance" the edge weight is the
        Euclidean distance. If weight equals "barycenter"
        the weights are barycenter weights estimated by
        solving a linear system for each point.

    ball_tree : None or instance of precomputed BallTree
        When None, a BallTree is built from X and window_size.

    window_size : int
        Window size passed to the BallTree

    Returns
    -------
    A : sparse matrix, shape = [n_samples, ball_tree.size]
        A is returned as a LInked List (LIL) sparse matrix
        A[i,j] = weight of edge that connects i to j

    Raises
    ------
    ValueError
        If weight is none of the supported values.

    Examples
    --------
    >>> X = [[0], [2], [1]]
    >>> from scikits.learn.neighbors import kneighbors_graph
    >>> A = kneighbors_graph(X, 2)
    >>> A.todense()
    matrix([[ 1.,  0.,  1.],
            [ 0.,  1.,  1.],
            [ 0.,  1.,  1.]])
    """
    from scipy import sparse
    X = np.asanyarray(X)
    n_samples = X.shape[0]
    if ball_tree is None:
        ball_tree = BallTree(X, window_size)
    A = sparse.lil_matrix((n_samples, ball_tree.size))
    dist, ind = ball_tree.query(X, k=n_neighbors)
    if weight is None:
        for i, li in enumerate(ind):
            if n_neighbors > 1:
                A[i, list(li)] = np.ones(n_neighbors)
            else:
                A[i, li] = 1.0
    # Strings are compared by value: identity (``is``) only matches when the
    # caller's string happens to be the very same object as the literal.
    elif weight == "distance":
        for i, li in enumerate(ind):
            if n_neighbors > 1:
                A[i, list(li)] = dist[i, :]
            else:
                A[i, li] = dist[i, 0]
    elif weight == "barycenter":
        # XXX : the next loop could be done in parallel
        # by parallelizing groups of indices
        for i, li in enumerate(ind):
            if n_neighbors > 1:
                X_i = ball_tree.data[li]
                A[i, list(li)] = barycenter_weights(X[i], X_i)
            else:
                A[i, li] = 1.0
    else:
        raise ValueError("Unknown weight type")
    return A
Ejemplo n.º 46
0
 def test_pickle(self):
     """Yield one ``_check_pickle`` case for each pickle protocol 0, 1, 2.

     Every case receives the same tree built with ``leaf_size=1`` and the
     two results of querying it with ``self.X``.
     """
     tree = BallTree(self.X, leaf_size=1)
     # NOTE(review): the two results are forwarded in the order in which
     # query() returns them; confirm that order against _check_pickle.
     first_result, second_result = tree.query(self.X)
     for proto in range(3):
         yield (self._check_pickle, proto, tree, first_result, second_result)
Ejemplo n.º 47
0
    # Draw a random seed, print it and seed NumPy with it, so that a run can
    # be repeated with the same random data.
    rseed = np.random.randint(100000)
    print "rseed = %i" % rseed
    np.random.seed(rseed)
    # 200 three-dimensional points to index, 100 three-dimensional queries.
    X = np.random.random((200, 3))
    Y = np.random.random((100, 3))

    # SlowBallTree: time construction plus a 3-neighbor query.
    t0 = time()
    SBT = SlowBallTree(X, leaf_size=10)
    d1, n1 = SBT.query(Y, 3)
    t1 = time()

    print "python: %.2g sec" % (t1 - t0)

    # SlowBallTree again, this time queried through query_dual.
    t0 = time()
    SBT = SlowBallTree(X, leaf_size=10)
    d1a, n1a = SBT.query_dual(Y, 3)
    t1 = time()

    print "python dual: %.2g sec" % (t1 - t0)

    # BallTree: same data, same query.
    t0 = time()
    BT = BallTree(X, leaf_size=10)
    d2, n2 = BT.query(Y, 3)
    t1 = time()

    print "cython: %.2g sec" % (t1 - t0)

    # Each line reports two comparisons: SlowBallTree.query against
    # BallTree.query, then SlowBallTree.query_dual against SlowBallTree.query.
    print "neighbors match:", np.allclose(n1, n2), np.allclose(n1a, n1)
    print "distances match:", np.allclose(d1, d2), np.allclose(d1a, d1)
Ejemplo n.º 48
0
 def create_ball_tree(self):  # done
     """Build ``self.tree`` from the training matrix.

     The tree is constructed from ``self.Xtrain`` and ``self.leaf_size``
     together with an array holding the row numbers
     ``0 .. n_rows - 1`` of ``self.Xtrain``.
     """
     # np.arange builds the index array directly (no intermediate Python
     # sequence) and keeps an integer dtype even when there are no rows.
     row_ids = np.arange(self.Xtrain.shape[0])
     self.tree = BallTree(self.Xtrain, self.leaf_size, row_ids)
Ejemplo n.º 49
0
 def check_neighbors(metric):
     """Check BallTree neighbor distances against brute force for ``metric``.

     ``X``, ``Y`` and ``k`` come from the enclosing scope.  Only the
     distances are compared; the returned indices are ignored.
     """
     tree = BallTree(X, leaf_size=1, metric=metric)
     tree_dist, _ = tree.query(Y, k)
     brute_dist, _ = brute_force_neighbors(X, Y, k, metric)
     assert_allclose(tree_dist, brute_dist)
Ejemplo n.º 50
0
    # Draw a random seed, print it and seed NumPy with it, so that a run can
    # be repeated with the same random data.
    rseed = np.random.randint(100000)
    print "rseed = %i" % rseed
    np.random.seed(rseed)
    # 200 three-dimensional points to index, 100 three-dimensional queries.
    X = np.random.random((200, 3))
    Y = np.random.random((100, 3))

    # SlowBallTree: time construction plus a 3-neighbor query.
    t0 = time()
    SBT = SlowBallTree(X, leaf_size=10)
    d1, n1 = SBT.query(Y, 3)
    t1 = time()

    print "python: %.2g sec" % (t1 - t0)

    # SlowBallTree again, this time queried through query_dual.
    t0 = time()
    SBT = SlowBallTree(X, leaf_size=10)
    d1a, n1a = SBT.query_dual(Y, 3)
    t1 = time()

    print "python dual: %.2g sec" % (t1 - t0)

    # BallTree: same data, same query.
    t0 = time()
    BT = BallTree(X, leaf_size=10)
    d2, n2 = BT.query(Y, 3)
    t1 = time()

    print "cython: %.2g sec" % (t1 - t0)

    # Each line reports two comparisons: SlowBallTree.query against
    # BallTree.query, then SlowBallTree.query_dual against SlowBallTree.query.
    print "neighbors match:", np.allclose(n1, n2), np.allclose(n1a, n1)
    print "distances match:", np.allclose(d1, d2), np.allclose(d1a, d1)
Ejemplo n.º 51
0
        # Tail of a loop that starts outside this excerpt: drop one entry
        # whenever the heap holds more than k of them.
        if len(heap) > k:
            heap.pop()
    # Plot every candidate left in the heap; candidate[0] unpacks into an
    # (x, y) pair.
    for candidate in heap:
        # print(candidate)
        x_, y_ = candidate[0]
        plt.plot(x_, y_, 'bo', color='pink')

    print(distances)
    # Check that candidate[1] of every heap entry is contained in `s`.
    # NOTE(review): `s` is defined outside this excerpt -- presumably the
    # reference result set; confirm.  The local name `all` shadows the builtin.
    all = True
    for candidate in heap:
        if not candidate[1] in s:
            all = False
            break
    print('All found in the brute force approach? %s' % all)

    # Repeat the lookup through the ball tree built over `points`.
    tree = BallTree(points, euclid_metric)
    distance_balls = knn(tree, point, k, euclid_metric)
    # print(len(distance_balls))
    # print(distance_balls)

    # Same membership check for the ball-tree candidates.  Each candidate is
    # plotted before it is checked, and the loop stops at the first miss, so
    # candidates after a miss are not plotted.
    all = True
    for candidate in distance_balls:
        x, y = candidate[0]
        plt.plot(x, y, 'bo', color='#00ff00')
        if not candidate[1] in s:
            all = False
            break
    print('All found in the ball tree approach?   %s' % all)

    # traverse(tree, plt)
    plt.show()
Ejemplo n.º 52
0
from time import time
import numpy as np
from ball_tree import BallTree, KDTree
from sklearn import neighbors

X = np.random.random((20000, 3))
X_query = np.random.random((20000, 3))

t0 = time()
BT = BallTree(X, 30)
t1 = time()
print "BT construction: %.2g sec" % (t1 - t0)

t0 = time()
KDT = KDTree(X, 30)
t1 = time()
print "KDT construction: %.2g sec" % (t1 - t0)

for k in 1, 2, 4, 8:
    print "\nquery %i in [%i, %i]:" % (k, X.shape[0], X.shape[1])
    print "      single     dual"
    t0 = time()
    d1, i1 = BT.query(X_query, k, dualtree=False)
    t1 = time()
    d1, i1 = BT.query(X_query, k, dualtree=True)
    t2 = time()
    print "  BT: %.3g sec   %.3g sec" % (t1 - t0, t2 - t1)

    d2, i2 = KDT.query(X_query, k, dualtree=False)
    t3 = time()
    d2, i2 = KDT.query(X_query, k, dualtree=True)