コード例 #1
0
ファイル: bench_ball_tree.py プロジェクト: spdd/BinaryTree
def bench_ball_tree(N=2000, D=3, k=15, leaf_size=30):
    """Benchmark ``BallTree`` against ``skBallTree`` and print the timings.

    Both trees are built on the same random data.  Construction is timed,
    then a k-neighbors query of the data against itself is timed once for
    the tree labelled 'sklearn' and once per (dualtree, breadth_first)
    combination for the new tree.  Finally the results are compared.

    Parameters
    ----------
    N : int
        Number of random points.
    D : int
        Number of columns of the random data array.
    k : int
        Number of neighbors requested in every query.
    leaf_size : int
        ``leaf_size`` forwarded to both tree constructors.
    """
    print("Ball Tree")
    X = np.random.random((N, D)).astype(DTYPE)

    t0 = time()
    btskl = skBallTree(X, leaf_size=leaf_size)
    t1 = time()
    bt = BallTree(X, leaf_size=leaf_size)
    t2 = time()

    print("Build:")
    print("  sklearn : %.2g sec" % (t1 - t0))
    print("  new     : %.2g sec" % (t2 - t1))

    t0 = time()
    Dskl, Iskl = btskl.query(X, k)
    t1 = time()

    dist = [Dskl]
    ind = [Iskl]
    times = [t1 - t0]
    labels = ['sklearn']
    counts = [-1]  # placeholder: no call count is collected for this tree

    for dualtree in (False, True):
        for breadth_first in (False, True):
            bt.reset_n_calls()
            t0 = time()
            # Use dedicated names so the ``D`` parameter is not overwritten.
            dist_new, ind_new = bt.query(X,
                                         k,
                                         dualtree=dualtree,
                                         breadth_first=breadth_first)
            t1 = time()
            dist.append(dist_new)
            ind.append(ind_new)
            times.append(t1 - t0)
            counts.append(bt.get_n_calls())

            label = 'dual/' if dualtree else 'single/'
            label += 'breadthfirst' if breadth_first else 'depthfirst'
            labels.append(label)

    print("Query:")
    for lab, t, c in zip(labels, times, counts):
        print("  %s : %.2g sec (%i calls)" % (lab, t, c))
    # A bare ``print`` is a no-op expression on Python 3; print("") emits the
    # intended blank line on both Python 2 and Python 3.
    print("")
    # Each result is compared with its predecessor; for i == 0 the index
    # i - 1 wraps around, so the first result is compared with the last.
    print(
        " distances match: %s" % ', '.join([
            '%s' % np.allclose(dist[i - 1], dist[i]) for i in range(len(dist))
        ]))
    print(
        " indices match: %s" % ', '.join(
            ['%s' % np.allclose(ind[i - 1], ind[i]) for i in range(len(ind))]))
コード例 #2
0
    def _check_p_distance_vs_KDT(self, p):
        """Check BallTree Minkowski-``p`` distances against a cKDTree.

        Both trees are built on ``self.X`` and queried with ``self.X`` for
        5 neighbors; only the returned distances are compared.
        """
        n_neighbors = 5
        ball_tree = BallTree(self.X, leaf_size=10, metric='minkowski', p=p)
        kd_tree = cKDTree(self.X, leafsize=10)

        # The second element of each result (the indices) is not checked.
        ball_dist, _ = ball_tree.query(self.X, k=n_neighbors)
        kd_dist, _ = kd_tree.query(self.X, k=n_neighbors, p=p)

        assert_array_almost_equal(ball_dist, kd_dist)
コード例 #3
0
    def _check_p_distance_vs_KDT(self, p):
        """Assert that BallTree and cKDTree agree on Minkowski-``p`` distances.

        The trees index ``self.X``; the same array is used as the query set
        and 5 neighbors are requested from each tree.
        """
        data = self.X
        tree_bt = BallTree(data, leaf_size=10, metric='minkowski', p=p)
        tree_kd = cKDTree(data, leafsize=10)

        # Only the distances (first returned element) take part in the check.
        distances_bt, _unused_bt = tree_bt.query(data, k=5)
        distances_kd, _unused_kd = tree_kd.query(data, k=5, p=p)

        assert_array_almost_equal(distances_bt, distances_kd)
コード例 #4
0
ファイル: test_ball_tree.py プロジェクト: jakevdp/BinaryTree
    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Compare BallTree k-neighbor distances with the brute-force result.

        ``X`` (indexed data) and ``Y`` (query points) are free variables
        resolved from the surrounding scope.  ``kwargs`` is forwarded to both
        the tree constructor and ``brute_force_neighbors``.
        """
        query_options = dict(dualtree=dualtree, breadth_first=breadth_first)
        tree = BallTree(X, leaf_size=1, metric=metric, **kwargs)
        tree_dist, _ = tree.query(Y, k, **query_options)
        brute_dist, _ = brute_force_neighbors(X, Y, k, metric, **kwargs)

        # Indices are deliberately left unchecked: duplicate distances can
        # legitimately produce different indices, while the distances
        # themselves must still agree.
        assert_allclose(tree_dist, brute_dist)
コード例 #5
0
ファイル: bench_ball_tree.py プロジェクト: jakevdp/BinaryTree
def bench_ball_tree(N=2000, D=3, k=15, leaf_size=30):
    """Time ``BallTree`` and ``skBallTree`` construction and queries.

    Random data of shape ``(N, D)`` is generated once.  Both trees are built
    on it (timed), then queried with the same data for ``k`` neighbors:
    once for the tree labelled 'sklearn' and once for every combination of
    ``dualtree`` and ``breadth_first`` on the new tree.  Timings, call counts
    and agreement between the results are printed.

    Parameters
    ----------
    N : int
        Number of random points.
    D : int
        Number of columns of the random data array.
    k : int
        Number of neighbors per query.
    leaf_size : int
        ``leaf_size`` given to both tree constructors.
    """
    print("Ball Tree")
    X = np.random.random((N, D)).astype(DTYPE)

    t0 = time()
    btskl = skBallTree(X, leaf_size=leaf_size)
    t1 = time()
    bt = BallTree(X, leaf_size=leaf_size)
    t2 = time()

    print("Build:")
    print("  sklearn : %.2g sec" % (t1 - t0))
    print("  new     : %.2g sec" % (t2 - t1))

    t0 = time()
    Dskl, Iskl = btskl.query(X, k)
    t1 = time()

    dist = [Dskl]
    ind = [Iskl]
    times = [t1 - t0]
    labels = ['sklearn']
    counts = [-1]  # placeholder: no call count is collected for this tree

    for dualtree in (False, True):
        for breadth_first in (False, True):
            bt.reset_n_calls()
            t0 = time()
            # Separate names keep the ``D`` parameter from being overwritten.
            D_new, I_new = bt.query(X, k, dualtree=dualtree,
                                    breadth_first=breadth_first)
            t1 = time()
            dist.append(D_new)
            ind.append(I_new)
            times.append(t1 - t0)
            counts.append(bt.get_n_calls())

            label = 'dual/' if dualtree else 'single/'
            label += 'breadthfirst' if breadth_first else 'depthfirst'
            labels.append(label)

    print("Query:")
    for lab, t, c in zip(labels, times, counts):
        print("  %s : %.2g sec (%i calls)" % (lab, t, c))
    # print("") writes the blank separator line on Python 2 and 3 alike; a
    # bare ``print`` only does so on Python 2.
    print("")
    # Entry i is compared with entry i - 1; for i == 0 that index wraps
    # around to the last entry.
    print(" distances match: %s"
          % ', '.join(['%s' % np.allclose(dist[i - 1], dist[i])
                       for i in range(len(dist))]))
    print(" indices match: %s"
          % ', '.join(['%s' % np.allclose(ind[i - 1], ind[i])
                       for i in range(len(ind))]))
コード例 #6
0
ファイル: test_ball_tree.py プロジェクト: spdd/BinaryTree
    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Assert that tree-based and brute-force neighbor distances agree.

        A ``BallTree`` with ``leaf_size=1`` is built on ``X`` and queried with
        ``Y``; both names are taken from the surrounding scope.  ``kwargs``
        is passed on to the tree constructor and to
        ``brute_force_neighbors``.
        """
        ball_tree = BallTree(X, leaf_size=1, metric=metric, **kwargs)
        found = ball_tree.query(Y, k, dualtree=dualtree,
                                breadth_first=breadth_first)
        expected = brute_force_neighbors(X, Y, k, metric, **kwargs)
        found_dist, _found_ind = found
        expected_dist, _expected_ind = expected

        # Only distances are compared.  With duplicate distances the index
        # arrays may differ even though the search itself is correct.
        assert_allclose(found_dist, expected_dist)
コード例 #7
0
def test_ball_tree_query():
    """Check BallTree k-neighbor distances against cKDTree for k = 2, 4, 6."""
    points = np.random.random(size=(100, 5))

    for n_neighbors in (2, 4, 6):
        ball_tree = BallTree(points)
        kd_tree = cKDTree(points)

        # The index arrays are returned as well but are not compared.
        ball_dist, _ = ball_tree.query(points, k=n_neighbors)
        kd_dist, _ = kd_tree.query(points, k=n_neighbors)

        assert_array_almost_equal(ball_dist, kd_dist)
コード例 #8
0
def test_ball_tree_query():
    """Assert BallTree and cKDTree return the same neighbor distances.

    Random data of shape (100, 5) is indexed by both trees and queried
    against itself for 2, 4 and 6 neighbors.
    """
    sample = np.random.random(size=(100, 5))
    neighbor_counts = (2, 4, 6)

    for count in neighbor_counts:
        tree_bt = BallTree(sample)
        tree_kd = cKDTree(sample)

        distances_bt, _indices_bt = tree_bt.query(sample, k=count)
        distances_kd, _indices_kd = tree_kd.query(sample, k=count)

        assert_array_almost_equal(distances_bt, distances_kd)
コード例 #9
0
def test_ball_tree_p_distance():
    """Check BallTree Minkowski distances against cKDTree for several p."""
    points = np.random.random(size=(100, 5))
    p_values = (1, 2, 3, 4, np.inf)

    for p in p_values:
        # BallTree takes p at construction time, cKDTree at query time.
        ball_tree = BallTree(points, leaf_size=10, metric="minkowski", p=p)
        kd_tree = cKDTree(points, leafsize=10)

        ball_dist, _ = ball_tree.query(points, k=5)
        kd_dist, _ = kd_tree.query(points, k=5, p=p)

        assert_array_almost_equal(ball_dist, kd_dist)
コード例 #10
0
def test_ball_tree_p_distance():
    """Assert BallTree and cKDTree agree for Minkowski p in 1, 2, 3, 4, inf.

    Random data of shape (100, 5) is indexed by both trees and queried
    against itself for 5 neighbors; only distances are compared.
    """
    sample = np.random.random(size=(100, 5))
    n_neighbors = 5

    for p in (1, 2, 3, 4, np.inf):
        tree_bt = BallTree(sample, leaf_size=10, metric='minkowski', p=p)
        tree_kd = cKDTree(sample, leafsize=10)

        distances_bt, _indices_bt = tree_bt.query(sample, k=n_neighbors)
        distances_kd, _indices_kd = tree_kd.query(sample, k=n_neighbors, p=p)

        assert_array_almost_equal(distances_bt, distances_kd)
コード例 #11
0
    def _check_metrics_bool(self, k, metric, kwargs):
        """Check BallTree distances on ``Xbool``/``Ybool`` for one metric.

        The tree is built on ``self.Xbool`` and queried with ``self.Ybool``;
        the result is compared with the ``k`` smallest entries per row of
        ``DistanceMetric(...).cdist(self.Ybool, self.Xbool)``.
        """
        tree = BallTree(self.Xbool, metric=metric, **kwargs)
        tree_dist, _ = tree.query(self.Ybool, k=k)

        pairwise = DistanceMetric(metric=metric, **kwargs).cdist(self.Ybool,
                                                                 self.Xbool)

        # Column indices of the k smallest values in every row, then the
        # values themselves gathered row by row.
        nearest = np.argsort(pairwise, 1)[:, :k]
        row_ids = np.arange(self.Ybool.shape[0])[:, None]
        brute_dist = pairwise[row_ids, nearest]

        # Indices are not compared: ties for nearest neighbor are very
        # common here and would make the test fail, whereas the distances
        # are correct either way.
        assert_array_almost_equal(tree_dist, brute_dist)
コード例 #12
0
    def _check_metrics_float(self, k, metric, kwargs):
        """Check BallTree distances on ``self.X`` for one metric.

        The tree is built on and queried with ``self.X``; the result is
        compared with the ``k`` smallest entries per row of
        ``DistanceMetric(...).pdist(self.X, squareform=True)``.
        """
        tree = BallTree(self.X, metric=metric, **kwargs)
        tree_dist, _ = tree.query(self.X, k=k)

        pairwise = DistanceMetric(metric=metric, **kwargs).pdist(
            self.X, squareform=True)

        # Column indices of the k smallest values in every row, then the
        # values themselves gathered row by row.
        nearest = np.argsort(pairwise, 1)[:, :k]
        row_ids = np.arange(self.X.shape[0])[:, None]
        brute_dist = pairwise[row_ids, nearest]

        # Indices are not compared: a tie for nearest neighbor could make
        # that check fail.  The distances show whether the search succeeded.
        assert_array_almost_equal(tree_dist, brute_dist)
コード例 #13
0
    def _check_metrics_bool(self, k, metric, kwargs):
        """Compare BallTree query distances with a sorted distance matrix.

        ``self.Xbool`` is indexed by the tree and ``self.Ybool`` is the query
        set.  The reference values are the ``k`` smallest entries of each row
        of the matrix returned by ``DistanceMetric.cdist``.
        """
        ball_tree = BallTree(self.Xbool, metric=metric, **kwargs)
        distances_bt, _indices_bt = ball_tree.query(self.Ybool, k=k)

        distance_metric = DistanceMetric(metric=metric, **kwargs)
        dist_matrix = distance_metric.cdist(self.Ybool, self.Xbool)

        n_queries = self.Ybool.shape[0]
        order = np.argsort(dist_matrix, 1)[:, :k]
        distances_ref = dist_matrix[np.arange(n_queries)[:, None], order]

        # The indices are left unchecked on purpose: nearest-neighbor ties
        # happen very often and would fail the test, while the distances
        # are right in either case.
        assert_array_almost_equal(distances_bt, distances_ref)
コード例 #14
0
    def _check_metrics_float(self, k, metric, kwargs):
        """Compare BallTree query distances with a sorted distance matrix.

        ``self.X`` is both the indexed data and the query set.  The reference
        values are the ``k`` smallest entries of each row of the matrix
        returned by ``DistanceMetric.pdist(..., squareform=True)``.
        """
        ball_tree = BallTree(self.X, metric=metric, **kwargs)
        distances_bt, _indices_bt = ball_tree.query(self.X, k=k)

        distance_metric = DistanceMetric(metric=metric, **kwargs)
        dist_matrix = distance_metric.pdist(self.X, squareform=True)

        n_points = self.X.shape[0]
        order = np.argsort(dist_matrix, 1)[:, :k]
        distances_ref = dist_matrix[np.arange(n_points)[:, None], order]

        # The indices are left unchecked on purpose: with a tie for nearest
        # neighbor that check may fail.  Distances reflect whether the
        # search was successful.
        assert_array_almost_equal(distances_bt, distances_ref)
コード例 #15
0
ファイル: test_ball_tree.py プロジェクト: spdd/BinaryTree
def test_ball_tree_pickle():
    """Yield one pickle round-trip check per protocol (0, 1 and 2).

    A BallTree built on seeded random data is pickled and restored; the
    restored tree must give the same query results as the original one.
    """
    import pickle
    np.random.seed(0)
    points = np.random.random((10, 3))
    original_tree = BallTree(points, leaf_size=1)
    # Unpacked in the order used by the other query calls:
    # presumably (distances, indices).
    dist_before, ind_before = original_tree.query(points)

    def check_pickle_protocol(protocol):
        # Serialize with the requested protocol, restore, and query again.
        restored_tree = pickle.loads(
            pickle.dumps(original_tree, protocol=protocol))
        dist_after, ind_after = restored_tree.query(points)
        assert_allclose(dist_before, dist_after)
        assert_allclose(ind_before, ind_after)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
コード例 #16
0
ファイル: test_ball_tree.py プロジェクト: jakevdp/BinaryTree
def test_ball_tree_pickle():
    """Generate pickle round-trip checks for protocols 0, 1 and 2.

    The reference query results come from a BallTree built on seeded random
    data; each yielded check pickles that tree, restores it and compares
    the restored tree's query results with the reference.
    """
    import pickle
    np.random.seed(0)
    data = np.random.random((10, 3))
    tree = BallTree(data, leaf_size=1)
    # Named after the order the other query calls unpack:
    # presumably (distances, indices).
    ref_dist, ref_ind = tree.query(data)

    def check_pickle_protocol(protocol):
        payload = pickle.dumps(tree, protocol=protocol)
        clone = pickle.loads(payload)
        new_dist, new_ind = clone.query(data)
        assert_allclose(ref_dist, new_dist)
        assert_allclose(ref_ind, new_ind)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
コード例 #17
0
ファイル: neighbors.py プロジェクト: gmassei/bastelpython
def kneighbors_graph(X, n_neighbors, weight=None, ball_tree=None,
                     window_size=1):
    """Computes the (weighted) graph of k-Neighbors

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Coordinates of samples. One sample per row.

    n_neighbors : int
        Number of neighbors for each sample.

    weight : None (default), "distance" or "barycenter"
        Weights to apply on graph edges. If weight is None
        then no weighting is applied (1 for each edge).
        If weight equals "distance" the edge weight is the
        distance returned by the ball tree query. If weight
        equals "barycenter" the weights are barycenter weights
        estimated by solving a linear system for each point.

    ball_tree : None or instance of precomputed BallTree
        When None, a BallTree is built from X and window_size.

    window_size : int
        Window size passed to the BallTree (only used when
        ball_tree is None).

    Returns
    -------
    A : sparse matrix, shape = [n_samples, ball_tree.size]
        A is returned as a linked-list (LIL) sparse matrix.
        A[i,j] = weight of edge that connects i to j

    Raises
    ------
    ValueError
        If weight is not one of the supported values.

    Examples
    --------
    >>> X = [[0], [2], [1]]
    >>> from scikits.learn.neighbors import kneighbors_graph
    >>> A = kneighbors_graph(X, 2)
    >>> A.todense()
    matrix([[ 1.,  0.,  1.],
            [ 0.,  1.,  1.],
            [ 0.,  1.,  1.]])
    """
    from scipy import sparse
    X = np.asanyarray(X)
    n_samples = X.shape[0]
    if ball_tree is None:
        ball_tree = BallTree(X, window_size)
    A = sparse.lil_matrix((n_samples, ball_tree.size))
    dist, ind = ball_tree.query(X, k=n_neighbors)
    # Strings are compared with ``==``: identity (``is``) only matches when
    # the caller's string happens to be the same interned object.
    if weight is None:
        for i, li in enumerate(ind):
            if n_neighbors > 1:
                A[i, list(li)] = np.ones(n_neighbors)
            else:
                A[i, li] = 1.0
    elif weight == "distance":
        for i, li in enumerate(ind):
            if n_neighbors > 1:
                A[i, list(li)] = dist[i, :]
            else:
                A[i, li] = dist[i, 0]
    elif weight == "barycenter":
        # XXX : the next loop could be done in parallel
        # by parallelizing groups of indices
        for i, li in enumerate(ind):
            if n_neighbors > 1:
                X_i = ball_tree.data[li]
                A[i, list(li)] = barycenter_weights(X[i], X_i)
            else:
                A[i, li] = 1.0
    else:
        raise ValueError("Unknown weight type")
    return A
コード例 #18
0
    # Compare SlowBallTree (single and dual query) with BallTree on seeded
    # random data and report timings plus whether the results agree.
    # Every print is a single-argument call so the output is identical on
    # Python 2 and Python 3.
    rseed = np.random.randint(100000)
    print("rseed = %i" % rseed)  # printed so a run can be reproduced
    np.random.seed(rseed)
    X = np.random.random((200, 3))
    Y = np.random.random((100, 3))

    t0 = time()
    SBT = SlowBallTree(X, leaf_size=10)
    d1, n1 = SBT.query(Y, 3)
    t1 = time()

    print("python: %.2g sec" % (t1 - t0))

    t0 = time()
    SBT = SlowBallTree(X, leaf_size=10)
    d1a, n1a = SBT.query_dual(Y, 3)
    t1 = time()

    print("python dual: %.2g sec" % (t1 - t0))

    t0 = time()
    BT = BallTree(X, leaf_size=10)
    d2, n2 = BT.query(Y, 3)
    t1 = time()

    print("cython: %.2g sec" % (t1 - t0))

    print("neighbors match: %s %s"
          % (np.allclose(n1, n2), np.allclose(n1a, n1)))
    print("distances match: %s %s"
          % (np.allclose(d1, d2), np.allclose(d1a, d1)))
コード例 #19
0
ファイル: bench_ball.py プロジェクト: weilinear/pyDistances
from time import time
import numpy as np
from ball_tree import BallTree

# Benchmark script: times BallTree construction, k-neighbor queries (single
# and dual tree) and radius queries on random 3-column data.
# print is called with one pre-formatted argument so the script runs with the
# same output on Python 2 and Python 3.
X = np.random.random((10000, 3))

t0 = time()
BT = BallTree(X, 30)
t1 = time()
print("construction: %.2g sec" % (t1 - t0))

for k in [1, 2, 4, 8]:
    for dual in (False, True):
        t0 = time()
        BT.query(X, k, dualtree=dual)
        t1 = time()
        dual_str = ' (dual)' if dual else ''
        print("query %i in [%i, %i]%s: %.3g sec"
              % (k, X.shape[0], X.shape[1], dual_str, t1 - t0))

for r in 0.1, 0.3, 0.5:
    t0 = time()
    # Radius queries are issued for the first 1000 points only.
    BT.query_radius(X[:1000], r)
    t1 = time()
    print("query r<%.1f in [%i, %i]: %.3g sec"
          % (r, X.shape[0], X.shape[1], t1 - t0))
コード例 #20
0
ファイル: neighbors.py プロジェクト: gmassei/bastelpython
class Neighbors(BaseEstimator, ClassifierMixin):
    """Classifier implementing k-Nearest Neighbor Algorithm.

    Parameters
    ----------
    n_neighbors : int
        default number of neighbors.
    window_size : int
        Window size passed to BallTree

    The data points and their labels are given to ``fit``:

    data : array-like, shape (n, k)
        The data points to be indexed. This array is not copied, and so
        modifying this data will result in bogus results.
    labels : array
        An array representing labels for the data (only arrays of
        integers are supported).

    Examples
    --------
    >>> samples = [[0.,0.,1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
    >>> labels = [0,0,1,1]
    >>> from scikits.learn.neighbors import Neighbors
    >>> neigh = Neighbors(n_neighbors=3)
    >>> neigh.fit(samples, labels)
    Neighbors(n_neighbors=3, window_size=1)
    >>> print neigh.predict([[0,0,0]])
    [ 0.]

    Notes
    -----
    http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm
    """

    def __init__(self, n_neighbors=5, window_size=1):
        """Internally uses the ball tree datastructure and algorithm for fast
        neighbors lookups on high dimensional datasets.
        """
        self.n_neighbors = n_neighbors
        self.window_size = window_size

    def fit(self, X, Y=()):
        """Store the labels and build the ball tree from the samples.

        Parameters
        ----------
        X : array-like
            The data points handed to BallTree.
        Y : array-like
            Labels for the data; converted to an integer array.

        Returns
        -------
        self : Neighbors
            The fitted instance.
        """
        # Y must be an integer array because it is later used as an index.
        # The builtin ``int`` is what the former ``np.int`` alias referred
        # to; the alias itself no longer exists in current NumPy releases.
        self.Y = np.asanyarray(Y, dtype=int)
        self.ball_tree = BallTree(X, self.window_size)
        return self

    def kneighbors(self, data, n_neighbors=None):
        """Finds the K-neighbors of a point.

        Parameters
        ----------
        data : array-like
            The new point (or points).
        n_neighbors : int
            Number of neighbors to get (default is the value
            passed to the constructor).

        Returns
        -------
        dist : array
            Array representing the lengths to point.
        ind : array
            Array representing the indices of the nearest points in the
            population matrix.

        Examples
        --------
        In the following example, we construct a Neighbors class from an
        array representing our data set and ask who's the closest point to
        [1,1,1]

        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> labels = [0, 0, 1]
        >>> from scikits.learn.neighbors import Neighbors
        >>> neigh = Neighbors(n_neighbors=1)
        >>> neigh.fit(samples, labels)
        Neighbors(n_neighbors=1, window_size=1)
        >>> print neigh.kneighbors([1., 1., 1.])
        (array(0.5), array(2))

        As you can see, it returns [0.5], and [2], which means that the
        element is at distance 0.5 and is the third element of samples
        (indexes start at 0). You can also query for multiple points:

        >>> print neigh.kneighbors([[0., 1., 0.], [1., 0., 1.]])
        (array([ 0.5       ,  1.11803399]), array([1, 2]))

        """
        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        return self.ball_tree.query(data, k=n_neighbors)

    def predict(self, T, n_neighbors=None):
        """Predict the class labels for the provided data.

        Parameters
        ----------
        T : array
            A 2-D array representing the test point.
        n_neighbors : int
            Number of neighbors to get (default is the value
            passed to the constructor).

        Returns
        -------
        labels: array
            List of class labels (one for each data sample).

        Examples
        --------
        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> labels = [0, 0, 1]
        >>> from scikits.learn.neighbors import Neighbors
        >>> neigh = Neighbors(n_neighbors=1)
        >>> neigh.fit(samples, labels)
        Neighbors(n_neighbors=1, window_size=1)
        >>> print neigh.predict([.2, .1, .2])
        0
        >>> print neigh.predict([[0., -1., 0.], [3., 2., 0.]])
        [0 1]
        """
        T = np.asanyarray(T)
        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        return _predict_from_BallTree(self.ball_tree, self.Y, T, n_neighbors)
コード例 #21
0
ファイル: bench_ball.py プロジェクト: jakevdp/pyDistances
from time import time
import numpy as np
from ball_tree import BallTree

# Benchmark script for BallTree on random 3-column data: construction time,
# k-neighbor query time (single and dual tree), and radius query time.
# Each message is formatted first and passed to print as one argument, which
# behaves the same under Python 2 and Python 3.
X = np.random.random((10000, 3))

t0 = time()
BT = BallTree(X, 30)
t1 = time()
print("construction: %.2g sec" % (t1 - t0))

for k in [1, 2, 4, 8]:
    for dual in (False, True):
        t0 = time()
        BT.query(X, k, dualtree=dual)
        t1 = time()
        if dual:
            dual_str = ' (dual)'
        else:
            dual_str = ''
        message = "query %i in [%i, %i]%s: %.3g sec" % (k, X.shape[0],
                                                        X.shape[1],
                                                        dual_str,
                                                        t1 - t0)
        print(message)

for r in 0.1, 0.3, 0.5:
    t0 = time()
    # Only the first 1000 points are used as radius-query centers.
    BT.query_radius(X[:1000], r)
    t1 = time()
    message = "query r<%.1f in [%i, %i]: %.3g sec" % (r, X.shape[0],
                                                      X.shape[1], t1 - t0)
    print(message)
コード例 #22
0
 def test_pickle(self):
     """Yield one ``_check_pickle`` case per pickle protocol (0, 1, 2)."""
     tree = BallTree(self.X, leaf_size=1)
     # Both query outputs are forwarded positionally in the order returned.
     first_out, second_out = tree.query(self.X)
     for protocol in (0, 1, 2):
         yield (self._check_pickle, protocol, tree, first_out, second_out)
コード例 #23
0
 def test_pickle(self):
     """Generate pickle checks for protocols 0, 1 and 2.

     Each yielded tuple holds ``self._check_pickle`` followed by its
     arguments: the protocol, the reference tree and its two query outputs.
     """
     reference_tree = BallTree(self.X, leaf_size=1)
     out_a, out_b = reference_tree.query(self.X)
     for protocol in (0, 1, 2):
         yield (self._check_pickle, protocol, reference_tree, out_a, out_b)
コード例 #24
0
ファイル: test_ball_tree.py プロジェクト: spdd/BinaryTree
 def check_neighbors(metric):
     """Compare BallTree distances with brute force for one metric.

     ``X``, ``Y`` and ``k`` are free variables from the surrounding scope.
     """
     tree = BallTree(X, leaf_size=1, metric=metric)
     tree_dist, _ = tree.query(Y, k)
     brute_dist, _ = brute_force_neighbors(X, Y, k, metric)
     # Only the distances are compared; the indices are ignored.
     assert_allclose(tree_dist, brute_dist)
コード例 #25
0
ファイル: slow_ball_tree.py プロジェクト: jakevdp/pyDistances
    # Time SlowBallTree (query and query_dual) against BallTree on seeded
    # random data, then report whether neighbors and distances agree.
    # Messages are formatted into one string before printing so the output
    # is the same on Python 2 and Python 3.
    rseed = np.random.randint(100000)
    print("rseed = %i" % rseed)  # shown so the run can be repeated
    np.random.seed(rseed)
    X = np.random.random((200, 3))
    Y = np.random.random((100, 3))

    t0 = time()
    SBT = SlowBallTree(X, leaf_size=10)
    d1, n1 = SBT.query(Y, 3)
    t1 = time()

    print("python: %.2g sec" % (t1 - t0))

    t0 = time()
    SBT = SlowBallTree(X, leaf_size=10)
    d1a, n1a = SBT.query_dual(Y, 3)
    t1 = time()

    print("python dual: %.2g sec" % (t1 - t0))

    t0 = time()
    BT = BallTree(X, leaf_size=10)
    d2, n2 = BT.query(Y, 3)
    t1 = time()

    print("cython: %.2g sec" % (t1 - t0))

    neighbors_ok = (np.allclose(n1, n2), np.allclose(n1a, n1))
    distances_ok = (np.allclose(d1, d2), np.allclose(d1a, d1))
    print("neighbors match: %s %s" % neighbors_ok)
    print("distances match: %s %s" % distances_ok)
コード例 #26
0
ファイル: test_ball_tree.py プロジェクト: jakevdp/BinaryTree
 def check_neighbors(metric):
     """Assert tree-based and brute-force neighbor distances agree.

     Uses ``X``, ``Y`` and ``k`` from the surrounding scope.
     """
     found = BallTree(X, leaf_size=1, metric=metric).query(Y, k)
     expected = brute_force_neighbors(X, Y, k, metric)
     found_dist, _found_ind = found
     expected_dist, _expected_ind = expected
     assert_allclose(found_dist, expected_dist)
コード例 #27
0
ファイル: bench_kdt.py プロジェクト: weilinear/pyDistances
# Benchmark BallTree against KDTree: construction time, then single- and
# dual-tree k-neighbor query times for several k.  ``X`` and ``X_query`` are
# defined earlier in the script.
# print is always called with one pre-formatted string so the output is the
# same on Python 2 and Python 3.
t0 = time()
BT = BallTree(X, 30)
t1 = time()
print("BT construction: %.2g sec" % (t1 - t0))

t0 = time()
KDT = KDTree(X, 30)
t1 = time()
print("KDT construction: %.2g sec" % (t1 - t0))

for k in 1, 2, 4, 8:
    print("\nquery %i in [%i, %i]:" % (k, X.shape[0], X.shape[1]))
    print("      single     dual")
    t0 = time()
    d1, i1 = BT.query(X_query, k, dualtree=False)
    t1 = time()
    d1, i1 = BT.query(X_query, k, dualtree=True)
    t2 = time()
    print("  BT: %.3g sec   %.3g sec" % (t1 - t0, t2 - t1))

    # Restart the clock so the KDT single-tree timing does not include the
    # time spent printing the BT line above.
    t2 = time()
    d2, i2 = KDT.query(X_query, k, dualtree=False)
    t3 = time()
    d2, i2 = KDT.query(X_query, k, dualtree=True)
    t4 = time()
    print("  KDT: %.3g sec   %.3g sec" % (t3 - t2, t4 - t3))
    print("       (results match: %s)" % np.allclose(d1, d2))

#for r in 0.1, 0.3, 0.5:
#    for tree in (BT, KDT):
#        t0 = time()