def test_run():
    """Demonstrate ``Index.run``: incrementally add points to the tree structure.

    Builds an index over 30 random 3-d vectors and calls ``run`` with a
    budget of 6 ops per step, printing the index size and the returned
    result after every step.

    ``run`` parameter
    -----------------
    ops: number of ops

    Fields of the result returned by ``run``
    ----------------------------------------
    numPointsInserted: total number of points inserted
    addPointOps: number of ops allocated to add
    updateIndexOps: number of ops allocated to update index
    addPointResult: number of added points
    updateIndexResult: not documented here
    addPointElapsed: time elapsed for add
    updateIndexElapsed: time elapsed for update index
    """
    ops_per_step = 6

    vectors = random_vectors(n=30, d=3)
    index = Index(vectors, w=(0.5, 0.5))

    num_steps = vectors.shape[0] // ops_per_step
    for _ in range(num_steps):
        result = index.run(ops_per_step)

        print("===========")
        # The index size grows as we run iteratively.
        current_size = index.size()
        print("index.size(): ", current_size)
        print(result)
Exemple #2
0
class KNNKernelDensity():
    """Gaussian kernel density scores computed from k-nearest-neighbour distances.

    The neighbour search is delegated to an ``Index`` built over ``X``.
    """

    # Normalisation constant of the Gaussian kernel: sqrt(2 * pi).
    SQRT2PI = np.sqrt(2 * np.pi)

    def __init__(self, X, online=False):
        """Build the index over ``X``; unless ``online``, add every point right away."""
        self.X = X
        self.index = Index(X)

        if online:
            return  # online: nothing is added to the index here
        self.index.add_points(len(X))

    def run(self, ops):
        """Forward ``ops`` to ``Index.run`` and return its result."""
        return self.index.run(ops)

    def run_ids(self, ids):
        """Forward ``ids`` to ``Index.run_ids`` and return its result."""
        return self.index.run_ids(ids)

    def score_samples(self, X, k=10, bandwidth=0.2):
        """Return, per query row of ``X``, the summed Gaussian kernel over its ``k`` neighbours divided by ``k``."""
        _ids, neighbor_dists = self.index.knn_search_points(X, k=k)
        return self._gaussian_score(neighbor_dists, bandwidth) / k

    def _gaussian_score(self, dists, bandwidth):
        """Evaluate the Gaussian kernel on every distance and sum along axis 1."""
        scaled = dists / bandwidth
        kernel = np.exp(-0.5 * scaled ** 2) / bandwidth / self.SQRT2PI
        return kernel.sum(axis=1)
Exemple #3
0
class KNNKernelDensity:
    """Gaussian kernel density scores computed from k-nearest-neighbour distances.

    The neighbour search is delegated to an ``Index`` built over ``X``.
    """

    # Normalisation constant of the Gaussian kernel: sqrt(2 * pi).
    SQRT2PI = np.sqrt(2 * np.pi)

    def __init__(self, X: np.ndarray[Any, Any], online: Optional[bool] = False):
        """Build the index over ``X``.

        Parameters
        ----------
        X: data array handed to ``Index``
        online: when falsy, all ``len(X)`` points are added to the index
            immediately; otherwise no points are added here.
        """
        self.X = X
        self.index = Index(X)
        if not online:
            self.index.add_points(len(X))

    def run(self, ops: Any) -> Any:
        """Forward ``ops`` to ``Index.run`` and return its result."""
        return self.index.run(ops)

    def run_ids(self, ids: Iterable[int]) -> Any:
        """Forward ``ids`` to ``Index.run_ids`` and return its result."""
        return self.index.run_ids(ids)

    def score_samples(
        self, X: np.ndarray[Any, Any], k: int = 10, bandwidth: float = 0.2
    ) -> np.ndarray[Any, Any]:
        """Score every query row of ``X``.

        Parameters
        ----------
        X: query points passed to ``Index.knn_search_points``
        k: number of neighbours searched per query
        bandwidth: kernel bandwidth the distances are divided by

        Returns
        -------
        One score per query row: the Gaussian kernel summed over the ``k``
        neighbour distances, divided by ``k``.
        """
        _, dists = self.index.knn_search_points(X, k=k)
        scores = self._gaussian_score(dists, bandwidth) / k
        return scores

    def _gaussian_score(
        self, dists: np.ndarray[Any, Any], bandwidth: float
    ) -> np.ndarray[Any, Any]:
        """Evaluate the Gaussian kernel on each distance and sum along axis 1.

        ``dists`` must be an array with at least two dimensions (it is
        reduced with ``sum(axis=1)``), so it is annotated as an array rather
        than a scalar ``float``.
        """
        logg = -0.5 * (dists / bandwidth) ** 2
        g = np.exp(logg) / bandwidth / self.SQRT2PI
        return g.sum(axis=1)  # type: ignore
def test_knn_search():
    """Demonstrate ``Index.knn_search``: query neighbours of an indexed point by id.

    Given a point id, the search returns ids and distances in ascending
    order, the point itself included.

    ``knn_search`` parameters
    -------------------------
    pid: index of target point
    k: number of points to find (k must be less than or equal to the number
       of points)
    cores: number of cores to use
    checks, eps, sorted: not documented here

    ``knn_search`` returns
    ----------------------
    ids: ids of points found (numpy 2D array)
    dists: distances from target point (numpy 2D array)
    """
    vectors = random_vectors()
    num_points = vectors.shape[0]

    index = Index(vectors)
    index.add_points(num_points)

    # Choose a random point id, e.g. 94, and show its data, e.g. [[0.64, ...]].
    target_id = np.random.randint(num_points)
    print(vectors[[target_id]])

    neighbor_ids, neighbor_dists = index.knn_search(target_id, 5, cores=1)
    # For target_id=10 the ids look like array([[10, 80, 87,  5, 95]]) ...
    print(neighbor_ids)
    # ... and the distances like
    # array([[0, 0.76741797, 0.86952025, 0.90387696, 0.9157505 ]]).
    print(neighbor_dists)
Exemple #5
0
    def __init__(self, X, y, n_neighbors=5, weights='uniform', online=False):
        """Store the data and settings and build an ``Index`` over ``X``.

        Unless ``online`` is truthy, all ``len(X)`` points are added to the
        index immediately.
        """
        self.X, self.y = X, y
        self.index = Index(X)
        self.n_neighbors, self.weights = n_neighbors, weights

        if online:
            return  # online: nothing is added to the index here
        self.index.add_points(len(X))
Exemple #6
0
    def test_incremental_run1(self):
        """Each ``run(ops)`` call on a fresh index should add exactly ``ops`` points."""
        ops = 20
        vectors = random_vectors()

        index = Index(vectors, w=(0.5, 0.5))
        self.assertTrue(index.is_using_pyarray)

        num_steps = vectors.shape[0] // ops
        for completed in range(1, num_steps + 1):
            result = index.run(ops)

            # After `completed` steps the index holds completed * ops points.
            self.assertEqual(index.size(), completed * ops)
            self.assertEqual(result['addPointResult'], ops)
Exemple #7
0
    def test_return_shape_64(self):
        """``knn_search`` on float64 data returns ids and distances of shape (1, k)."""
        k = 5
        expected_shape = (1, k)

        vectors = random_vectors(dtype=np.float64)
        index = Index(vectors)

        # The index must keep the very same array object it was given.
        self.assertIs(vectors, index.array)
        self.assertTrue(index.is_using_pyarray)

        num_points = vectors.shape[0]
        index.add_points(num_points)

        for point_id in range(num_points):
            ids, dists = index.knn_search(point_id, k)
            self.assertEqual(ids.shape, expected_shape)
            self.assertEqual(dists.shape, expected_shape)
Exemple #8
0
    def test_random_64(self):
        """The nearest neighbour of an indexed point, queried by value, is the point itself."""
        vectors = random_vectors(dtype=np.float64)

        index = Index(vectors)
        self.assertTrue(index.is_using_pyarray)
        # Points must be added before the index can be queried.
        index.add_points(vectors.shape[0])

        target_id = np.random.randint(vectors.shape[0])
        query = np.asarray(vectors[[target_id]], dtype=np.float32)

        found_ids, _found_dists = index.knn_search_points(query, 1, cores=1)

        self.assertEqual(len(found_ids), 1)
        self.assertEqual(found_ids[0], target_id)
Exemple #9
0
    def test_openmp_obj(self):
        """Time ``knn_search_points`` with 1 core and with 4 cores on a ``PseudoArray``.

        The index is built over a ``PseudoArray`` wrapper of float64 vectors
        and is asserted not to report ``is_using_pyarray``. Both timings are
        printed in milliseconds; nothing is asserted about them.
        """
        N = 10000  # must be large enough

        x0 = random_vectors(N, dtype=np.float64)
        x = PseudoArray(x0)

        index = Index(x)
        self.assertFalse(index.is_using_pyarray)
        # we must add points before querying the index
        index.add_points(x.shape[0])

        pts = np.asarray(x0, dtype=np.float32)

        for _ in range(5):  # make cache ready
            index.knn_search_points(pts, 10)

        # perf_counter() is monotonic and high resolution, unlike time.time(),
        # which follows the (adjustable, possibly coarse) wall clock.
        start = time.perf_counter()
        index.knn_search_points(pts, 10, cores=1)
        elapsed1 = time.perf_counter() - start

        start = time.perf_counter()
        index.knn_search_points(pts, 10, cores=4)
        elapsed2 = time.perf_counter() - start

        print("single thread: {:.2f} ms".format(elapsed1 * 1000))
        print("4 threads: {:.2f} ms".format(elapsed2 * 1000))
Exemple #10
0
class KNNRegressor():
    """k-nearest-neighbour regressor backed by an ``Index``.

    With ``weights='uniform'`` a prediction is the plain mean of the targets
    of the ``n_neighbors`` nearest indexed points; with any other ``weights``
    value it is their inverse-distance weighted mean.
    """

    def __init__(self, X, y, n_neighbors=5, weights='uniform', online=False):
        """Store the data and settings and build an ``Index`` over ``X``.

        Parameters
        ----------
        X: data array handed to ``Index``
        y: targets, indexed by the neighbour ids returned by the index;
           either 1-D or 2-D (one column per output)
        n_neighbors: number of neighbours used per prediction
        weights: ``'uniform'`` for a plain mean, anything else for
            inverse-distance weighting
        online: when falsy, all ``len(X)`` points are added to the index
            immediately; otherwise no points are added here.
        """
        self.X = X
        self.y = y
        self.index = Index(X)
        self.n_neighbors = n_neighbors
        self.weights = weights

        if not online:  # if offline
            self.index.add_points(len(X))

    def run(self, ops):
        """Forward ``ops`` to ``Index.run`` and return its result."""
        return self.index.run(ops)

    def predict(self, X):
        """Predict targets for the query rows of ``X``.

        Returns
        -------
        Array of shape ``(n_queries,)`` when ``y`` is 1-D, otherwise
        ``(n_queries, n_outputs)``.
        """
        indices, dists = self.index.knn_search_points(X, k=self.n_neighbors)
        weights = self._get_weights(dists)

        # Always work on a 2-D view of the targets so that the weighted
        # branch, which iterates over output columns, also works for 1-D y
        # (it used to fail on ``self.y.shape[1]``).
        y = np.asarray(self.y)
        single_output = y.ndim == 1
        if single_output:
            y = y.reshape((-1, 1))

        if weights is None:  # 'uniform'
            y_pred = np.mean(y[indices], axis=1)
        else:
            y_pred = np.empty((X.shape[0], y.shape[1]))
            denom = np.sum(weights, axis=1)

            for j in range(y.shape[1]):
                num = np.sum(y[indices, j] * weights, axis=1)
                y_pred[:, j] = num / denom

        if single_output:
            y_pred = y_pred.ravel()

        return y_pred

    def _get_weights(self, dists):
        """Turn neighbour distances into weights.

        Returns ``None`` for ``weights='uniform'``. Otherwise ``dists`` is
        overwritten in place, row by row, and returned: a row containing a
        zero distance gets weight 1 on the zero-distance entries and 0
        elsewhere (avoiding a division by zero); every other row gets
        ``1 / distance``.
        """
        if self.weights == 'uniform':
            return None

        for i, dist in enumerate(dists):
            if 0. in dist:
                dists[i] = dist == 0.
            else:
                dists[i] = 1. / dist

        return dists
Exemple #11
0
    def test_incremental_run2(self):
        """Searching with more ``checks`` must never return larger distances.

        After every incremental ``run`` step the same queries are searched
        with ``checks=100`` and ``checks=1000`` and the distances compared
        element-wise.
        """
        n, k, ops, test_n = 1000, 20, 100, 30

        x = random_vectors(n)
        test_points = random_vectors(test_n)

        index = Index(x)
        self.assertTrue(index.is_using_pyarray)

        total_entries = test_n * k
        for _ in range(n // ops):
            index.run(ops)

            _, coarse_dists = index.knn_search_points(test_points, k, checks=100)
            _, fine_dists = index.knn_search_points(test_points, k, checks=1000)

            # The assertion below always holds since the later search checks
            # a larger number of nodes and the search process is
            # deterministic.
            not_worse = np.sum(coarse_dists >= fine_dists)
            self.assertEqual(not_worse, total_entries)
def test_run2():
    """Interleave ``Index.run`` with searches that use different ``checks`` values.

    ``knn_search_points`` parameter
    -------------------------------
    checks: number of nodes to check (unconfirmed)
    """
    n, k, ops, test_n = 100, 3, 10, 1

    vectors = random_vectors(n)
    queries = random_vectors(test_n)

    index = Index(vectors, w=(0.5, 0.5))

    for _ in range(n // ops):
        result = index.run(ops)
        print(result)

        # Search with a minimal and with a large number of checks; an
        # intermediate checks=50 search was left disabled in the original.
        ids_low, dists_low = index.knn_search_points(queries, k, checks=1)
        ids_high, dists_high = index.knn_search_points(queries, k, checks=100)

        print("1: ", ids_low)
        print("1: ", dists_low)
        print("3: ", ids_high)
        print("3: ", dists_high)
        print(index.size())
Exemple #13
0
    def test_large_k(self):
        """Asking for more neighbours than there are vectors must raise ``ValueError``."""
        vectors = random_vectors()
        query = random_vectors(1)

        index = Index(vectors)
        self.assertTrue(index.is_using_pyarray)
        index.add_points(vectors.shape[0])

        # One more than the number of vectors in the data set.
        too_many = vectors.shape[0] + 1

        with self.assertRaises(ValueError):
            index.knn_search(0, too_many)

        with self.assertRaises(ValueError):
            index.knn_search_points(query, too_many)
def test_add_points():
    """Demonstrate ``Index.add_points``: points must be added before querying.

    ``add_points`` parameter
    ------------------------
    ops: number of points to add
    """
    vectors = random_vectors(n=30)
    index = Index(vectors)

    # Expected sizes: 0 before anything is added, 1 after adding one point,
    # and 30 after requesting 100000, since we cannot add more than we have.
    print(index.size())

    index.add_points(1)
    print(index.size())

    index.add_points(100000)
    print(index.size())
def test_index():
    """Demonstrate ``Index``: build a KNN index we can work on.

    ``Index`` parameters
    --------------------
    array: the data the index is built over (exposed again as
           ``index.array``)
    w: tree weight, e.g. (0.3, 0.7)
    reconstruction_weight, trees: not documented here
    """
    num_points, dim = 100, 10
    vectors = random_vectors(n=num_points, d=dim)

    index = Index(vectors)

    # Expected shape: (n, d) = (100, 10).
    stored_shape = index.array.shape
    print(stored_shape)
Exemple #16
0
    def test_updates_after_all_points_added(self):
        """Once every point is added, ``run`` spends all of its ops on index updates."""
        np.random.seed(10)

        n, ops = 10000, 1000
        weights = (0.5, 0.5)
        vectors = random_vectors(n)

        index = Index(vectors, w=weights)
        self.assertTrue(index.is_using_pyarray)

        # Add all points up front so later runs have nothing left to add.
        index.add_points(n)

        # Issue many queries to accumulate losses.
        for _ in range(1000):
            queries = random_vectors(100)
            index.knn_search_points(queries, 10)

        for _ in range(10):
            result = index.run(ops)

            self.assertEqual(result['addPointResult'], 0)
            self.assertEqual(result['updateIndexResult'], ops)
Exemple #17
0
    def test_openmp(self):
        """Time ``knn_search_points`` with 1 core and with 4 cores.

        Both timings are printed in milliseconds; nothing is asserted about
        them.
        """
        N = 10000  # must be large enough

        x = random_vectors(N)

        index = Index(x)
        self.assertTrue(index.is_using_pyarray)
        # we must add points before querying the index
        index.add_points(x.shape[0])

        for _ in range(5):  # make cache ready
            index.knn_search_points(x, 10)

        # perf_counter() is monotonic and high resolution, unlike time.time(),
        # which follows the (adjustable, possibly coarse) wall clock.
        start = time.perf_counter()
        index.knn_search_points(x, 10, cores=1)
        elapsed1 = time.perf_counter() - start

        start = time.perf_counter()
        index.knn_search_points(x, 10, cores=4)
        elapsed2 = time.perf_counter() - start

        print("single thread: {:.2f} ms".format(elapsed1 * 1000))
        print("4 threads: {:.2f} ms".format(elapsed2 * 1000))
def test_knn_search_points():
    """Demonstrate ``Index.knn_search_points``: query neighbours by point data.

    Given data (a 2-d array), the search returns ids and distances in
    ascending order, the point itself included.

    ``knn_search_points`` parameters
    --------------------------------
    points: data (2d array) of the target points; any 2d array is possible,
            e.g. [[0.33, 0.61, ...]]
    k: number of points to find (k must be less than or equal to the number
       of points)
    cores: number of cores to use
    checks, eps, sorted: not documented here

    ``knn_search_points`` returns
    -----------------------------
    ids: ids of points found (numpy 2D array)
    dists: distances from target point (numpy 2D array)
    """
    vectors = random_vectors(n=10, d=3)
    num_points = vectors.shape[0]

    index = Index(vectors)
    index.add_points(num_points)

    # Query with a single, randomly chosen data point.
    target_id = np.random.randint(num_points)
    single_query = np.asarray(vectors[[target_id]], dtype=np.float32)
    single_ids, single_dists = index.knn_search_points(single_query, 3, cores=1)
    print(single_ids)
    print(single_dists)

    # Query with the whole data set at once.
    all_ids, all_dists = index.knn_search_points(vectors, 5, cores=1)
    print(all_ids)
    print(all_dists)
Exemple #19
0
 def __init__(self, X, online=False):
     """Build an ``Index`` over ``X``; unless ``online``, add every point right away."""
     self.X = X
     self.index = Index(X)

     if online:
         return  # online: nothing is added to the index here
     self.index.add_points(len(X))
Exemple #20
0
 def test_check_type(self):
     """Building an ``Index`` from a plain nested list must raise ``AttributeError``."""
     plain_list = [[0, 1]]  # a list has no ``shape`` attribute
     with self.assertRaises(AttributeError):
         Index(plain_list)
Exemple #21
0
 def __init__(self, X: np.ndarray[Any, Any], online: Optional[bool] = False):
     """Build an ``Index`` over ``X``; unless ``online``, add every point right away."""
     self.X = X
     self.index = Index(X)
     if online:
         return  # online: nothing is added to the index here
     self.index.add_points(len(X))
Exemple #22
0
    def test_check_x_type(self):
        """Build-and-query works for default vectors but raises ``ValueError`` for other inputs.

        The two failing cases are int32 vectors and the array produced by
        ``np.random.rand``; each whole build/add/query sequence runs inside
        the ``assertRaises`` block.
        """
        k = 10

        # Baseline: default random vectors can be indexed and queried.
        vectors = random_vectors()
        index = Index(vectors)
        self.assertTrue(index.is_using_pyarray)
        index.add_points(len(vectors))
        index.knn_search_points(vectors, k)

        with self.assertRaises(ValueError):
            int_vectors = random_vectors(dtype=np.int32)
            int_index = Index(int_vectors)
            int_index.add_points(len(int_vectors))
            int_index.knn_search_points(int_vectors, k)

        with self.assertRaises(ValueError):
            rand_vectors = np.random.rand(100, 10)
            rand_index = Index(rand_vectors)
            rand_index.add_points(len(rand_vectors))
            rand_index.knn_search_points(rand_vectors, k)