def test_run2():
    '''
    Compare knn search results for different `checks` values while the index
    is built progressively.

    Parameters
    ----------
    checks: number of nodes examined during knn_search_points
        (a larger value means a more exhaustive, and thus more accurate, search)
    '''
    n = 100
    k = 3
    ops = 10
    test_n = 1

    x = random_vectors(n)
    test_points = random_vectors(test_n)

    index = Index(x, w=(0.5, 0.5))

    for i in range(n // ops):
        ur = index.run(ops)
        print(ur)

        ids1, dists1 = index.knn_search_points(test_points, k, checks=1)
        # ids2, dists2 = index.knn_search_points(test_points, k, checks = 50)
        ids3, dists3 = index.knn_search_points(test_points, k, checks=100)
        print("1: ", ids1)
        print("1: ", dists1)
        # print("2: ", ids2)
        print("3: ", ids3)
        print("3: ", dists3)
        print(index.size())
def test_run():
    """
  RUN
  ----------
  INCREMENTALLY ADD POINTS TO THE TREE STRUCTURE

  Parameters
  ----------
  ops: number of ops

  Returns
  -------
  numPointsInserted: total number of points inserted
  addPointOps: number of ops allocated to add
  updateIndexOps: number of ops allocated to update index
  addPointResult: number of added points
  updateIndexResult: ?
  addPointElapsed: time elapsed for add
  updateIndexElapsed: time elapsed for update index
  """
    x = random_vectors(n=30, d=3)
    index = Index(x, w=(0.5, 0.5))

    ops = 6

    for i in range(x.shape[0] // ops):
        ur = index.run(ops)
        print("===========")
        print("index.size(): ",
              index.size())  # index.size grows as we run iteratively
        print(ur)
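For reference, a minimal sketch of reading the result returned by index.run(); the field names are the ones documented in the docstring above, and the ops value is illustrative:

# Sketch (setup mirrors test_run above); ur behaves like a dict of the fields
# documented in the docstring, e.g. 'addPointResult' and 'numPointsInserted'.
x = random_vectors(n=30, d=3)
index = Index(x, w=(0.5, 0.5))
ur = index.run(6)
print(ur['addPointResult'])      # points added by this call
print(ur['numPointsInserted'])   # points inserted so far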
Example #3
class KNNKernelDensity:
    SQRT2PI = np.sqrt(2 * np.pi)

    def __init__(self, X: np.ndarray[Any, Any], online: bool = False):
        self.X = X
        self.index = Index(X)
        if not online:  # offline: add all points to the index up front
            self.index.add_points(len(X))

    def run(self, ops: Any) -> Any:
        return self.index.run(ops)

    def run_ids(self, ids: Iterable[int]) -> Any:
        return self.index.run_ids(ids)

    def score_samples(
        self, X: np.ndarray[Any, Any], k: int = 10, bandwidth: float = 0.2
    ) -> np.ndarray[Any, Any]:
        # average the Gaussian kernel contribution of each query's k nearest neighbors
        _, dists = self.index.knn_search_points(X, k=k)
        scores = self._gaussian_score(dists, bandwidth) / k
        return scores

    def _gaussian_score(
        self, dists: np.ndarray[Any, Any], bandwidth: float
    ) -> np.ndarray[Any, Any]:
        # Gaussian kernel: exp(-0.5 * (d / h)^2) / (h * sqrt(2 * pi)), summed over the k neighbors
        logg = -0.5 * (dists / bandwidth) ** 2
        g = np.exp(logg) / bandwidth / self.SQRT2PI
        return g.sum(axis=1)  # type: ignore
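
A short, hedged usage sketch for the class above; random_vectors and Index are the same helpers used in the surrounding snippets, and the sizes, k, and bandwidth are illustrative:

# Usage sketch: progressively index random points, then score a few queries.
data = random_vectors(n=1000, d=3)
kde = KNNKernelDensity(data, online=True)   # online: points are inserted by run()

for _ in range(len(data) // 100):
    kde.run(100)                            # index 100 more points per call

queries = random_vectors(n=5, d=3)
print(kde.score_samples(queries, k=10, bandwidth=0.2))  # one density estimate per query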
Example #4
class KNNKernelDensity():
    SQRT2PI = np.sqrt(2 * np.pi)

    def __init__(self, X, online=False):
        self.X = X
        self.index = Index(X)
        
        if not online: # if offline
            self.index.add_points(len(X))

    def run(self, ops):
        return self.index.run(ops)

    def run_ids(self, ids):
        return self.index.run_ids(ids)

    def score_samples(self, X, k=10, bandwidth=0.2):
        _, dists = self.index.knn_search_points(X, k=k)
        scores = self._gaussian_score(dists, bandwidth) / k
        return scores

    def _gaussian_score(self, dists, bandwidth):
        logg = -0.5 * (dists / bandwidth) ** 2
        g = np.exp(logg) / bandwidth / self.SQRT2PI
        return g.sum(axis=1)
Example #5
    def test_incremental_run1(self):
        x = random_vectors()

        index = Index(x, w=(0.5, 0.5))
        self.assertTrue(index.is_using_pyarray)
        ops = 20

        for i in range(x.shape[0] // ops):
            ur = index.run(ops)

            self.assertEqual(index.size(), (i + 1) * ops)
            self.assertEqual(ur['addPointResult'], ops)
Example #6
class KNNRegressor():
    def __init__(self, X, y, n_neighbors=5, weights='uniform', online=False):
        self.X = X
        self.y = y
        self.index = Index(X)
        self.n_neighbors = n_neighbors
        self.weights = weights

        if not online:  # if offline
            self.index.add_points(len(X))

    def run(self, ops):
        return self.index.run(ops)

    def predict(self, X):
        indices, dists = self.index.knn_search_points(X, k=self.n_neighbors)
        weights = self._get_weights(dists)

        # view y as 2-D so single-output and multi-output targets share one code path
        y = self.y if self.y.ndim > 1 else self.y.reshape(-1, 1)

        if self.weights == 'uniform':
            y_pred = np.mean(y[indices], axis=1)
        else:
            y_pred = np.empty((X.shape[0], y.shape[1]))
            denom = np.sum(weights, axis=1)

            for j in range(y.shape[1]):
                num = np.sum(y[indices, j] * weights, axis=1)
                y_pred[:, j] = num / denom

        if self.y.ndim == 1:
            y_pred = y_pred.ravel()

        return y_pred

    def _get_weights(self, dists):
        if self.weights == 'uniform':
            return None

        # inverse-distance weighting; when a query coincides with a training
        # point (distance 0), give full weight to the exact matches and zero
        # weight to the remaining neighbors
        for i, dist in enumerate(dists):
            if 0. in dist:
                dists[i] = dist == 0.
            else:
                dists[i] = 1. / dist

        return dists
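
And a similar sketch for the regressor above; the target y is purely illustrative (any array aligned with X works), and random_vectors comes from the earlier snippets:

# Usage sketch: distance-weighted k-NN regression on a 1-D target.
X_train = random_vectors(n=500, d=3)
y_train = X_train.sum(axis=1)              # illustrative target values

reg = KNNRegressor(X_train, y_train, n_neighbors=5, weights='distance', online=True)
for _ in range(len(X_train) // 100):
    reg.run(100)                           # progressively build the index

X_test = random_vectors(n=10, d=3)
print(reg.predict(X_test))                 # one prediction per query point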
Example #7
    def test_updates_after_all_points_added(self):
        np.random.seed(10)
        n = 10000
        w = (0.5, 0.5)
        x = random_vectors(n)
        ops = 1000

        index = Index(x, w=w)
        self.assertTrue(index.is_using_pyarray)

        index.add_points(n)  # add all points

        for i in range(1000):
            index.knn_search_points(random_vectors(100),
                                    10)  # accumulate losses

        for i in range(10):
            res = index.run(ops)

            self.assertEqual(res['addPointResult'], 0)
            self.assertEqual(res['updateIndexResult'], ops)
Example #8
    def test_incremental_run2(self):
        n = 1000
        k = 20
        ops = 100
        test_n = 30

        x = random_vectors(n)
        test_points = random_vectors(test_n)

        index = Index(x)
        self.assertTrue(index.is_using_pyarray)

        for i in range(n // ops):
            ur = index.run(ops)

            ids1, dists1 = index.knn_search_points(test_points, k, checks=100)
            ids2, dists2 = index.knn_search_points(test_points, k, checks=1000)
            """
            The assertion below always holds since later search checks a larger number of nodes and the search process is deterministic
            """
            self.assertEqual(np.sum(dists1 >= dists2), test_n * k)