예제 #1
0
    def test_openmp_obj(self):
        """Time ``knn_search_points`` with 1 and 4 cores on a wrapped data source.

        The float64 data is wrapped in ``PseudoArray`` and the test asserts
        that the index is *not* using the pyarray path. The two timings are
        printed only; nothing is asserted about the speed-up.
        """
        n_points = 10000  # must be large enough

        raw = random_vectors(n_points, dtype=np.float64)
        wrapped = PseudoArray(raw)

        index = Index(wrapped)
        self.assertFalse(index.is_using_pyarray)
        # we must add points before querying the index
        index.add_points(wrapped.shape[0])

        queries = np.asarray(raw, dtype=np.float32)

        for _ in range(5):  # make cache ready
            index.knn_search_points(queries, 10)

        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short durations; time.time() can jump with clock changes.
        start = time.perf_counter()
        index.knn_search_points(queries, 10, cores=1)
        elapsed_single = time.perf_counter() - start

        start = time.perf_counter()
        index.knn_search_points(queries, 10, cores=4)
        elapsed_multi = time.perf_counter() - start

        print("single thread: {:.2f} ms".format(elapsed_single * 1000))
        print("4 threads: {:.2f} ms".format(elapsed_multi * 1000))
예제 #2
0
def test_knn_search():
    """Demonstrate ``Index.knn_search`` for a randomly chosen indexed point.

    ``knn_search`` is queried with the *id* of a point already added to the
    index. According to the original notes, ids and distances come back in
    ascending order of distance and include the query point itself.

    Arguments used in the call below
    --------------------------------
    pid: id of the target point
    k: number of neighbours to find (must be less than or equal to the
       number of points)
    cores: number of cores to use

    The original notes also list ``checks``, ``eps`` and ``sorted`` without
    describing them; they are not passed here.

    Values returned by ``knn_search``
    ---------------------------------
    ids: ids of the points found (numpy 2D array)
    dists: distances from the target point (numpy 2D array)
    """
    data = random_vectors()
    n_points = data.shape[0]

    index = Index(data)
    index.add_points(n_points)

    # Pick a random point id, e.g. 94, and show its coordinates,
    # e.g. [[0.64, ...]].
    target_id = np.random.randint(n_points)
    print(data[[target_id]])

    neighbor_ids, neighbor_dists = index.knn_search(target_id, 5, cores=1)
    # e.g. for target_id=10: array([[10, 80, 87,  5, 95]])
    print(neighbor_ids)
    # e.g. array([[0, 0.76741797, 0.86952025, 0.90387696, 0.9157505 ]])
    print(neighbor_dists)
예제 #3
0
class KNNKernelDensity:
    """Kernel density scores computed from k-nearest-neighbour distances.

    An ``Index`` is built over ``X``. Unless ``online`` is truthy, every row
    of ``X`` is added to the index at construction time; otherwise points are
    expected to be added later through ``run`` / ``run_ids``.
    """

    # Normalising constant of the one-dimensional Gaussian kernel.
    SQRT2PI = np.sqrt(2 * np.pi)

    def __init__(self, X: np.ndarray[Any, Any], online: Optional[bool] = False):
        self.X = X
        self.index = Index(X)
        if online:
            return
        # Offline mode: index all points up front.
        self.index.add_points(len(X))

    def run(self, ops: Any) -> Any:
        """Forward ``ops`` to ``Index.run`` and return its result."""
        outcome = self.index.run(ops)
        return outcome

    def run_ids(self, ids: Iterable[int]) -> Any:
        """Forward ``ids`` to ``Index.run_ids`` and return its result."""
        outcome = self.index.run_ids(ids)
        return outcome

    def score_samples(
        self, X: np.ndarray[Any, Any], k: int = 10, bandwidth: float = 0.2
    ) -> float:
        """Return, per query row, the summed Gaussian kernel value divided by ``k``.

        The distances come from ``Index.knn_search_points(X, k=k)``; the
        neighbour ids it returns are ignored.
        """
        _ids, neighbor_dists = self.index.knn_search_points(X, k=k)
        return self._gaussian_score(neighbor_dists, bandwidth) / k

    def _gaussian_score(self, dists: float, bandwidth: float) -> float:
        """Sum Gaussian kernel values of ``dists`` along axis 1.

        Each distance ``d`` contributes
        ``exp(-0.5 * (d / bandwidth) ** 2) / bandwidth / SQRT2PI``.
        """
        scaled = dists / bandwidth
        kernel = np.exp(-0.5 * scaled ** 2) / bandwidth / self.SQRT2PI
        return kernel.sum(axis=1)  # type: ignore
예제 #4
0
class KNNKernelDensity():
    """Density scoring based on distances to the k nearest indexed points.

    Wraps an ``Index`` built over ``X``. In offline mode (``online`` falsy)
    all rows of ``X`` are added to the index immediately.
    """

    # sqrt(2 * pi): normaliser of the one-dimensional Gaussian kernel.
    SQRT2PI = np.sqrt(2 * np.pi)

    def __init__(self, X, online=False):
        self.X = X
        self.index = Index(X)

        offline = not online
        if offline:
            self.index.add_points(len(X))

    def run(self, ops):
        """Delegate to ``Index.run`` with the given ``ops``."""
        index = self.index
        return index.run(ops)

    def run_ids(self, ids):
        """Delegate to ``Index.run_ids`` with the given ``ids``."""
        index = self.index
        return index.run_ids(ids)

    def score_samples(self, X, k=10, bandwidth=0.2):
        """Score each query row as its summed kernel value divided by ``k``.

        Distances are obtained from ``Index.knn_search_points(X, k=k)``; the
        returned neighbour ids are not used.
        """
        found = self.index.knn_search_points(X, k=k)
        _, knn_dists = found
        kernel_total = self._gaussian_score(knn_dists, bandwidth)
        return kernel_total / k

    def _gaussian_score(self, dists, bandwidth):
        """Evaluate the Gaussian kernel on ``dists`` and sum along axis 1."""
        exponent = -0.5 * (dists / bandwidth) ** 2
        density = np.exp(exponent) / bandwidth / self.SQRT2PI
        return density.sum(axis=1)
예제 #5
0
    def test_return_shape_64(self):
        """``knn_search`` on float64 data returns (1, k)-shaped ids and dists.

        Also checks that the index keeps the very same array object it was
        given and reports that it is using the pyarray path.
        """
        data = random_vectors(dtype=np.float64)

        index = Index(data)
        self.assertIs(data, index.array)
        self.assertTrue(index.is_using_pyarray)

        n_points = data.shape[0]
        index.add_points(n_points)

        expected_shape = (1, 5)
        for point_id in range(n_points):
            neighbor_ids, neighbor_dists = index.knn_search(point_id, 5)
            self.assertEqual(neighbor_ids.shape, expected_shape)
            self.assertEqual(neighbor_dists.shape, expected_shape)
예제 #6
0
    def test_large_k(self):
        """Asking for more neighbours than indexed points raises ``ValueError``.

        Both the id-based (``knn_search``) and the point-based
        (``knn_search_points``) queries are checked.
        """
        data = random_vectors()
        query = random_vectors(1)
        n_points = data.shape[0]
        too_many = n_points + 1  # one more than the number of vectors

        index = Index(data)
        self.assertTrue(index.is_using_pyarray)
        index.add_points(n_points)

        with self.assertRaises(ValueError):
            index.knn_search(0, too_many)

        with self.assertRaises(ValueError):
            index.knn_search_points(query, too_many)
예제 #7
0
    def test_random_64(self):
        """A float32 copy of an indexed float64 point finds itself as 1-NN."""
        data = random_vectors(dtype=np.float64)

        index = Index(data)
        self.assertTrue(index.is_using_pyarray)
        # we must add points before querying the index
        n_points = data.shape[0]
        index.add_points(n_points)

        target_id = np.random.randint(n_points)
        query = np.asarray(data[[target_id]], dtype=np.float32)

        found_ids, _found_dists = index.knn_search_points(query, 1, cores=1)

        self.assertEqual(len(found_ids), 1)
        self.assertEqual(found_ids[0], target_id)
예제 #8
0
def test_add_points():
    """Demonstrate how ``Index.add_points`` grows the index.

    Points must be added before the index can be queried.

    Argument of ``add_points``
    --------------------------
    ops: number of points to add

    The sizes noted in the comments below are the values the original
    example expects to be printed.
    """
    data = random_vectors(n=30)
    index = Index(data)

    # Nothing has been added yet, so the expected size is 0.
    print(index.size())

    # Add a single point; expected size is 1.
    index.add_points(1)
    print(index.size())

    # Request far more points than exist; expected size is 30 since we
    # cannot add more than we have.
    index.add_points(100000)
    print(index.size())
예제 #9
0
파일: knnreg.py 프로젝트: jdfekete/PANENE
class KNNRegressor():
    """k-nearest-neighbour regressor backed by an ``Index``.

    Parameters
    ----------
    X : training points handed to ``Index``.
    y : training targets, either 1-D (one value per point) or 2-D
        (one row of outputs per point). Must support NumPy fancy indexing.
    n_neighbors : number of neighbours used for each prediction.
    weights : ``'uniform'`` averages the neighbours' targets; any other value
        weights each neighbour by the inverse of its distance.
    online : when falsy, all points are added to the index at construction
        time; when truthy, the caller is expected to add them via ``run``.
    """

    def __init__(self, X, y, n_neighbors=5, weights='uniform', online=False):
        self.X = X
        self.y = y
        self.index = Index(X)
        self.n_neighbors = n_neighbors
        self.weights = weights

        if not online:  # offline: index every point up front
            self.index.add_points(len(X))

    def run(self, ops):
        """Forward ``ops`` to ``Index.run`` and return its result."""
        return self.index.run(ops)

    def predict(self, X):
        """Predict targets for the query points ``X``.

        Returns an array with one prediction per query row. The result is
        1-D when ``y`` is 1-D, otherwise it has one column per output.
        """
        indices, dists = self.index.knn_search_points(X, k=self.n_neighbors)
        weights = self._get_weights(dists)

        # Work on a 2-D view of the targets so that both branches below can
        # index the output axis; a 1-D ``y`` previously raised IndexError in
        # the weighted branch.
        targets = self.y
        if targets.ndim == 1:
            targets = targets.reshape(-1, 1)

        if weights is None:
            y_pred = np.mean(targets[indices], axis=1)
        else:
            n_outputs = targets.shape[1]
            y_pred = np.empty((len(indices), n_outputs))
            denom = np.sum(weights, axis=1)

            for j in range(n_outputs):
                num = np.sum(targets[indices, j] * weights, axis=1)
                y_pred[:, j] = num / denom

        if self.y.ndim == 1:
            y_pred = y_pred.ravel()

        return y_pred

    def _get_weights(self, dists):
        """Return per-neighbour weights, or ``None`` for uniform weighting.

        Weights are ``1 / distance``. If a row contains an exact match
        (distance 0), the zero-distance neighbours get weight 1 and all
        other neighbours in that row get weight 0. The input array is left
        untouched; a new array is returned.
        """
        if self.weights == 'uniform':
            return None

        dists = np.asarray(dists)
        # Division by zero is expected here; those rows are overwritten below.
        with np.errstate(divide='ignore'):
            weights = 1. / dists

        exact = dists == 0.
        rows_with_exact = exact.any(axis=1)
        weights[rows_with_exact] = exact[rows_with_exact]

        return weights
예제 #10
0
    def test_updates_after_all_points_added(self):
        """Once every point is indexed, ``run`` only performs index updates.

        After adding all points and issuing a batch of queries, each
        ``run(ops)`` result must report 0 for ``addPointResult`` and ``ops``
        for ``updateIndexResult``.
        """
        np.random.seed(10)
        n_points = 10000
        weight_pair = (0.5, 0.5)
        data = random_vectors(n_points)
        ops_per_run = 1000

        index = Index(data, w=weight_pair)
        self.assertTrue(index.is_using_pyarray)

        # add all points
        index.add_points(n_points)

        # accumulate losses
        for _ in range(1000):
            queries = random_vectors(100)
            index.knn_search_points(queries, 10)

        for _ in range(10):
            result = index.run(ops_per_run)

            self.assertEqual(result['addPointResult'], 0)
            self.assertEqual(result['updateIndexResult'], ops_per_run)
예제 #11
0
    def test_openmp(self):
        """Time ``knn_search_points`` over the whole data set with 1 and 4 cores.

        The two timings are printed only; nothing is asserted about the
        speed-up.
        """
        n_points = 10000  # must be large enough

        data = random_vectors(n_points)

        index = Index(data)
        self.assertTrue(index.is_using_pyarray)
        # we must add points before querying the index
        index.add_points(data.shape[0])

        for _ in range(5):  # make cache ready
            index.knn_search_points(data, 10)

        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short durations; time.time() can jump with clock changes.
        start = time.perf_counter()
        index.knn_search_points(data, 10, cores=1)
        elapsed_single = time.perf_counter() - start

        start = time.perf_counter()
        index.knn_search_points(data, 10, cores=4)
        elapsed_multi = time.perf_counter() - start

        print("single thread: {:.2f} ms".format(elapsed_single * 1000))
        print("4 threads: {:.2f} ms".format(elapsed_multi * 1000))
예제 #12
0
def test_knn_search_points():
    """Demonstrate ``Index.knn_search_points`` with one point and with a batch.

    ``knn_search_points`` is queried with point *data* (a 2D array) rather
    than with point ids. According to the original notes, ids and distances
    come back in ascending order of distance and include the point itself.

    Arguments used in the calls below
    ---------------------------------
    points: 2D array of query points, e.g. [[0.33, 0.61, ...]]
    k: number of neighbours to find (must be less than or equal to the
       number of points)
    cores: number of cores to use

    The original notes also list ``checks``, ``eps`` and ``sorted`` without
    describing them; they are not passed here.

    Values returned by ``knn_search_points``
    ----------------------------------------
    ids: ids of the points found (numpy 2D array)
    dists: distances from the target point (numpy 2D array)
    """
    data = random_vectors(n=10, d=3)
    n_points = data.shape[0]

    index = Index(data)
    index.add_points(n_points)

    # Query with a single, randomly chosen data point.
    target_id = np.random.randint(n_points)
    single_query = np.asarray(data[[target_id]], dtype=np.float32)
    single_ids, single_dists = index.knn_search_points(single_query, 3, cores=1)
    print(single_ids)
    print(single_dists)

    # Query with the whole data set at once.
    batch_ids, batch_dists = index.knn_search_points(data, 5, cores=1)
    print(batch_ids)
    print(batch_dists)
예제 #13
0
    def test_check_x_type(self):
        """Unsupported input dtypes must raise ``ValueError``.

        The default ``random_vectors()`` output must index and query without
        error. For int32 data and for ``np.random.rand`` output, a
        ``ValueError`` is expected somewhere in the build / add / query
        sequence (the test does not pin down which call raises).
        """
        good = random_vectors()
        index = Index(good)
        self.assertTrue(index.is_using_pyarray)
        index.add_points(len(good))
        index.knn_search_points(good, 10)

        with self.assertRaises(ValueError):
            int_data = random_vectors(dtype=np.int32)
            int_index = Index(int_data)
            int_index.add_points(len(int_data))
            int_index.knn_search_points(int_data, 10)

        with self.assertRaises(ValueError):
            rand_data = np.random.rand(100, 10)
            rand_index = Index(rand_data)
            rand_index.add_points(len(rand_data))
            rand_index.knn_search_points(rand_data, 10)