Beispiel #1
0
  def kneighbors(self, X, n_neighbors=None):
    """Find the K nearest neighbors of each query point.

    Parameters
    ----------
    X : array-like, last dimension same as that of fit data
        The query points, one per row. A ``np.ndarray`` is wrapped with
        ``expr.from_numpy``; any other value is used as-is.

    n_neighbors : int, optional
        Number of neighbors to get (default is ``self.n_neighbors``).
        NOTE: when given, the value is also stored on ``self.n_neighbors``
        and therefore stays in effect for later calls.

    Returns
    -------
    dist : array
        Distances from each query point to its neighbors, nearest first.
        The brute-force branch computes Euclidean distances.

    ind : array
        Indices of the nearest points in the population matrix, in the
        same order as ``dist``.
    """
    if n_neighbors is not None:
      self.n_neighbors = n_neighbors
    # Always slice with the effective K. The raw argument may be None, and
    # ``[:, :None]`` would silently return every fitted point.
    k = self.n_neighbors

    if isinstance(X, np.ndarray):
      X = expr.from_numpy(X)

    if self.algorithm in ('auto', 'brute'):
      # Broadcast (n_query, 1, d) against (1, n_fit, d) to obtain all
      # pairwise squared distances in a single expression.
      X_broadcast = expr.reshape(X, (X.shape[0], 1, X.shape[1]))
      fit_X_broadcast = expr.reshape(self.X, (1, self.X.shape[0], self.X.shape[1]))
      distances = expr.sum((X_broadcast - fit_X_broadcast) ** 2, axis=2)

      neigh_ind = expr.argsort(distances, axis=1)
      neigh_ind = neigh_ind[:, :k].optimized().glom()
      # The square root is applied only to the K kept columns.
      neigh_dist = expr.sort(distances, axis=1)
      neigh_dist = expr.sqrt(neigh_dist[:, :k]).optimized().glom()
      return neigh_dist, neigh_ind

    results = self.X.foreach_tile(mapper_fn=_knn_mapper,
                                  kw={'X': self.X, 'Q': X,
                                      'n_neighbors': k,
                                      'algorithm': self.algorithm})

    # Each tile contributes its own KNN candidates as a (dist, ind) pair.
    # Collect them all and concatenate once, then pick the real KNN.
    dist_parts = []
    ind_parts = []
    for _, candidates in results.iteritems():
      dist_parts.append(candidates[0])
      ind_parts.append(candidates[1])
    dist = np.concatenate(dist_parts, axis=1)
    ind = np.concatenate(ind_parts, axis=1)

    # Per query row: positions of the K smallest candidate distances.
    mask = np.argsort(dist, axis=1)[:, :k]
    new_dist = np.array([row[order] for row, order in zip(dist, mask)])
    new_ind = np.array([row[order] for row, order in zip(ind, mask)])
    return new_dist, new_ind
Beispiel #2
0
  def test_ndimension(self):
    """Check expr.sort / expr.argsort against NumPy on random N-d arrays.

    Runs five cases; each draws a random rank in [2, 6) and a random
    extent in [5, 11) per dimension, then compares results along every axis.
    """
    for case_number in xrange(1, 6):
      rank = np.random.randint(low=2, high=6)
      extents = np.random.randint(low=5, high=11, size=rank)
      util.log_info('Test Case #%s: DIM(%s) shape%s', case_number, rank, extents)

      local_array = new_ndarray(extents)
      dist_array = expr.from_numpy(local_array)

      for ax in xrange(rank):
        # Sorted values must match NumPy's along this axis.
        sorted_actual = expr.sort(dist_array, ax).glom()
        sorted_expected = np.sort(local_array, ax)
        Assert.all_eq(sorted_actual, sorted_expected)

        # The sorting permutation must match as well.
        order_actual = expr.argsort(dist_array, ax).glom()
        order_expected = np.argsort(local_array, ax)
        Assert.all_eq(order_actual, order_expected)
Beispiel #3
0
    def kneighbors(self, X, n_neighbors=None):
        """Find the K nearest neighbors of each query point.

        Parameters
        ----------
        X : array-like, last dimension same as that of fit data
            The query points, one per row. A ``np.ndarray`` is wrapped with
            ``expr.from_numpy``; any other value is used as-is.

        n_neighbors : int, optional
            Number of neighbors to get (default is ``self.n_neighbors``).
            NOTE: when given, the value is also stored on
            ``self.n_neighbors`` and therefore stays in effect for later
            calls.

        Returns
        -------
        dist : array
            Distances from each query point to its neighbors, nearest
            first. The brute-force branch computes Euclidean distances.

        ind : array
            Indices of the nearest points in the population matrix, in the
            same order as ``dist``.
        """
        if n_neighbors is not None:
            self.n_neighbors = n_neighbors
        # Always slice with the effective K. The raw argument may be None,
        # and ``[:, :None]`` would silently return every fitted point.
        k = self.n_neighbors

        if isinstance(X, np.ndarray):
            X = expr.from_numpy(X)

        if self.algorithm in ('auto', 'brute'):
            # Broadcast (n_query, 1, d) against (1, n_fit, d) to obtain all
            # pairwise squared distances in a single expression.
            X_broadcast = expr.reshape(X, (X.shape[0], 1, X.shape[1]))
            fit_X_broadcast = expr.reshape(
                self.X, (1, self.X.shape[0], self.X.shape[1]))
            distances = expr.sum((X_broadcast - fit_X_broadcast)**2, axis=2)

            neigh_ind = expr.argsort(distances, axis=1)
            neigh_ind = neigh_ind[:, :k].optimized().glom()
            # The square root is applied only to the K kept columns.
            neigh_dist = expr.sort(distances, axis=1)
            neigh_dist = expr.sqrt(neigh_dist[:, :k]).optimized().glom()
            return neigh_dist, neigh_ind

        results = self.X.foreach_tile(mapper_fn=_knn_mapper,
                                      kw={
                                          'X': self.X,
                                          'Q': X,
                                          'n_neighbors': k,
                                          'algorithm': self.algorithm
                                      })

        # Each tile contributes its own KNN candidates as a (dist, ind)
        # pair. Collect them all and concatenate once, then pick the real
        # KNN.
        dist_parts = []
        ind_parts = []
        for _, candidates in results.iteritems():
            dist_parts.append(candidates[0])
            ind_parts.append(candidates[1])
        dist = np.concatenate(dist_parts, axis=1)
        ind = np.concatenate(ind_parts, axis=1)

        # Per query row: positions of the K smallest candidate distances.
        mask = np.argsort(dist, axis=1)[:, :k]
        new_dist = np.array([row[order] for row, order in zip(dist, mask)])
        new_ind = np.array([row[order] for row, order in zip(ind, mask)])
        return new_dist, new_ind