コード例 #1
0
def black_scholes(current, strike, maturity, rate, volatility):
  """Price a European put and call with the Black-Scholes formula.

  Args:
    current: current price of the underlying asset.
    strike: strike price of the option.
    maturity: time to expiry, in the time unit `rate` and `volatility`
      are quoted in.
    rate: risk-free interest rate (used as exp(-rate * maturity)).
    volatility: volatility of the underlying asset.

  Returns:
    A `(put, call)` tuple -- note that the put price comes first.
  """
  vol_sqrt_t = volatility * sqrt(maturity)

  d1 = 1.0 / vol_sqrt_t * (
    log(current / strike) + (rate + volatility ** 2 / 2) * maturity
  )

  # d2 is d1 shifted by sigma * sqrt(T). Subtracting sigma * T instead is
  # only correct when the maturity happens to be exactly 1.
  d2 = d1 - vol_sqrt_t

  discounted_strike = strike * exp(-rate * maturity)

  call = norm_cdf(d1) * current - norm_cdf(d2) * discounted_strike
  put = norm_cdf(-d2) * discounted_strike - norm_cdf(-d1) * current

  return put, call
コード例 #2
0
ファイル: unsupervised.py プロジェクト: GabrielWen/spartan
  def kneighbors(self, X, n_neighbors=None):
    """Find the K nearest neighbors of the query points.

    Parameters
    ----------
    X : array-like, last dimension same as that of fit data
        The query points. A numpy array is converted with
        ``expr.from_numpy`` first.

    n_neighbors : int, optional
        Number of neighbors to get. When given it is also stored in
        ``self.n_neighbors``; when omitted the value already stored on
        the instance is used.

    Returns
    -------
    dist : array
        Distances to the selected neighbors, sorted by increasing
        distance along axis 1.

    ind : array
        Indices of the nearest points in the population matrix.
    """
    if n_neighbors is not None:
      self.n_neighbors = n_neighbors
    # Always slice with the effective value: slicing with a None argument
    # would silently return every column instead of the K nearest.
    k = self.n_neighbors

    if isinstance(X, np.ndarray):
      X = expr.from_numpy(X)

    if self.algorithm in ('auto', 'brute'):
      # Broadcast the queries against the fitted points to get all
      # pairwise squared distances in one expression.
      X_broadcast = expr.reshape(X, (X.shape[0], 1, X.shape[1]))
      fit_X_broadcast = expr.reshape(self.X, (1, self.X.shape[0], self.X.shape[1]))
      distances = expr.sum((X_broadcast - fit_X_broadcast) ** 2, axis=2)
      neigh_ind = expr.argsort(distances, axis=1)
      neigh_ind = neigh_ind[:, :k].optimized().glom()
      neigh_dist = expr.sort(distances, axis=1)
      # The square root is only taken on the K columns that are kept.
      neigh_dist = expr.sqrt(neigh_dist[:, :k]).optimized().glom()
      return neigh_dist, neigh_ind

    results = self.X.foreach_tile(mapper_fn=_knn_mapper,
                                  kw={'X': self.X, 'Q': X,
                                      'n_neighbors': k,
                                      'algorithm': self.algorithm})

    # Get the KNN candidates for each tile of X, then find out the real KNN.
    dist = None
    ind = None
    for _, v in results.iteritems():
      if dist is None:
        dist = v[0]
        ind = v[1]
      else:
        dist = np.concatenate((dist, v[0]), axis=1)
        ind = np.concatenate((ind, v[1]), axis=1)

    # Per row, the positions of the K smallest candidate distances.
    mask = np.argsort(dist, axis=1)[:, :k]
    new_dist = np.array([row[order] for row, order in zip(dist, mask)])
    new_ind = np.array([row[order] for row, order in zip(ind, mask)])
    return new_dist, new_ind
コード例 #3
0
ファイル: naive_bayes.py プロジェクト: EasonLiao/spartan
def fit(data, labels, label_size, alpha=1.0):
  '''
  Train standard naive bayes model.

  The documents are first re-weighted with a normalized Tf-Idf scheme, the
  weighted feature vectors are then summed per label and passed through
  `_naive_bayes_mapper` to produce the per-label, per-feature scores.

  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.

  Returns:
    dict: the forced results under the keys 'scores_per_label_and_feature'
      and 'scores_per_label'.
  '''
  labels = expr.force(labels)

  # calc document freq: per feature, the number of documents in which the
  # feature has a positive value.
  df = expr.reduce(data,
                   axis=0,
                   dtype_fn=lambda input: input.dtype,
                   local_reduce_fn=lambda ex, data, axis: (data > 0).sum(axis),
                   accumulate_fn=np.add,
                   tile_hint=(data.shape[1],))

  idf = expr.log(data.shape[0] * 1.0 / (df + 1)) + 1

  # Normalized Frequency for a feature in a document is calculated by dividing the feature frequency
  # by the root mean square of features frequencies in that document
  square_sum = expr.reduce(data,
                           axis=1,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=lambda ex, data, axis: np.square(data).sum(axis),
                           accumulate_fn=np.add,
                           tile_hint=(data.shape[0],))

  # NOTE(review): a document whose features are all zero gives rms == 0 and
  # therefore a division by zero in the next step -- confirm callers never
  # pass such rows.
  rms = expr.sqrt(square_sum * 1.0 / data.shape[1])

  # calculate weight normalized Tf-Idf
  data = data / rms.reshape((data.shape[0], 1)) * idf.reshape((1, data.shape[1]))

  # Rows of the target per tile: label_size divided by the number of tiles
  # of `labels`. Floor division keeps the hint an integer regardless of the
  # division semantics in effect, and the clamp avoids a zero-sized hint
  # when there are fewer labels than tiles.
  labels_per_tile = max(1, label_size // len(labels.tiles))

  # add up all the feature vectors with the same labels
  sum_instance_by_label = expr.ndarray((label_size, data.shape[1]),
                                       dtype=np.float64,
                                       reduce_fn=np.add,
                                       tile_hint=(labels_per_tile, data.shape[1]))
  sum_instance_by_label = expr.shuffle(data,
                                       _sum_instance_by_label_mapper,
                                       target=sum_instance_by_label,
                                       kw={'labels': labels, 'label_size': label_size})

  # sum up all the weights for each label from the previous step
  weights_per_label = expr.sum(sum_instance_by_label, axis=1, tile_hint=(label_size,))

  # generate naive bayes per_label_and_feature weights
  weights_per_label_and_feature = expr.shuffle(sum_instance_by_label,
                                               _naive_bayes_mapper,
                                               kw={'weights_per_label': weights_per_label,
                                                   'alpha': alpha})

  return {'scores_per_label_and_feature': weights_per_label_and_feature.force(),
          'scores_per_label': weights_per_label.force(),
          }
コード例 #4
0
ファイル: naive_bayes.py プロジェクト: MaggieQi/spartan
def fit(data, labels, label_size, alpha=1.0):
  '''
  Train standard naive bayes model.

  Args:
    data(Expr): documents to be trained.
    labels(Expr): the correct labels of the training data.
    label_size(int): the number of different labels.
    alpha(float): alpha parameter of naive bayes model.

  Returns:
    dict: forced results under 'scores_per_label_and_feature' (log of the
      smoothed, normalized per-label feature weights) and
      'scores_per_label' (the summed weights of each label).
  '''
  num_docs = data.shape[0]
  num_features = data.shape[1]

  def _same_dtype(input):
    return input.dtype

  def _count_positive(ex, data, axis):
    return (data > 0).sum(axis)

  def _sum_of_squares(ex, data, axis):
    return np.square(data).sum(axis)

  # Document frequency of every feature, and the idf derived from it.
  doc_freq = expr.reduce(data,
                         axis=0,
                         dtype_fn=_same_dtype,
                         local_reduce_fn=_count_positive,
                         accumulate_fn=np.add)
  inverse_doc_freq = expr.log(num_docs * 1.0 / (doc_freq + 1)) + 1

  # A feature's normalized frequency in a document is its frequency divided
  # by the root mean square of all feature frequencies of that document.
  squares_per_doc = expr.reduce(data,
                                axis=1,
                                dtype_fn=_same_dtype,
                                local_reduce_fn=_sum_of_squares,
                                accumulate_fn=np.add)
  root_mean_square = expr.sqrt(squares_per_doc * 1.0 / num_features)

  # Weight-normalized Tf-Idf.
  data = (data / root_mean_square.reshape((num_docs, 1)) *
          inverse_doc_freq.reshape((1, num_features)))

  # Add up all the feature vectors that share a label: the retiled documents
  # are shuffled into a (label_size, num_features) target that accumulates
  # with np.add.
  retiled_docs = expr.retile(data, tile_hint=util.calc_tile_hint(data, axis=0))
  per_label_target = expr.ndarray((label_size, num_features),
                                  dtype=np.float64,
                                  reduce_fn=np.add)
  mapper_kw = {'labels': labels, 'label_size': label_size}
  label_cost = {hash(labels): {'00': 0, '01': np.prod(labels.shape)}}
  feature_sums = expr.shuffle(retiled_docs,
                              _sum_instance_by_label_mapper,
                              target=per_label_target,
                              kw=mapper_kw,
                              cost_hint=label_cost)

  # Total weight of each label.
  label_totals = expr.sum(feature_sums, axis=1)

  # Smoothed log weights per label and feature.
  smoothed = feature_sums + alpha
  normalizer = (label_totals.reshape((label_totals.shape[0], 1)) +
                alpha * feature_sums.shape[1])
  log_weights = expr.log(smoothed / normalizer)

  return {'scores_per_label_and_feature': log_weights.optimized().force(),
          'scores_per_label': label_totals.optimized().force(),
          }
コード例 #5
0
  def _get_norm_of_each_item(self, rating_table):
    """Compute the norm of every item vector.

    Each column of the rating table is one item vector; its norm is the
    square root of the sum of its squared entries.

    Parameters
    ----------
    rating_table : Spartan matrix of shape (M, N).
                   Each column represents the rating of the item.

    Returns
    -------
    item_norm : Spartan matrix of shape (N,).
                item_norm[i] equals || rating_table[:, i] ||
    """
    squared_ratings = expr.multiply(rating_table, rating_table)
    column_totals = expr.sum(squared_ratings, axis=0)
    return expr.sqrt(column_totals)
コード例 #6
0
    def _get_norm_of_each_item(self, rating_table):
        """Compute the norm of every item vector.

        Each column of the rating table is one item vector; its norm is
        the square root of the sum of its squared entries.

        Parameters
        ----------
        rating_table : Spartan matrix of shape (M, N).
                       Each column represents the rating of the item.

        Returns
        -------
        item_norm : Spartan matrix of shape (N,).
                    item_norm[i] equals || rating_table[:, i] ||
        """
        squared_ratings = expr.multiply(rating_table, rating_table)
        column_totals = expr.sum(squared_ratings, axis=0)
        return expr.sqrt(column_totals)
コード例 #7
0
def move(galaxy, dt):
    '''Advance all bodies of the galaxy by one time step `dt`.

    The pairwise accelerations are computed first and added to the
    velocities; the positions are then moved with the updated velocities.
    The 'vx', 'vy', 'vz', 'x', 'y' and 'z' entries of `galaxy` are
    reassigned; nothing is returned.
    '''
    coords = ('x', 'y', 'z')

    # `.reshape(add_tuple(shape, [1]))` appends a dimension of length 1
    # after the existing ones, so subtracting it from the original array
    # yields every pairwise difference (with sign) for that coordinate.
    delta = {}
    for c in coords:
        position = galaxy[c]
        as_column = position.reshape(add_tuple(position.shape, [1]))
        delta[c] = (position - as_column) * -1

    # Euclidean distances between all bodies; the diagonal is set to 1.0.
    r = sqrt(delta['x']**2 + delta['y']**2 + delta['z']**2)
    r = set_diagonal(r, 1.0)

    # Prevent collision: every distance below 1.0 is replaced by 1.0.
    too_close = r < 1.0
    r = spartan.map((r, too_close), lambda x, m: x * ~m + 1.0 * m)

    mass = galaxy['m'].reshape(add_tuple(galaxy['m'].shape, [1]))

    # Per coordinate: the acceleration components, with the contribution of
    # a body on itself zeroed out, summed along axis 0 into the velocity.
    for c in coords:
        accel = G * mass * delta[c] / r**3
        accel = set_diagonal(accel, 0.0)
        galaxy['v' + c] += dt * expr.sum(accel, axis=0)

    # Positions move only after all three velocities have been updated.
    for c in coords:
        galaxy[c] += dt * galaxy['v' + c]
コード例 #8
0
ファイル: nbody.py プロジェクト: GabrielWen/spartan
def move(galaxy, dt):
  '''Advance all bodies of the galaxy by one time step `dt`.

  The pairwise accelerations are computed first and added to the
  velocities; the positions are then moved with the updated velocities.
  The 'vx', 'vy', 'vz', 'x', 'y' and 'z' entries of `galaxy` are
  reassigned; nothing is returned.
  '''
  coords = ('x', 'y', 'z')

  # `.reshape(add_tuple(shape, [1]))` appends a dimension of length 1
  # after the existing ones, so subtracting it from the original array
  # yields every pairwise difference (with sign) for that coordinate.
  delta = {}
  for c in coords:
    position = galaxy[c]
    as_column = position.reshape(add_tuple(position.shape, [1]))
    delta[c] = (position - as_column) * -1

  # Euclidean distances between all bodies; the diagonal is set to 1.0.
  r = sqrt(delta['x']**2 + delta['y']**2 + delta['z']**2)
  r = set_diagonal(r, 1.0)

  # Prevent collision: every distance below 1.0 is replaced by 1.0.
  too_close = r < 1.0
  r = spartan.map((r, too_close), lambda x, m: x * ~m + 1.0 * m)

  mass = galaxy['m'].reshape(add_tuple(galaxy['m'].shape, [1]))

  # Per coordinate: the acceleration components, with the contribution of
  # a body on itself zeroed out, summed along axis 0 into the velocity.
  for c in coords:
    accel = G*mass*delta[c] / r**3
    accel = set_diagonal(accel, 0.0)
    galaxy['v' + c] += dt*expr.sum(accel, axis=0)

  # Positions move only after all three velocities have been updated.
  for c in coords:
    galaxy[c] += dt*galaxy['v' + c]
コード例 #9
0
ファイル: item_recommender.py プロジェクト: EasonLiao/spartan
  def _get_norm_of_each_item(self, rating_table):
    """Get norm of each item vector.

    For each item, calculate the norm of the item vector. A DistArray
    input is wrapped with ``expr.lazify`` first, and the result is forced
    before it is returned.

    Parameters
    ----------
    rating_table : Spartan matrix of shape(M, N).
                   Each column represents the rating of the item.

    Returns
    ---------
    item_norm:  Spartan matrix of shape(N,).
                item_norm[i] equals || rating_table[:,i] ||

    """
    ctx = blob_ctx.get()
    if isinstance(rating_table, array.distarray.DistArray):
      rating_table = expr.lazify(rating_table)

    # Tile hint: number of columns divided by the number of workers. Floor
    # division keeps it an integer regardless of the division semantics in
    # effect, and the clamp avoids a zero-sized hint when there are fewer
    # columns than workers.
    columns_per_worker = max(1, rating_table.shape[1] // ctx.num_workers)

    squared = expr.multiply(rating_table, rating_table)
    res = expr.sqrt(expr.sum(squared, axis=0,
                             tile_hint=(columns_per_worker, )))
    return res.force()
コード例 #10
0
ファイル: base.py プロジェクト: rossparks/spartan
def center_data(X, y, fit_intercept, normalize=False):
  """
  Center data to have mean zero along axis 0.

  This is here because nearly all linear models will want their data to be
  centered. When `fit_intercept` is false nothing is changed and neutral
  statistics (zero means, unit scales) are returned. Otherwise the column
  means are subtracted from X and the mean from y; with `normalize` each
  centered column of X is additionally divided by the square root of its
  sum of squares, where a value of 0 is replaced by 1 before dividing.

  Returns the tuple (X, y, X_mean, y_mean, X_std).
  """
  if not fit_intercept:
    # Nothing to center: report zero means and unit scales.
    num_features = X.shape[1]
    X_mean = expr.zeros(num_features)
    X_std = expr.ones(num_features)
    if y.ndim == 1:
      y_mean = 0.
    else:
      y_mean = expr.zeros(y.shape[1], dtype=X.dtype)
    return X, y, X_mean, y_mean, X_std

  # Subtract the per-column mean, kept as a single row so it lines up with
  # the columns of X.
  column_means = X.mean(axis = 0)
  X_mean = expr.reshape(column_means, (1, column_means.shape[0]))
  X -= X_mean

  if not normalize:
    X_std = expr.ones(X.shape[1])
  else:
    X_std = expr.sqrt(expr.sum(X ** 2, axis=0)).force()
    # Avoid dividing all-zero columns by zero.
    X_std[X_std == 0] = 1
    X /= X_std

  y_mean = y.mean(axis=0)
  y -= y_mean
  return X, y, X_mean, y_mean, X_std
コード例 #11
0
ファイル: swaption.py プロジェクト: rossparks/spartan
def simulate(ts_all, te_all, lamb_all, num_paths):
  '''Range over a number of independent products.

  Every swaption is priced with a random walk of LIBOR rates that uses
  antithetic variates, so each one is simulated on ``2 * num_paths`` paths.
  Relies on the module-level names DELTA, F_0, THETA and mu.

  :param ts_all: DistArray
    Start dates for a series of swaptions. Each element is iterated in
    turn and yields one swaption per start date.
  :param te_all: DistArray
    End dates for a series of swaptions.
  :param lamb_all: DistArray
    Parameter values for a series of swaptions.
  :param num_paths: Int
    Number of paths used in random walk.

  :rtype: list
    One ``[mean, std]`` entry per swaption, both forced: the mean of the
    antithetic path averages times 10000, and their standard deviation
    divided by sqrt(num_paths) times 10000.

  '''
  # Each random draw is used together with its negation, which doubles the
  # number of simulated paths.
  total_paths = 2 * num_paths

  swaptions = []
  i = 0
  for ts_a, te, lamb in zip(ts_all, te_all, lamb_all):
    for ts in ts_a:
      #start = time()
      # Progress output: running index of the swaption being priced.
      print(i)
      time_structure = arange(None, 0, ts + DELTA, DELTA)
      maturity_structure = arange(None, 0, te, DELTA)

      ############# MODEL ###############
      # Variance reduction technique - Antithetic Variates.
      eps_tmp = randn(time_structure.shape[0] - 1, num_paths)
      eps = concatenate(eps_tmp, -eps_tmp, 1)

      # Forward LIBOR rates for the construction of the spot measure.
      f_kk = zeros((time_structure.shape[0], total_paths))
      f_kk = assign(f_kk, np.s_[0, :], F_0)

      # Plane kxN of simulated LIBOR rates.
      f_kn = ones((maturity_structure.shape[0], total_paths))*F_0

      # Simulations of the plane f_kn for each time step.
      for t in xrange(1, time_structure.shape[0]):
        f_kn_new = f_kn[1:, :]*exp(lamb*mu(f_kn, lamb)*DELTA-0.5*lamb*lamb *
            DELTA + lamb*eps[t - 1, :]*sqrt(DELTA))
        f_kk = assign(f_kk, np.s_[t, :], f_kn_new[0])
        f_kn = f_kn_new

      ############## PRODUCT ###############
      # Value of zero coupon bonds.
      zcb = ones((int((te-ts)/DELTA)+1, total_paths))
      f_kn_modified = 1 + DELTA*f_kn
      for j in xrange(zcb.shape[0] - 1):
        zcb = assign(zcb, np.s_[j + 1], zcb[j] / f_kn_modified[j])

      # Swaption price at maturity. The row length follows the number of
      # simulated paths instead of a fixed 20, which only matched
      # num_paths == 10.
      last_row = zcb[zcb.shape[0] - 1, :].reshape((total_paths, ))
      swap_ts = maximum(1 - last_row - THETA*DELTA*expr.sum(zcb[1:], 0), 0)

      # Spot measure used for discounting.
      b_ts = ones((total_paths, ))
      tmp = 1 + DELTA * f_kk
      for j in xrange(int(ts/DELTA)):
        b_ts *= tmp[j].reshape((total_paths, ))

      # Swaption price at time 0.
      swaption = swap_ts/b_ts

      # Average every path with its antithetic counterpart.
      paired = (swaption[0:num_paths] + swaption[num_paths:])/2

      # Save expected value in bps and std.
      me = mean(paired) * 10000
      st = std(paired)/sqrt(num_paths)*10000

      swaptions.append([me.optimized().force(), st.optimized().force()])
      #print time() - start
      i += 1
  return swaptions
コード例 #12
0
def fit(data, labels, label_size, alpha=1.0):
    '''
    Train standard naive bayes model.

    Args:
      data(Expr): documents to be trained.
      labels(Expr): the correct labels of the training data.
      label_size(int): the number of different labels.
      alpha(float): alpha parameter of naive bayes model.

    Returns:
      dict: forced results under 'scores_per_label_and_feature' (log of the
        smoothed, normalized per-label feature weights) and
        'scores_per_label' (the summed weights of each label).
    '''
    num_docs = data.shape[0]
    num_features = data.shape[1]

    def _same_dtype(input):
        return input.dtype

    def _count_positive(ex, data, axis):
        return (data > 0).sum(axis)

    def _sum_of_squares(ex, data, axis):
        return np.square(data).sum(axis)

    # Document frequency of every feature, and the idf derived from it.
    doc_freq = expr.reduce(data,
                           axis=0,
                           dtype_fn=_same_dtype,
                           local_reduce_fn=_count_positive,
                           accumulate_fn=np.add)
    inverse_doc_freq = expr.log(num_docs * 1.0 / (doc_freq + 1)) + 1

    # A feature's normalized frequency in a document is its frequency
    # divided by the root mean square of all feature frequencies of that
    # document.
    squares_per_doc = expr.reduce(data,
                                  axis=1,
                                  dtype_fn=_same_dtype,
                                  local_reduce_fn=_sum_of_squares,
                                  accumulate_fn=np.add)
    root_mean_square = expr.sqrt(squares_per_doc * 1.0 / num_features)

    # Weight-normalized Tf-Idf.
    data = (data / root_mean_square.reshape((num_docs, 1)) *
            inverse_doc_freq.reshape((1, num_features)))

    # Add up all the feature vectors that share a label: the retiled
    # documents are shuffled into a (label_size, num_features) target that
    # accumulates with np.add.
    retiled_docs = expr.retile(data,
                               tile_hint=util.calc_tile_hint(data, axis=0))
    per_label_target = expr.ndarray((label_size, num_features),
                                    dtype=np.float64,
                                    reduce_fn=np.add)
    mapper_kw = {'labels': labels, 'label_size': label_size}
    label_cost = {hash(labels): {'00': 0, '01': np.prod(labels.shape)}}
    feature_sums = expr.shuffle(retiled_docs,
                                _sum_instance_by_label_mapper,
                                target=per_label_target,
                                kw=mapper_kw,
                                cost_hint=label_cost)

    # Total weight of each label.
    label_totals = expr.sum(feature_sums, axis=1)

    # Smoothed log weights per label and feature.
    smoothed = feature_sums + alpha
    normalizer = (label_totals.reshape((label_totals.shape[0], 1)) +
                  alpha * feature_sums.shape[1])
    log_weights = expr.log(smoothed / normalizer)

    return {
        'scores_per_label_and_feature': log_weights.optimized().force(),
        'scores_per_label': label_totals.optimized().force(),
    }
コード例 #13
0
ファイル: swaption.py プロジェクト: rgardner/spartan
def simulate(ts_all, te_all, lamb_all, num_paths):
    """Range over a number of independent products.

    Every swaption is priced with a random walk of LIBOR rates that uses
    antithetic variates, so each one is simulated on ``2 * num_paths``
    paths. Relies on the module-level names DELTA, F_0, THETA and mu.

    :param ts_all: DistArray
      Start dates for a series of swaptions. Each element is iterated in
      turn and yields one swaption per start date.
    :param te_all: DistArray
      End dates for a series of swaptions.
    :param lamb_all: DistArray
      Parameter values for a series of swaptions.
    :param num_paths: Int
      Number of paths used in random walk.

    :rtype: list
      One ``[mean, std]`` entry per swaption, both forced: the mean of the
      antithetic path averages times 10000, and their standard deviation
      divided by sqrt(num_paths) times 10000.

    """
    # Each random draw is used together with its negation, which doubles
    # the number of simulated paths.
    total_paths = 2 * num_paths

    swaptions = []
    i = 0
    for ts_a, te, lamb in zip(ts_all, te_all, lamb_all):
        for ts in ts_a:
            # start = time()
            # Progress output: running index of the swaption being priced.
            print(i)
            time_structure = arange(None, 0, ts + DELTA, DELTA)
            maturity_structure = arange(None, 0, te, DELTA)

            ############# MODEL ###############
            # Variance reduction technique - Antithetic Variates.
            eps_tmp = randn(time_structure.shape[0] - 1, num_paths)
            eps = concatenate(eps_tmp, -eps_tmp, 1)

            # Forward LIBOR rates for the construction of the spot measure.
            f_kk = zeros((time_structure.shape[0], total_paths))
            f_kk = assign(f_kk, np.s_[0, :], F_0)

            # Plane kxN of simulated LIBOR rates.
            f_kn = ones((maturity_structure.shape[0], total_paths)) * F_0

            # Simulations of the plane f_kn for each time step.
            for t in xrange(1, time_structure.shape[0]):
                f_kn_new = f_kn[1:, :] * exp(
                    lamb * mu(f_kn, lamb) * DELTA - 0.5 * lamb * lamb * DELTA + lamb * eps[t - 1, :] * sqrt(DELTA)
                )
                f_kk = assign(f_kk, np.s_[t, :], f_kn_new[0])
                f_kn = f_kn_new

            ############## PRODUCT ###############
            # Value of zero coupon bonds.
            zcb = ones((int((te - ts) / DELTA) + 1, total_paths))
            f_kn_modified = 1 + DELTA * f_kn
            for j in xrange(zcb.shape[0] - 1):
                zcb = assign(zcb, np.s_[j + 1], zcb[j] / f_kn_modified[j])

            # Swaption price at maturity. The row length follows the number
            # of simulated paths instead of a fixed 20, which only matched
            # num_paths == 10.
            last_row = zcb[zcb.shape[0] - 1, :].reshape((total_paths,))
            swap_ts = maximum(1 - last_row - THETA * DELTA * expr.sum(zcb[1:], 0), 0)

            # Spot measure used for discounting.
            b_ts = ones((total_paths,))
            tmp = 1 + DELTA * f_kk
            for j in xrange(int(ts / DELTA)):
                b_ts *= tmp[j].reshape((total_paths,))

            # Swaption price at time 0.
            swaption = swap_ts / b_ts

            # Average every path with its antithetic counterpart.
            paired = (swaption[0:num_paths] + swaption[num_paths:]) / 2

            # Save expected value in bps and std.
            me = mean(paired) * 10000
            st = std(paired) / sqrt(num_paths) * 10000

            swaptions.append([me.optimized().force(), st.optimized().force()])
            # print time() - start
            i += 1
    return swaptions
コード例 #14
0
    def kneighbors(self, X, n_neighbors=None):
        """Find the K nearest neighbors of the query points.

        Parameters
        ----------
        X : array-like, last dimension same as that of fit data
            The query points. A numpy array is converted with
            ``expr.from_numpy`` first.

        n_neighbors : int, optional
            Number of neighbors to get. When given it is also stored in
            ``self.n_neighbors``; when omitted the value already stored on
            the instance is used.

        Returns
        -------
        dist : array
            Distances to the selected neighbors, sorted by increasing
            distance along axis 1.

        ind : array
            Indices of the nearest points in the population matrix.
        """
        if n_neighbors is not None:
            self.n_neighbors = n_neighbors
        # Always slice with the effective value: slicing with a None
        # argument would silently return every column instead of the K
        # nearest.
        k = self.n_neighbors

        if isinstance(X, np.ndarray):
            X = expr.from_numpy(X)

        if self.algorithm in ('auto', 'brute'):
            # Broadcast the queries against the fitted points to get all
            # pairwise squared distances in one expression.
            X_broadcast = expr.reshape(X, (X.shape[0], 1, X.shape[1]))
            fit_X_broadcast = expr.reshape(
                self.X, (1, self.X.shape[0], self.X.shape[1]))
            distances = expr.sum((X_broadcast - fit_X_broadcast)**2, axis=2)
            neigh_ind = expr.argsort(distances, axis=1)
            neigh_ind = neigh_ind[:, :k].optimized().glom()
            neigh_dist = expr.sort(distances, axis=1)
            # The square root is only taken on the K columns that are kept.
            neigh_dist = expr.sqrt(neigh_dist[:, :k]).optimized().glom()
            return neigh_dist, neigh_ind

        results = self.X.foreach_tile(mapper_fn=_knn_mapper,
                                      kw={
                                          'X': self.X,
                                          'Q': X,
                                          'n_neighbors': k,
                                          'algorithm': self.algorithm
                                      })

        # Get the KNN candidates for each tile of X, then find out the
        # real KNN.
        dist = None
        ind = None
        for _, v in results.iteritems():
            if dist is None:
                dist = v[0]
                ind = v[1]
            else:
                dist = np.concatenate((dist, v[0]), axis=1)
                ind = np.concatenate((ind, v[1]), axis=1)

        # Per row, the positions of the K smallest candidate distances.
        mask = np.argsort(dist, axis=1)[:, :k]
        new_dist = np.array([row[order] for row, order in zip(dist, mask)])
        new_ind = np.array([row[order] for row, order in zip(ind, mask)])
        return new_dist, new_ind