Example #1
0
def fast_knn(X,
             n_clusters=5,
             n_neighbors=None,
             graph_mode='distance',
             cluster_mode='spectral',
             algorithm='brute',
             n_jobs=1,
             random_state=1234,
             force_sklearn=False):
    r""" Fit a `NearestNeighbors` estimator (cuML when available, otherwise
    scikit-learn) on `X` and attach clustering helpers to the instance.

    Arguments:
      X : `ndarray`
        Training data, passed unchanged to `NearestNeighbors.fit`. On the
        cuML path `X.shape[0]` is read, so `X` must expose `.shape`.
        NOTE(review): an earlier revision of this docstring also advertised
        a tuple of (X, y); this function does not unpack tuples - confirm.
      n_clusters : int (default = 5)
        Cast to `int` and stored on the estimator as `_n_clusters`. Also
        used as the fallback for `n_neighbors`.
      n_neighbors : int or None (default = None)
        The top K closest datapoints you want the algorithm to return.
        When `None`, `n_clusters` is used instead.
        Currently, this value must be < 1024.
      graph_mode : {'distance', 'connectivity'}, default='distance'
        This mode decides which values `kneighbors_graph` will return:
          - 'connectivity' : will return the connectivity matrix with ones
            and zeros (for 'SpectralClustering').
          - 'distance' : will return the distances between neighbors
            according to the given metric (for 'DBSCAN').
        The value is stripped and lower-cased before it is checked.
      cluster_mode : str, default='spectral'
        This mode decides how to generate cluster prediction from the
        neighbors graph. It is stripped, lower-cased and stored as
        `_cluster_mode`; it is NOT validated here.
        NOTE(review): the names mentioned for this option are 'vote',
        'dbscan', 'spectral', 'isomap' and 'kmeans' - confirm the supported
        set against the helper that consumes `_cluster_mode`.
      algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='brute'
        Algorithm used to compute the nearest neighbors (scikit-learn
        backend only; the argument is dropped on the cuML path):
          - 'ball_tree' will use :class:`BallTree`
          - 'kd_tree' will use :class:`KDTree`
          - 'brute' will use a brute-force search.
          - 'auto' will attempt to decide the most appropriate algorithm
            based on the values passed to :meth:`fit` method.
        Note: fitting on sparse input will override the setting of
        this parameter, using brute force.
      n_jobs : int (default = 1)
        Forwarded as `n_jobs` to scikit-learn, or as `n_gpus` to cuML.
      random_state : int (default = 1234)
        Stored on the estimator as `_random_state`.
      force_sklearn : bool (default = False)
        Passed to `_check_cuml`, which decides the backend; presumably
        `True` forces the scikit-learn implementation.

    Returns:
      The fitted `NearestNeighbors` instance whose `kneighbors_graph`,
      `transform`, `fit_transform` and `predict` methods are rebound to the
      module-level `nn_*` helpers.

    Raises:
      ValueError : if `graph_mode` is neither 'distance' nor 'connectivity'.
    """
    n_clusters = int(n_clusters)
    if n_neighbors is None:
        n_neighbors = n_clusters
    ## graph mode
    graph_mode = str(graph_mode).strip().lower()
    # Explicit raise instead of `assert`: assertions are removed under
    # `python -O`, which would let an invalid mode through silently.
    if graph_mode not in ('distance', 'connectivity'):
        raise ValueError(
            "graph_mode must be 'distance' or 'connectivity', but given: %r" %
            (graph_mode,))
    ## cluster mode
    cluster_mode = str(cluster_mode).strip().lower()
    ## fine-tuning the kwargs
    # Only the arguments the estimator constructor is meant to receive are
    # listed, so new locals can never leak into `NearestNeighbors(**kwargs)`.
    use_cuml = _check_cuml(force_sklearn)
    if use_cuml:
        from cuml.neighbors import NearestNeighbors
        # cuML path: `n_jobs` is passed as `n_gpus`, `algorithm` is dropped.
        kwargs = dict(n_neighbors=n_neighbors, n_gpus=n_jobs)
    else:
        from sklearn.neighbors import NearestNeighbors
        kwargs = dict(n_neighbors=n_neighbors,
                      algorithm=algorithm,
                      n_jobs=n_jobs)
    ## fitting
    knn = NearestNeighbors(**kwargs)
    knn.fit(X)
    # Identity of the fitted object; presumably lets the attached helpers
    # recognise when they are called on the training data - confirm.
    knn._fitid = id(X)
    ## Transform mode
    knn._random_state = random_state
    knn._n_clusters = n_clusters
    knn._graph_mode = graph_mode
    knn._cluster_mode = cluster_mode
    if use_cuml:
        # Set by hand on the cuML estimator; presumably the helpers rely on
        # this scikit-learn style attribute - confirm.
        knn.n_samples_fit_ = X.shape[0]
    # Bind the module-level helpers as methods of this particular instance.
    knn.kneighbors_graph = types.MethodType(nn_kneighbors_graph, knn)
    knn.transform = types.MethodType(nn_transform, knn)
    knn.fit_transform = types.MethodType(nn_fit_transform, knn)
    knn.predict = types.MethodType(nn_predict, knn)
    return knn