def fast_dbscan(X, eps=0.5, min_samples=5, n_clusters=None, metric='euclidean', algorithm='brute', random_state: int = 1, framework: Literal['auto', 'cuml', 'sklearn'] = 'auto'):
    r"""
    DBSCAN - Density-Based Spatial Clustering of Applications with Noise.

    Finds core samples of high density and expands clusters from them.
    Good for data which contains clusters of similar density. DBSCAN is
    very powerful if the datapoints tend to congregate in larger groups.

    Arguments:
        X : array-like of shape (n_samples, n_features)
            Training data to cluster.

        eps : float, default=0.5
            The maximum distance between two samples for one to be considered
            as in the neighborhood of the other. This is not a maximum bound
            on the distances of points within a cluster. This is the most
            important DBSCAN parameter to choose appropriately for your data
            set and distance function.

        min_samples : int, default=5
            The number of samples (or total weight) in a neighborhood for a
            point to be considered as a core point. This includes the point
            itself.

        n_clusters : None, default=None
            Ignored. DBSCAN derives the number of clusters from the data;
            this parameter exists only for signature consistency with the
            other fast_* clustering helpers.

        metric : str, default='euclidean'
            The metric used when calculating distance between instances.
            Ignored on the cuML backend.

        algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='brute'
            The algorithm to be used by the NearestNeighbors module
            to compute pointwise distances and find nearest neighbors.
            See NearestNeighbors module documentation for details.
            Ignored on the cuML backend.

        random_state : int, default=1
            Ignored. DBSCAN is deterministic given the data; kept for
            signature consistency with the other fast_* helpers.

        framework : {'auto', 'cuml', 'sklearn'}, default='auto'
            Which backend to use; 'auto' lets ``_check_cuml`` decide whether
            the GPU-accelerated cuML implementation is available.

    Returns:
        A fitted DBSCAN estimator. It carries a ``_fitid`` attribute
        (the ``id()`` of the training array — presumably checked by
        ``dbscan_predict`` to recognize the training data; verify there)
        and a bound ``predict`` method provided by ``dbscan_predict``.
    """
    # n_clusters and random_state are accepted for API compatibility only;
    # DBSCAN has no such hyperparameters, so they are deliberately unused.
    # (Previously discarded via a dict(locals())/pop dance that also left a
    # typo'd, unused local `n_cluster`; build kwargs explicitly instead so
    # future locals cannot leak into the estimator constructor.)
    kwargs = {'eps': eps, 'min_samples': min_samples}

    ## pick the backend and fine-tune the kwargs
    if _check_cuml(framework):
        from cuml.cluster import DBSCAN
        # cuML's DBSCAN does not take `metric`/`algorithm`, so omit them.
    else:
        from sklearn.cluster import DBSCAN
        kwargs['metric'] = metric
        kwargs['algorithm'] = algorithm

    ## fitting
    dbscan = DBSCAN(**kwargs)
    dbscan.fit(X)
    # Tag the estimator with the identity of the array it was fit on.
    dbscan._fitid = id(X)
    # Attach a predict method (DBSCAN has none natively in sklearn).
    dbscan.predict = types.MethodType(dbscan_predict, dbscan)
    return dbscan