def fit_dbscan(data, eps, min_samples, normalize=True, show=True, juxta_cluster_indices_grouped=None, threshold_legend=None):
    """Fit DBSCAN on the transpose of ``data`` and report a silhouette score.

    ``data`` is transposed before fitting, so for 2-D input each column of
    ``data`` is handed to scikit-learn as one sample.

    Args:
        data: Array-like accepted by ``np.transpose``.
        eps: Forwarded to ``DBSCAN(eps=...)``.
        min_samples: Forwarded to ``DBSCAN(min_samples=...)``.
        normalize: When true, every feature (column of the transposed
            array) is min-max scaled to ``[-1, 1]`` before clustering.
        show: When true, ``pf.show_clustered_tsne`` is called with the
            fitted model and the (possibly scaled) samples.
        juxta_cluster_indices_grouped: Passed through unchanged to
            ``pf.show_clustered_tsne``.
        threshold_legend: Passed through unchanged to
            ``pf.show_clustered_tsne``.

    Returns:
        Tuple ``(db, n_clusters_, labels, core_samples_mask, score)``:
        the fitted estimator, the number of distinct labels excluding the
        noise label ``-1``, ``db.labels_``, a boolean mask that is True at
        ``db.core_sample_indices_``, and the silhouette coefficient.

    Note:
        The silhouette score is computed with ``sample_size=5000`` and no
        ``random_state``, so it may differ slightly between runs.
        ``metrics.silhouette_score`` raises ``ValueError`` when the labels
        contain fewer than two distinct values (e.g. everything is noise).
    """
    from sklearn import metrics
    from sklearn.cluster import DBSCAN

    X = np.transpose(data)
    if normalize:
        from sklearn.preprocessing import minmax_scale

        # Keep the returned array instead of relying on copy=False:
        # np.transpose usually returns a view, so in-place scaling rewrote
        # the caller's ``data``; and for non-float input scikit-learn scales
        # an internal copy, so discarding the result skipped normalization.
        X = minmax_scale(X, feature_range=(-1, 1), axis=0)

    db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    score = metrics.silhouette_score(X, labels, sample_size=5000)

    print('For eps={}, min_samples={}, estimated number of clusters={}'.format(eps, min_samples, n_clusters_))
    print("Silhouette Coefficient: {}".format(score))

    if show:
        pf.show_clustered_tsne(db, X, juxta_cluster_indices_grouped, threshold_legend)

    return db, n_clusters_, labels, core_samples_mask, score
def fit_dbscan(data, eps, min_samples, show=True, juxta_cluster_indices_grouped=None, threshold_legend=None):
    """Run DBSCAN on the transpose of ``data`` and summarise the clustering.

    The transposed array is clustered as-is (no scaling is applied here).
    Two summary lines are printed: the parameters with the estimated
    cluster count, and the silhouette coefficient.

    Args:
        data: Array-like accepted by ``np.transpose``.
        eps: Forwarded to ``DBSCAN(eps=...)``.
        min_samples: Forwarded to ``DBSCAN(min_samples=...)``.
        show: When true, ``pf.show_clustered_tsne`` is called with the
            fitted model and the transposed samples.
        juxta_cluster_indices_grouped: Passed through unchanged to
            ``pf.show_clustered_tsne``.
        threshold_legend: Passed through unchanged to
            ``pf.show_clustered_tsne``.

    Returns:
        Tuple ``(db, n_clusters_, db_labels, core_samples_mask, score)``:
        the fitted estimator, the count of distinct labels excluding the
        noise label ``-1``, ``db.labels_``, a boolean mask that is True at
        ``db.core_sample_indices_``, and the silhouette coefficient
        (computed with ``sample_size=5000``).
    """
    samples = np.transpose(data)

    model = DBSCAN(eps=eps, min_samples=min_samples)
    model.fit(samples)
    assignments = model.labels_

    # Flag the samples DBSCAN identified as core points.
    is_core = np.zeros_like(assignments, dtype=bool)
    is_core[model.core_sample_indices_] = True

    # Label -1 marks noise and is not counted as a cluster.
    cluster_count = len(set(assignments) - {-1})

    silhouette = metrics.silhouette_score(samples, assignments, sample_size=5000)

    summary = 'For eps={}, min_samples={}, estimated number of clusters={}'
    print(summary.format(eps, min_samples, cluster_count))
    print("Silhouette Coefficient: {}".format(silhouette))

    if show:
        pf.show_clustered_tsne(model, samples, juxta_cluster_indices_grouped, threshold_legend)

    return model, cluster_count, assignments, is_core, silhouette
# Number of clusters in labels, ignoring noise if present. n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) score = metrics.silhouette_score(X, labels, sample_size=5000) print('For eps={}, min_samples={}, estimated number of clusters={}'.format(eps, min_samples, n_clusters_)) print("Silhouette Coefficient: {}".format(score)) if show: pf.show_clustered_tsne(db, X, juxta_cluster_indices_grouped, threshold_legend) return db, n_clusters_, labels, core_samples_mask, score db, n_clusters_, labels, core_samples_mask, score = fit_dbscan(t_tsne, eps=0.0155, min_samples=43, normalize=True, show=True) pf.show_clustered_tsne(db, X, juxta_cluster_indices_grouped=None, threshold_legend=None) from scipy import stats # Loop over values of eps and min_samples to find best fit for DBSCAN epses = np.arange(0.1, 0.5, 0.05) min_sampleses = np.arange(5, 100, 5) clustering_scores = [] params = [] for eps in epses: for min_samples in min_sampleses: db, n_clusters_, labels, core_samples_mask, score = fit_dbscan(t_tsne, eps, min_samples, show=False)