def search_for_clusters(self):
    if self.n_entries > 2 * threshold_n_points:
        try:
            # Cluster on position and score only
            vars = ["ra_deg", "dec_deg", "rfc_score"]
            data = np.array([[getattr(x, var) for var in vars] for x in self.entries])
            k_means = cluster.MeanShift()
            k_means.fit(data)
            self.clf["Spatial"] = k_means.labels_

            # Repeat with the observation date as an extra dimension
            vars.append("date_mjd")
            all_data = np.array([[getattr(x, var) for var in vars] for x in self.entries])
            all_k_means = cluster.MeanShift()
            all_k_means.fit(all_data)
            self.clf["Time"] = all_k_means.labels_
        except ValueError:
            pass
def definition_clusters(subset):
    # Important -> normalise the dataset we are working with
    normalized_set = preprocessing.normalize(subset, norm='l2')
    print("-------- Defining the clusterers...")
    k_means = cl.KMeans(init='k-means++', n_clusters=5, n_init=100)

    # estimate bandwidth for mean shift
    bandwidth = cl.estimate_bandwidth(normalized_set, quantile=0.3)
    ms = cl.MeanShift(bandwidth=bandwidth, bin_seeding=True)

    # Only use this for small case studies
    spectral = cl.SpectralClustering(n_clusters=5, affinity="rbf")
    dbscan = cl.DBSCAN(eps=0.1)
    # Low threshold because fit_predict raised a warning otherwise
    brc = cl.Birch(n_clusters=5, threshold=0.1)

    # Collect them in a list
    clustering_algorithms = (('K-Means', k_means),
                             ('MeanShift', ms),
                             ('DBSCAN', dbscan),
                             ('Birch', brc),
                             ('SpectralClustering', spectral))
    return clustering_algorithms
def definition_clusters(subset):
    # Important -> normalise the dataset we are working with
    normalized_set = preprocessing.normalize(subset, norm='l2')
    print("-------- Defining the clusterers...")
    k_means = cl.KMeans(init='k-means++', n_clusters=5, n_init=100)

    # estimate bandwidth for mean shift
    bandwidth = cl.estimate_bandwidth(normalized_set, quantile=0.3)
    ms = cl.MeanShift(bandwidth=bandwidth)
    two_means = cl.MiniBatchKMeans(n_clusters=5, init='k-means++')

    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(normalized_set, n_neighbors=10, include_self=False)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)
    ward = cl.AgglomerativeClustering(n_clusters=5, linkage='ward')
    # dbscan = cl.DBSCAN(eps=0.3, n_clusters=5)
    brc = cl.Birch(n_clusters=5, threshold=0.1)

    # Collect them in a list
    clustering_algorithms = (('K-Means', k_means),
                             ('MiniBatchKMeans', two_means),
                             ('MeanShift', ms),
                             ('Agglomerative', ward),
                             ('Birch', brc))
    return clustering_algorithms
def ClusterHouses(matches, plot_groups=False):
    groups = {}
    try:
        # Build an (N, 2) array of house locations
        N = len(matches)
        X = np.zeros((N, 2))
        for m in range(N):
            loc = RFAPI.house_location(matches[m])
            #logging.debug("ClusterHouses({})".format(loc))
            X[m] = (loc[0], loc[1])

        params = {
            'quantile': .3,
            'eps': .15,
            'damping': .9,
            'preference': -5,
            'n_neighbors': 2,
            'n_clusters': 5
        }

        # a bit buggy..
        spectral = cluster.SpectralClustering(
            n_clusters=params['n_clusters'],
            eigen_solver='arpack',
            affinity="nearest_neighbors")
        # best so far!
        gmm = mixture.GaussianMixture(n_components=params['n_clusters'],
                                      covariance_type='full')
        # yielded one cluster..
        affinity_propagation = cluster.AffinityPropagation(
            damping=params['damping'], preference=params['preference'])

        bandwidth = cluster.estimate_bandwidth(X, quantile=params['quantile'])
        ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)

        algorithm = ms
        algorithm.fit(X)
        if hasattr(algorithm, 'labels_'):
            y_pred = algorithm.labels_.astype(int)
        else:
            y_pred = algorithm.predict(X)

        # Group the matches by their predicted cluster label
        for m in range(len(matches)):
            key = str(y_pred[m])
            if groups.get(key) is None:
                groups[key] = []
            groups[key].append({
                "adress": RFAPI.house_address(matches[m]),
                "location": [X[m][0], X[m][1]]
            })
        logging.debug("groups = {}".format(groups))

        if plot_groups:
            HouseScore._plot_groups(X, y_pred)
    except Exception as e:
        groups["error"] = str(e)
        logging.error(groups["error"])
    return groups
def definition_clusters(subset):
    # Important -> normalise the dataset we are working with
    normalized_set = preprocessing.normalize(subset, norm='l2')
    print("-------- Defining the clusterers...")
    k_means = cl.KMeans(init='k-means++', n_clusters=5, n_init=100)
    two_means = cl.MiniBatchKMeans(n_clusters=5, init='k-means++')

    # estimate bandwidth for mean shift
    bandwidth = cl.estimate_bandwidth(normalized_set, quantile=0.3)
    ms = cl.MeanShift(bandwidth=bandwidth)

    # connectivity matrix for structured Ward
    #connectivity = kneighbors_graph(normalized_set, n_neighbors=10, include_self=False)
    # make connectivity symmetric
    #connectivity = 0.5 * (connectivity + connectivity.T)
    ward = cl.AgglomerativeClustering(n_clusters=100, linkage='ward')
    average = cl.AgglomerativeClustering(n_clusters=100, linkage='average')

    # Only use this for small case studies
    # n_jobs = -1 so that it runs in parallel
    #spectral = cl.SpectralClustering(n_clusters=3, affinity="nearest_neighbors", n_jobs=-1, n_neighbors=3)
    #dbscan = cl.DBSCAN(eps=0.3)

    # Collect them in a list
    clustering_algorithms = (('K-Means', k_means),
                             ('MeanShift', ms),
                             ('MiniBatchMeans', two_means),
                             ('AgglomerativeWard', ward),
                             ('AgglomerativeAverage', average))
    return clustering_algorithms
def fit_meanshift(self, data, bandwidth=None, bin_seeding=False, **kwargs):
    """
    Fit the MeanShift clustering algorithm to data.

    Parameters
    ----------
    data : array-like
        A dataset formatted by `classifier.fitting_data`.
    bandwidth : float
        The bandwidth value used during clustering. If None, it is
        determined automatically. Note: the data are scaled before
        clustering, so this is not in the same units as the data.
    bin_seeding : bool
        Whether or not to use 'bin_seeding'. See documentation for
        `sklearn.cluster.MeanShift`.
    **kwargs
        Passed to `sklearn.cluster.MeanShift`.

    Returns
    -------
    Fitted `sklearn.cluster.MeanShift` object.
    """
    if bandwidth is None:
        bandwidth = cl.estimate_bandwidth(data)
    ms = cl.MeanShift(bandwidth=bandwidth, bin_seeding=bin_seeding, **kwargs)
    ms.fit(data)
    return ms
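# Illustrative, self-contained sketch of the MeanShift workflow wrapped by
# fit_meanshift above (not part of the original source; the toy data and
# variable names are made up for demonstration).
import numpy as np
from sklearn import cluster as cl

_rng = np.random.default_rng(0)
_demo = np.vstack([_rng.normal(0.0, 0.2, (50, 2)), _rng.normal(2.0, 0.2, (50, 2))])

_bw = cl.estimate_bandwidth(_demo)           # automatic bandwidth, as in fit_meanshift
_ms = cl.MeanShift(bandwidth=_bw, bin_seeding=True)
_ms.fit(_demo)
print(_ms.cluster_centers_)                  # one centre per discovered mode
print(np.unique(_ms.labels_))                # labels assigned to each sample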
def investigateOptimalAlgorithms(kmerId, kmerPca):
    plot.setLibrary('bokeh')

    pca = kmerPca.loc[:, PCA_DATA_COL_NAMES]
    plots = {}
    # Distinct names so the two agglomerative variants do not overwrite
    # each other in the plots dictionary.
    algos = (('KMeans', cluster.KMeans()),
             ('Affinity', cluster.AffinityPropagation()),
             ('MeanShift', cluster.MeanShift()),
             ('Spectral', cluster.SpectralClustering()),
             ('AgglomerativeAverage', cluster.AgglomerativeClustering(linkage='average')),
             ('AgglomerativeWard', cluster.AgglomerativeClustering(linkage='ward')),
             ('DBSCAN', cluster.DBSCAN()),
             ('Gaussian', GaussianMixture()))

    ## Visualise data and manually determine which algorithm will be good
    for i, (name, algo) in enumerate(algos, 1):
        labels = _getLabels(algo, pca)
        labels = pd.DataFrame(labels, columns=[CLABEL_COL_NAME])
        kmerDf = pd.concat([kmerId, pca, labels], axis=1)

        dataset = hv.Dataset(kmerDf, PCA_DATA_COL_NAMES)
        scatter = dataset.to(hv.Scatter, PCA_DATA_COL_NAMES,
                             groupby=CLABEL_COL_NAME).overlay()
        scatter.opts(opts.Scatter(size=10, show_legend=True))
        plots[name] = scatter

    plots = hv.HoloMap(plots, kdims='algo')
    plots = plots.collate()
    return plots
def clusterization(cluster_radius, number_of_processors, atom_coords):
    """
    It builds the clusters according to the atomic coordinates that are
    supplied.

    PARAMETERS
    ----------
    cluster_radius : int
        radius that defines the width of each cluster.
    number_of_processors : int
        number of processors that will be used to read the trajectories
        and clusterize all the points.
    atom_coords : list
        filtered list of ordered atom coordinates.

    RETURNS
    -------
    estimator : sklearn.cluster.MeanShift object
        clusterization implementation that clusterizes through the
        MeanShift method.
    results : list
        list with the results of the clusterization. Each element is the
        cluster to which each atom belongs.
    """
    # Leave half of the cores free when all of them were requested
    if (number_of_processors > 2 and number_of_processors == cpu_count()):
        number_of_processors = int(number_of_processors / 2)

    estimator = cluster.MeanShift(bandwidth=cluster_radius,
                                  n_jobs=number_of_processors,
                                  cluster_all=True)
    results = estimator.fit_predict(atom_coords)
    return estimator, results
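# Hedged usage sketch for clusterization above (illustrative only; the random
# coordinates, radius and processor count below are invented for the example).
import numpy as np
from multiprocessing import cpu_count

_atom_coords = np.random.default_rng(1).uniform(0.0, 10.0, size=(200, 3)).tolist()
_estimator, _results = clusterization(cluster_radius=2,
                                      number_of_processors=cpu_count(),
                                      atom_coords=_atom_coords)
print(len(_estimator.cluster_centers_), "clusters,", len(_results), "assignments")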
def meanshift(samples, samples_to_predict):
    # bandwidth = cluster.estimate_bandwidth(samples, n_jobs=-1, n_samples=10000, quantile=0.2)
    bandwidth = 60
    print('bandwidth: {}'.format(bandwidth))
    ms = cluster.MeanShift(bandwidth=bandwidth, n_jobs=-1)
    ms.fit(samples)
    return ms.predict(samples_to_predict)
def cluster_model(newdata, data, model_name, input_param):
    ds = data
    params = input_param
    name = str.lower(model_name)

    if name == 'kmeans':
        cluster_obj = cluster.KMeans(n_clusters=params['n_clusters'])
    elif name == 'minibatchkmeans':
        cluster_obj = cluster.MiniBatchKMeans(n_clusters=params['n_clusters'])
    elif name == 'spectralclustering':
        cluster_obj = cluster.SpectralClustering(n_clusters=params['n_clusters'])
    elif name == 'meanshift':
        cluster_obj = cluster.MeanShift(bandwidth=params['bandwidth'])
    elif name == 'dbscan':
        cluster_obj = cluster.DBSCAN(eps=params['eps'])
    elif name == 'affinitypropagation':
        cluster_obj = cluster.AffinityPropagation(damping=params['damping'],
                                                  preference=params['preference'])
        cluster_obj.fit(ds)
    elif name == 'birch':
        cluster_obj = cluster.Birch(n_clusters=input_param['n_clusters'])
    elif name == 'gaussianmixture':
        cluster_obj = mixture.GaussianMixture(n_components=params['n_clusters'],
                                              covariance_type='full')
        cluster_obj.fit(ds)

    # AffinityPropagation and GaussianMixture were already fitted above,
    # so only call predict on them; everything else supports fit_predict.
    if name in ['affinitypropagation', 'gaussianmixture']:
        model_result = cluster_obj.predict(ds)
    else:
        model_result = cluster_obj.fit_predict(ds)

    newdata[model_name] = pd.DataFrame(model_result)
    return newdata
def update_data(self, attrname, old, new):
    # store the models here
    models = [
        cluster.MiniBatchKMeans(n_clusters=self.k_means_slider.value),
        cluster.DBSCAN(eps=self.DBSCAN_slider.value),
        cluster.Birch(n_clusters=self.birch_slider.value),
        cluster.MeanShift(bandwidth=self.bandwidth, bin_seeding=True)
    ]  # AgglomerativeClustering
    assert len(models) == 4

    for model in models:
        model.fit(self.X)

    for i in range(4):
        # Inspect the model being plotted, not the last one fitted above
        if hasattr(models[i], 'labels_'):
            y_pred = models[i].labels_.astype(int)
        else:
            y_pred = models[i].predict(self.X)
        self.colors[i] = [Spectral6[f % 6] for f in y_pred]
        self.source[i].data['colors'] = self.colors[i]
def meanshift(feat, bw, num_process, min_bin_freq, **kwargs):
    print('#num_process:', num_process)
    print('min_bin_freq:', min_bin_freq)
    ms = cluster.MeanShift(bandwidth=bw, n_jobs=num_process,
                           min_bin_freq=min_bin_freq).fit(feat)
    return ms.labels_
def get_data(session_ds, inc_eval_ds, ms_band, db_eps):
    session_data = list(session_ds)
    inc_eval_data = list(inc_eval_ds)

    session_emb = np.squeeze([utils.t2a(d[0][0]) for d in session_data])
    session_lab = np.squeeze([d[1] for d in session_data])
    inc_eval_emb = np.squeeze([utils.t2a(d[0][0]) for d in inc_eval_data])
    inc_eval_lab = np.squeeze([d[1] for d in inc_eval_data])

    X = np.concatenate((session_emb, inc_eval_emb))
    y = np.concatenate((session_lab, inc_eval_lab))

    meanshifts = [cl.MeanShift(bandwidth=b).fit_predict(X) for b in ms_band]
    optics = cl.OPTICS(min_samples=1).fit_predict(X)
    dbscans = [cl.DBSCAN(eps=e, min_samples=1).fit_predict(X) for e in db_eps]

    res = np.array(meanshifts + dbscans + [optics])
    inc_pred = res[:, session_lab.size:]
    aris = [adjusted_rand_score(p, inc_eval_lab) for p in inc_pred]
    amis = [
        adjusted_mutual_info_score(p, inc_eval_lab, average_method='max')
        for p in inc_pred
    ]
    return np.array(aris), np.array(amis), inc_pred, inc_eval_lab
def use_meanShift(mat, n_cluster):
    clusters = cls.MeanShift().fit(mat)
    n_cluster = max(clusters.labels_) + 1
    hist, bin_edges = np.histogram(clusters.labels_, bins=np.arange(n_cluster + 1))
    print('Mean Shift clustering:', clusters.labels_)
    print(hist)
    return clusters.labels_
def configuraciones_meanshift(subset):
    normalized_set = preprocessing.normalize(subset, norm='l2')

    # estimate bandwidth for mean shift
    bandwidth1 = cl.estimate_bandwidth(normalized_set, quantile=0.3)
    bandwidth2 = cl.estimate_bandwidth(normalized_set, quantile=0.4)
    bandwidth3 = cl.estimate_bandwidth(normalized_set, quantile=0.5)
    ms1 = cl.MeanShift(bandwidth=bandwidth1)
    ms2 = cl.MeanShift(bandwidth=bandwidth2)
    ms3 = cl.MeanShift(bandwidth=bandwidth3)

    # Collect them in a list
    clustering_algorithms = (('MeanShift-1', ms1),
                             ('MeanShift-2', ms2),
                             ('MeanShift-3', ms3))
    return clustering_algorithms
def meanshift_1():
    x_data, y_label = datasets.make_blobs(n_samples=500, random_state=20)
    y_predict = cluster.MeanShift().fit_predict(x_data)

    color = ['red', 'green', 'blue']
    for x, y in zip(x_data, y_predict):
        plt.scatter(x[0], x[1], c=color[y])
    plt.show()
def getSortedRowClusters(self, objs):
    '''
    Determine row clusters and their order.

    Clusters that create rows are determined by the user-specified
    algorithm. They are then sorted by location, and lists of indices for
    each cluster are returned in order.
    '''
    if self.row_algorithm == 'affinity':
        algorithm = cluster.AffinityPropagation(**self.row_params)
    elif self.row_algorithm == 'DBSCAN':
        algorithm = cluster.DBSCAN(**self.row_params)
    elif self.row_algorithm == 'MeanShift':
        algorithm = cluster.MeanShift(**self.row_params)

    Y = np.array([[y.baseline] for y in objs], dtype=np.float64)
    rows = algorithm.fit_predict(Y)

    if self.row_algorithm == 'affinity':
        # Here, samples are the found location, so just sort directly.
        row_set = set(rows)

        def ordered_clusters():
            # ABBYY coordinates are bottom-to-top, so reverse list.
            for i in sorted(row_set, reverse=True):
                yield np.where(rows == i)[0]

        return ordered_clusters(), len(row_set), False
    elif self.row_algorithm == 'DBSCAN':
        # Here, samples are labelled, so go back and find the original
        # locations.
        fuzzy = -1 in rows
        num_clusters = len(set(rows)) - (1 if fuzzy else 0)
        clusters = []
        cluster_centres = np.empty(num_clusters)
        for i in range(num_clusters):
            index = np.where(rows == i)
            clusters.append(index[0])
            cluster_centres[i] = np.mean(np.take(Y, index))
        ordered_clusters = (
            clust for centre, clust in sorted(zip(cluster_centres, clusters)))
        return ordered_clusters, num_clusters, fuzzy
    elif self.row_algorithm == 'MeanShift':
        # Here, samples are labelled, but cluster locations are provided.
        fuzzy = -1 in rows
        num_clusters = len(set(rows)) - (1 if fuzzy else 0)
        clusters = []
        for i in range(num_clusters):
            index = np.where(rows == i)
            clusters.append(index[0])
        ordered_clusters = (clust for centre, clust in sorted(
            zip(algorithm.cluster_centers_, clusters)))
        return ordered_clusters, num_clusters, fuzzy
def plot_clusters(data, algorithm, args, kwds):
    '''
    Takes in a dataframe, an algorithm name, positional arguments, and
    keyword arguments, and plots the clusters found by the chosen algorithm.
    '''
    sns.set_context('poster')
    sns.set_color_codes()
    plot_kwds = {'alpha': 0.25, 's': 80, 'linewidths': 0}

    # Pick the clustering estimator from its short name
    if algorithm == 'k':
        model = cluster.KMeans(*args, **kwds)
    elif algorithm == 'mean':
        model = cluster.MeanShift(*args, **kwds)
    elif algorithm == 'spec':
        model = cluster.SpectralClustering(*args, **kwds)
    else:
        model = cluster.AgglomerativeClustering(*args, **kwds)

    start_time = time.time()
    labels = model.fit_predict(data)
    end_time = time.time()

    palette = sns.color_palette('deep', np.unique(labels).max() + 1)
    colors = [palette[x] if x >= 0 else (0.0, 0.0, 0.0) for x in labels]
    plt.scatter(data[0], data[1], c=colors, **plot_kwds)
    frame = plt.gca()
    frame.axes.get_xaxis().set_visible(False)
    frame.axes.get_yaxis().set_visible(False)
    plt.title('Clusters found by {}'.format(str(algorithm)), fontsize=24)
def get_centroids(X):
    # TODO another method could be used, as this does not ensure the amount of clusters
    # TODO Birch for example (but then you would get more centroids)
    ms = cluster.MeanShift(bandwidth=4)
    ms.fit(X)

    labels, counts = np.unique(ms.labels_, return_counts=True)
    fraud = labels[np.argmin(counts)]
    return zip(*ms.cluster_centers_.T.tolist(), [bool(l == fraud) for l in labels])
def mean_shift(data, bandwidth=None, n_samples=500, quantile=0.3):
    # Estimate the bandwidth from the data when none is given
    if bandwidth is None:
        bandwidth = skcluster.estimate_bandwidth(data, quantile=quantile,
                                                 n_samples=n_samples)
    ms = skcluster.MeanShift(bandwidth=bandwidth).fit(data)
    labels = ms.labels_
    return labels
def _cluster(self, acts, method='KM', param_dict=None):
    print('Starting clustering with {} for {} activations'.format(
        method, acts.shape[0]))
    if param_dict is None:
        param_dict = {}
    centers = None
    if method == 'KM':
        n_clusters = param_dict.pop('n_clusters', 25)
        km = cluster.KMeans(n_clusters=n_clusters)
        km.fit(acts)
        centers = km.cluster_centers_
        d = np.linalg.norm(np.expand_dims(acts, 1) - np.expand_dims(centers, 0),
                           ord=2, axis=-1)
        asg, cost = np.argmin(d, -1), np.min(d, -1)
    elif method == 'AP':
        damping = param_dict.pop('damping', 0.5)
        ca = cluster.AffinityPropagation(damping=damping)
        ca.fit(acts)
        centers = ca.cluster_centers_
        d = np.linalg.norm(np.expand_dims(acts, 1) - np.expand_dims(centers, 0),
                           ord=2, axis=-1)
        asg, cost = np.argmin(d, -1), np.min(d, -1)
    elif method == 'MS':
        ms = cluster.MeanShift(n_jobs=self.num_workers)
        asg = ms.fit_predict(acts)
    elif method == 'SC':
        n_clusters = param_dict.pop('n_clusters', 25)
        sc = cluster.SpectralClustering(n_clusters=n_clusters,
                                        n_jobs=self.num_workers)
        asg = sc.fit_predict(acts)
    elif method == 'DB':
        eps = param_dict.pop('eps', 0.5)
        min_samples = param_dict.pop('min_samples', 20)
        sc = cluster.DBSCAN(eps=eps, min_samples=min_samples,
                            n_jobs=self.num_workers)
        asg = sc.fit_predict(acts)
    else:
        raise ValueError('Invalid Clustering Method!')
    if centers is None:
        ## If the method did not return cluster centers, use medoids instead
        centers = np.zeros((asg.max() + 1, acts.shape[1]))
        cost = np.zeros(len(acts))
        for cluster_label in range(asg.max() + 1):
            cluster_idxs = np.where(asg == cluster_label)[0]
            cluster_points = acts[cluster_idxs]
            pw_distances = metrics.euclidean_distances(cluster_points)
            centers[cluster_label] = cluster_points[np.argmin(
                np.sum(pw_distances, -1))]
            cost[cluster_idxs] = np.linalg.norm(
                acts[cluster_idxs] - np.expand_dims(centers[cluster_label], 0),
                ord=2, axis=-1)
    print('Created {} clusters'.format(len(np.unique(asg))))
    return asg, cost, centers
def MeanShift(data):
    bandwidth = cls.estimate_bandwidth(data, quantile=0.2)
    ms = cls.MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(data)

    labels = ms.labels_
    labels_unique = np.unique(labels)
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(labels_unique)
    return labels, n_clusters_
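# Illustrative call of the MeanShift helper above (the two-blob test data are
# invented for this example).
import numpy as np

_pts = np.vstack([np.random.normal(0.0, 0.3, (100, 2)),
                  np.random.normal(3.0, 0.3, (100, 2))])
_labels, _n_clusters = MeanShift(_pts)
print("estimated clusters:", _n_clusters)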
def cluster(image):
    '''Inputs bird's eye view skeleton and outputs clusters of the skeleton'''
    X, Y = np.nonzero(image)
    try:
        bandwidth = clstr.estimate_bandwidth(Y.reshape(-1, 1), quantile=0.15)
        ms = clstr.MeanShift(bandwidth=bandwidth, bin_seeding=True,
                             min_bin_freq=15, cluster_all=False)
        kmeansoutput = ms.fit(Y.reshape(-1, 1))
    except Exception:
        # Fall back to default parameters if bandwidth estimation fails
        ms = clstr.MeanShift()
        kmeansoutput = ms.fit(Y.reshape(-1, 1))
    labels = kmeansoutput.labels_
    return X, Y, labels
def clustering(X, algorithm, n_clusters=2):
    X = np.transpose(X)
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)

    # estimate bandwidth for mean shift
    bandwidth = cluster.estimate_bandwidth(X, quantile=0.3)

    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(X, n_neighbors=5, include_self=False)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)

    # Generate the new colors:
    if algorithm == 'KMeans':
        model = cluster.KMeans(n_clusters=n_clusters, random_state=0)
    elif algorithm == 'Birch':
        model = cluster.Birch(n_clusters=n_clusters)
    elif algorithm == 'DBSCAN':
        model = cluster.DBSCAN(eps=.2)
    elif algorithm == 'AffinityPropagation':
        model = cluster.AffinityPropagation(damping=.9, preference=-200)
    elif algorithm == 'MeanShift':
        model = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
    elif algorithm == 'SpectralClustering':
        model = cluster.SpectralClustering(n_clusters=n_clusters,
                                           eigen_solver='arpack',
                                           affinity="nearest_neighbors")
    elif algorithm == 'Ward':
        model = cluster.AgglomerativeClustering(n_clusters=n_clusters,
                                                linkage='ward',
                                                connectivity=connectivity)
    elif algorithm == 'AgglomerativeClustering':
        model = cluster.AgglomerativeClustering(linkage="average",
                                                affinity="cityblock",
                                                n_clusters=n_clusters,
                                                connectivity=connectivity)

    model.fit(X)

    if hasattr(model, 'labels_'):
        y_pred = model.labels_.astype(int)
    else:
        y_pred = model.predict(X)

    return X, y_pred
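# Hedged example of driving the clustering() dispatcher above through its
# 'MeanShift' branch (the synthetic signal matrix is made up for illustration;
# rows are features here because the function transposes its input).
import numpy as np

_signals = np.random.default_rng(2).normal(size=(3, 150))
_X_scaled, _labels = clustering(_signals, algorithm='MeanShift')
print("labels found:", np.unique(_labels))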
def mean_shift():
    data_1 = numpy.random.normal(loc=0.0, scale=0.1, size=[100, 2])
    data_2 = numpy.random.normal(loc=1, scale=0.1, size=[100, 2])
    data = numpy.concatenate([data_1, data_2], axis=0)
    x = [item[0] for item in data]
    y = [item[1] for item in data]

    # bandwidth = cluster.estimate_bandwidth(data, quantile=0.5, n_samples=500)
    y_pre = cluster.MeanShift(bandwidth=0.01).fit_predict(data)
    plt.scatter(x, y, c=y_pre)
    plt.show()
def meanshift_function(dataAx, bandwidth, quantile):
    # mean_shifter = meanshift.MeanShift()
    # __, mean_shift_result, mscenters = mean_shifter.product_result(dataAx, bandwidth=bandwidth)
    bandwidth = estimate_bandwidth(dataAx, quantile=quantile)
    # print(bandwidth)
    clf = ms.MeanShift(bandwidth=bandwidth, n_jobs=-1)
    clf.fit(dataAx)
    labels = clf.labels_
    return np.array(labels)  # np.array(mean_shift_result),
def mean_shift(matrix):
    mean_shift = skcluster.MeanShift()
    mean_shift.fit(matrix)
    labels = mean_shift.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters:', n_clusters_)
    return labels
def __init__(self, *, use_gpu=True, use_semantic=False,
             ignore_semantic_labels=None, **kwargs):
    if use_gpu:
        self.clusterer = MeanShiftCosine(**kwargs)
    else:
        self.clusterer = cluster.MeanShift(**kwargs)
    super().__init__(use_semantic, ignore_semantic_labels)
def MeanShift(self, parameters):  # data, bandwidth
    result = {}
    default_bandwidth = 3
    data = np.array(parameters['data'])
    data = preprocessing.MinMaxScaler().fit_transform(data)

    if parameters.get('bandwidth') is not None:
        default_bandwidth = int(parameters['bandwidth'])

    model = skc.MeanShift(bandwidth=default_bandwidth, bin_seeding=True)
    clustering = model.fit(data)
    result['labels'] = clustering.labels_
    return result
def findClusters_meanShift(data):
    '''
    Cluster data using Mean Shift method
    '''
    bandwidth = cl.estimate_bandwidth(data, quantile=0.25, n_samples=500)

    # create the classifier object
    meanShift = cl.MeanShift(bandwidth=bandwidth, bin_seeding=True)

    # fit the data
    return meanShift.fit(data)
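# Minimal illustrative call of findClusters_meanShift above (toy two-cluster
# data generated here purely for demonstration).
import numpy as np

_pts = np.vstack([np.random.normal(-1.0, 0.2, (300, 2)),
                  np.random.normal(1.0, 0.2, (300, 2))])
_model = findClusters_meanShift(_pts)
print(_model.cluster_centers_)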