def test_bayesian_mixture_predict_predict_proba():
    # this is the same test as test_gaussian_mixture_predict_predict_proba()
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            Y = rand_data.Y
            bgmm = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type,
            )

            # Check that an error is raised if we don't fit first
            msg = ("This BayesianGaussianMixture instance is not fitted yet. "
                   "Call 'fit' with appropriate arguments before using this "
                   "estimator.")
            with pytest.raises(NotFittedError, match=msg):
                bgmm.predict(X)

            bgmm.fit(X)
            Y_pred = bgmm.predict(X)
            Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)
            assert_array_equal(Y_pred, Y_pred_proba)
            assert adjusted_rand_score(Y, Y_pred) >= 0.95
def partition_data(self, args):
    method, j = args
    if method == "vi":
        dp = BayesianGaussianMixture(
            n_components=self.K,
            weight_concentration_prior=self.alpha,
            max_iter=1,
            init_params='kmeans',
            weight_concentration_prior_type='dirichlet_process')
        dp.fit(self.X[self.U[j]])
        Z = dp.predict(self.X[self.U[j]]).astype(int)
        Z_star = dp.predict(self.X_star).astype(int)
    elif method == "gmm":
        Z, Z_star = self.uncollapsed_dp_partition_alt(j)
    elif method == "kmean":
        km = KMeans(n_clusters=self.K)
        Z = km.fit_predict(self.X[self.U[j]]).astype(int)
        Z_star = km.predict(self.X_star[self.U[j]]).astype(int)
    else:
        Z = np.random.choice(self.K, size=self.N_minibatch, replace=True)
        Z_star = np.random.choice(np.unique(Z), size=self.N_star, replace=True)
    le = LE()  # LE is assumed to alias sklearn's LabelEncoder
    le.fit(np.hstack((Z, Z_star)))
    Z = le.transform(Z)
    Z_star = le.transform(Z_star)
    if method == "vi":  # & (self.vi_partition):
        Z_diff = np.setdiff1d(Z_star, Z)
        if Z_diff.size > 0:
            idx = np.hstack([np.where(Z_star == k) for k in Z_diff]).flatten()
            unique_Z = np.unique(Z)
            post_Z = dp.predict_proba(self.X_star[idx])[:, unique_Z]
            Z_star[idx] = [np.random.choice(unique_Z, p=post_Z_i / post_Z_i.sum())
                           for post_Z_i in post_Z]
            assert np.setdiff1d(Z_star, Z).size == 0
    return Z, Z_star
def fit_GMM(data, num_components):
    gmm = GMM(n_components=num_components)
    gmm.fit(data)
    predicted_class = gmm.predict(data)
    num_classes = np.unique(predicted_class).shape[0]
    return gmm, predicted_class, num_classes
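# A minimal usage sketch for fit_GMM above, assuming GMM aliases
# sklearn.mixture.GaussianMixture and `data` is any 2-D array.
import numpy as np
from sklearn.mixture import GaussianMixture as GMM

data = np.vstack([np.random.randn(100, 2), np.random.randn(100, 2) + 5])
gmm, predicted_class, num_classes = fit_GMM(data, num_components=2)
print(num_classes)  # typically 2 for two well-separated blobs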
class VBEM(object):
    def __init__(self, n_components=1, verbose=2, verbose_interval=1, Data=None):
        '''
        :param n_components: cluster number
        :param verbose: whether to show training details
        :param verbose_interval: interval for showing training details
        :param Data: dataset
        '''
        self.model = BayesianGaussianMixture(n_components=n_components,
                                             verbose=verbose,
                                             verbose_interval=verbose_interval)
        self.n_components = n_components
        if Data is None:
            self.dataset = Dataset()
            self.dataset.generate()
        else:
            self.dataset = Data
        self.data = self.dataset.data

    def train(self):
        self.model.fit(self.data)

    def show(self, n=None):
        '''
        show the result of the trained model
        :param n: used only to name saved files
        :return: None
        '''
        plt.figure()
        labels = self.model.predict(self.data)
        plt.scatter(self.data[:, 0], self.data[:, 1], c=labels, s=15)
        if n is None:
            plt.show()
        else:
            plt.savefig('report/demo/vbem_%d_%d' % (n, 4))

    def show_dis(self, dis=None):
        '''
        show the result of the trained model
        :param dis: used only to name saved files
        :return: None
        '''
        plt.figure()
        labels = self.model.predict(self.data)
        plt.scatter(self.data[:, 0], self.data[:, 1], c=labels, s=15)
        if dis is None:
            plt.show()
        else:
            plt.savefig('report/demo/dis_vbem_%d_%d' % (dis, 3))
def extract_improved_cell_centroid(cell_subimg, cell_contour):
    cell_subimg = skimage.filters.median(cell_subimg)

    # Ensure exterior of cell is set to zero.
    mask = np.zeros_like(cell_subimg)
    cv2.drawContours(mask, [cell_contour], -1, 255, -1)
    cell_subimg[mask == 0] = 0
    #visualize('cell_subimg', cell_subimg)

    # 1D GMM.
    X = cell_subimg[cell_subimg != 0].reshape(-1, 1)
    gmm = BayesianGaussianMixture(n_components=10)
    gmm.fit(X)
    gpred_1d = gmm.predict(cell_subimg.reshape(-1, 1)).reshape(
        cell_subimg.shape).astype(np.uint8)

    # Find maximum intensity label for 1D.
    label_1d = np.argmax(gmm.means_)

    # 3D GMM.
    xvals = np.arange(cell_subimg.shape[0])
    yvals = np.arange(cell_subimg.shape[1])
    xx, yy = np.meshgrid(xvals, yvals)
    S = np.vstack([xx.reshape(-1), yy.reshape(-1), cell_subimg.reshape(-1)]).T
    #gmm = GaussianMixture(n_components=COMP)
    gmm = BayesianGaussianMixture(n_components=3)
    gmm.fit(S)
    gpred_3d = gmm.predict(S).reshape(cell_subimg.shape)

    # Find maximum intensity label for 3D.
    label_3d = np.argmax(gmm.means_[:, 2])

    P = np.zeros_like(cell_subimg)
    P[np.logical_and(gpred_1d == label_1d, gpred_3d == label_3d)] = 1

    # Now compute the centroid.
    M = cv2.moments(P)
    try:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
    # If unable to extract, choose the center of the bounding rectangle
    # as the centroid.
    except ZeroDivisionError:
        x, y, w, h = cv2.boundingRect(cell_contour)
        cx, cy = x + w // 2, y + h // 2

    def plt_center():
        plt.plot(cx, cy, 'ro')

    return cx, cy
class Mixtures(object):
    """All mixture model algorithms are implemented here."""

    def __init__(self, method, data, n_clusters=2, random_state=0):
        """
        Initialize all the parameters.

        method: Name of the algorithm (lower case, joined by underscores)
        data: Data (2D matrix)
        n_clusters: Number of clusters
        random_state: Random initial state
        """
        self.method = method
        self.data = data
        self.n_clusters = n_clusters
        np.random.seed(random_state)
        self.random_state = random_state
        self.init_params = "kmeans"
        self.cov = "full"
        self.max_iter = 500
        self.n_init = 5
        self.weight_concentration_prior_type = "dirichlet_process"

    def setup(self, **keywords):
        """Set up the algorithms."""
        for key in keywords.keys():
            setattr(self, key, keywords[key])
        if self.method == "gmm":
            self.obj = GaussianMixture(n_components=self.n_clusters,
                                       covariance_type=self.cov,
                                       max_iter=self.max_iter,
                                       random_state=self.random_state,
                                       n_init=self.n_init,
                                       init_params=self.init_params)
        if self.method == "bgmm":
            self.obj = BayesianGaussianMixture(
                n_components=self.n_clusters,
                covariance_type=self.cov,
                max_iter=self.max_iter,
                random_state=self.random_state,
                n_init=self.n_init,
                init_params=self.init_params,
                weight_concentration_prior_type=self.weight_concentration_prior_type)

    def run(self):
        """Run the models."""
        if self.method in ("gmm", "bgmm"):
            self.obj.fit(self.data)
            setattr(self.obj, "labels_", self.obj.predict(self.data))
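# Hypothetical usage of the Mixtures wrapper above; the imports make the
# names the class body relies on available at module level.
import numpy as np
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture

data = np.random.randn(200, 3)
mix = Mixtures("bgmm", data, n_clusters=3)
mix.setup(max_iter=200)  # keyword overrides are applied via setattr
mix.run()
print(mix.obj.labels_[:10])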
def __init__(self, **kwargs):
    super().__init__(data_set=kwargs.pop('data_set', None), **kwargs)
    self.clf_ = kwargs.get('clf', None)
    if self.clf_ is None:
        raise ValueError("missing required keyword-only argument 'clf'")
    if not callable(getattr(self.clf_, 'fit', None)) or not callable(
            getattr(self.clf_, 'predict_proba', None)):
        raise TypeError(
            "'clf' must be an instance with the methods 'fit' and 'predict_proba'"
        )
    n_components = int(
        kwargs.pop('n_components', np.min([20, len(self.data_set_)])))
    if n_components < 1 or n_components > len(self.data_set_):
        raise ValueError(
            "'n_components' must be an integer in the interval [1, n_samples]"
        )

    # fit Gaussian mixture model for pre-clustering
    gmm = BayesianGaussianMixture(n_components=n_components,
                                  covariance_type='spherical',
                                  max_iter=1000,
                                  random_state=self.random_state_)
    gmm.fit(self.data_set_.X_)
    self.y_cluster_ = gmm.predict(self.data_set_.X_)
    self.p_x_ = np.exp(gmm.score_samples(self.data_set_.X_))
def cluster(self, dim, method='dpgmm', max_n_clusters=80, max_iter=300,
            refresh=True):
    '''
    dim is the dim index for clustering
    '''
    print('clustering DPGMM')
    from sklearn.mixture import BayesianGaussianMixture as DPGMM
    dpgmm = DPGMM(
        n_components=max_n_clusters,
        covariance_type='full',
        weight_concentration_prior=1e-3,
        weight_concentration_prior_type='dirichlet_process',
        init_params="kmeans",  # init can be "kmeans" or "random"
        max_iter=max_iter,
        random_state=0,
        verbose=1,
        verbose_interval=10)
    dpgmm.fit(self.fet[:, dim])
    label = dpgmm.predict(self.fet[:, dim])
    self.clu.membership = label
    self.clu.__construct__()
    self.clu.emit('cluster')
    if refresh is True:
        self.set_data(self.fet, self.clu)
    return label
def bayesian_gaussian_mixture(self, n_components, weight_concentration_prior_type,
                              weight_concentration_prior, mean_precision_prior,
                              n_init, max_iter, init_params):
    '''Bayesian Gaussian Mixture clustering algorithm.

    A low weight_concentration_prior puts more weight on a few components;
    a high value allows a larger number of components to be active in the
    mixture.'''
    bgm = BayesianGaussianMixture(
        n_components=n_components,
        weight_concentration_prior_type=weight_concentration_prior_type,
        weight_concentration_prior=weight_concentration_prior,
        mean_precision_prior=mean_precision_prior,
        n_init=n_init,
        max_iter=max_iter,
        init_params=init_params)
    bgm.fit(self.X)
    self.labels = bgm.predict(self.X)

    unique, counts = np.unique(self.labels, return_counts=True)
    mydict = dict(zip(unique, counts))
    print(mydict)
    plt.bar(list(mydict.keys()), mydict.values(), color='g')
    plt.ylabel("Number of skews")
    plt.xlabel("Cluster")
    plt.title(weight_concentration_prior_type)
    plt.gcf().text(0.05, 0.05, "Parameters initialized using: " + init_params)
    plt.gcf().text(0.05, 0.01, "Weight concentration prior: " + str(weight_concentration_prior))
    plt.gcf().text(0.7, 0.05, "Mean precision prior: " + str(mean_precision_prior))
    plt.gcf().text(0.7, 0.01, "Lower bound: " + str("%.2f" % bgm.lower_bound_))
    #plt.show()

    print("Weights: " + str(bgm.weights_))
    print("Converged: " + str(bgm.converged_))
    print("Number of iterations to reach convergence: " + str(bgm.n_iter_))
    print("Lower bound value on likelihood: " + str(bgm.lower_bound_))
    print("Bayesian Gaussian mixture complete")
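# Standalone sketch of the docstring's claim above: a small
# weight_concentration_prior drives most mixture weights toward zero, so
# fewer components stay "active". Synthetic data and thresholds only.
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

X = np.concatenate([np.random.randn(200, 2) + c for c in (0, 6)])
for prior in (1e-3, 1e3):
    bgm = BayesianGaussianMixture(n_components=10,
                                  weight_concentration_prior=prior,
                                  max_iter=500, random_state=0).fit(X)
    active = np.sum(bgm.weights_ > 1e-2)
    print(prior, active)  # the small prior usually yields fewer active components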
class VBEM(object):
    def __init__(self, n_components=1, verbose=2, verbose_interval=1, data=None):
        self.model = BayesianGaussianMixture(n_components=n_components,
                                             verbose=verbose,
                                             verbose_interval=verbose_interval)
        self.n_components = n_components
        if data is None:
            self.dataset = Dataset()
            self.dataset.generate()
        else:
            self.dataset = data
        self.data = self.dataset.data

    def train(self):
        self.model.fit(self.data)

    def show(self, n=None):
        plt.figure()
        self.model.fit(self.data)
        labels = self.model.predict(self.data)
        plt.scatter(self.data[:, 0], self.data[:, 1], c=labels, s=10)
        if n is None:
            plt.show()
        else:
            plt.savefig('Pro2/vbem_%d_%d' % (n, 4))
def test_bayesian_mixture_predict_predict_proba():
    # this is the same test as test_gaussian_mixture_predict_predict_proba()
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            Y = rand_data.Y
            bgmm = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type)

            # Check that an error is raised if we don't fit first
            assert_raise_message(NotFittedError,
                                 "This BayesianGaussianMixture instance"
                                 " is not fitted yet. Call 'fit' with "
                                 "appropriate arguments before using "
                                 "this method.", bgmm.predict, X)

            bgmm.fit(X)
            Y_pred = bgmm.predict(X)
            Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)
            assert_array_equal(Y_pred, Y_pred_proba)
            assert_greater_equal(adjusted_rand_score(Y, Y_pred), .95)
def recluster_node(dataset, node=None, idx=None, label=None, n_clusters=4):
    selector = match_one(dataset, label=label, idx=idx, node=node)
    # Get the node you want to recluster and flatten it
    selected_data = dataset.select(selector).flatten(1)

    if len(selected_data) >= 100:
        cluster_on = tsne_time(selected_data, pcs=6, t_scale=2 * 60 * 60.0)
    else:
        cluster_on = PCA(
            n_components=min(6, len(selected_data))).fit_transform(
                selected_data.waveforms)

    n_clusters = min(n_clusters, len(cluster_on))
    weight = np.array([node.count for node in selected_data.nodes])
    # kmeans = KMeans(n_clusters=n_clusters).fit(cluster_on, sample_weight=weight)
    # labels = kmeans.predict(cluster_on, sample_weight=weight)
    if len(cluster_on) < 2:
        labels = np.arange(len(cluster_on))
    else:
        gmm = BayesianGaussianMixture(n_components=n_clusters).fit(cluster_on)
        labels = gmm.predict(cluster_on)

    reclustered = selected_data.cluster(labels)
    new_dataset = dataset.select(np.logical_not(selector), child=False)
    return add_nodes(new_dataset, *reclustered.nodes)
def do_bgm(self, n_components=6, seed=42):
    """Bayesian Gaussian Mixture.

    Infer the effective number of components in a Gaussian Mixture Model
    via variational Bayesian estimation.

    n_effective_components < n_components if the model sets some weights
    close to 0.

    Args:
        n_components (int): Number of components in GMM.
        seed (int): Random seed.

    Returns:
        bgm_output (dict): Labels and probabilities.
    """
    np.random.seed(seed)
    bgm = BayesianGaussianMixture(n_components=n_components,
                                  covariance_type='full',
                                  weight_concentration_prior=1e-2,
                                  weight_concentration_prior_type='dirichlet_process',
                                  mean_precision_prior=1e-2,
                                  init_params='random',
                                  max_iter=100,
                                  random_state=seed)
    bgm.fit(self.X)
    bgm_labels = bgm.predict(self.X)
    bgm_prob = bgm.predict_proba(self.X)[:, 0]
    bgm_output = {'bgm_labels': bgm_labels, 'bgm_prob': bgm_prob}
    return bgm_output
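# Follow-up sketch for do_bgm: the effective number of components can be
# read off the fitted weights, since pruned components get weights near
# zero (the 0.01 threshold is an arbitrary choice here).
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

X = np.random.randn(300, 2)
bgm = BayesianGaussianMixture(n_components=6, weight_concentration_prior=1e-2,
                              random_state=42).fit(X)
print(np.sum(bgm.weights_ > 0.01))  # n_effective_components <= 6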
def airmass_labels(z, P, T, H2O, O3, n_airmass=5, labels=None):
    cH2O = mf2mol_cum(H2O, P, T)
    cO3 = mf2mol_cum(O3, P, T)
    T_surf = T[:, z < 3].mean(axis=1)
    T_grad = np.diff(T[:, z < 6], axis=1).mean(axis=1)
    H2O_tot = cH2O[:, -1]
    O3_tot = cO3[:, -1]

    # standardize each feature
    f = lambda x: (x - x.mean()) / x.std()
    features = np.vstack((f(T_surf), f(T_grad), f(H2O_tot), f(O3_tot))).T

    if labels is None:
        pdf = BayesianGaussianMixture(n_components=n_airmass,
                                      covariance_type='full',
                                      max_iter=25000)
        pdf.fit(features)
        labels = pdf.predict(features)

    plt.figure()
    for ii in range(n_airmass):
        ix = labels == ii
        plt.subplot(1, 3, 1)
        plt.plot(T_surf[ix], H2O_tot[ix], '.')
        plt.xlabel('Mean T (z<3km) [K]')
        plt.ylabel('Total H2O [mol]')
        plt.subplot(1, 3, 2)
        plt.plot(T_surf[ix], 1e6 * O3_tot[ix], '.')
        plt.xlabel('Mean T (z<3km) [K]')
        plt.ylabel('Total O3 [µmol]')
        plt.subplot(1, 3, 3)
        plt.plot(H2O_tot[ix], 1e6 * O3_tot[ix], '.')
        plt.xlabel('Total H2O [mol]')
        plt.ylabel('Total O3 [µmol]')
    return labels
def test_bayesian_mixture_fit_predict_n_init():
    # Check that fit_predict is equivalent to fit.predict, when n_init > 1
    X = np.random.RandomState(0).randn(1000, 5)
    gm = BayesianGaussianMixture(n_components=5, n_init=10, random_state=0)
    y_pred1 = gm.fit_predict(X)
    y_pred2 = gm.predict(X)
    assert_array_equal(y_pred1, y_pred2)
class VBEM(object):
    def __init__(self, n_components=5, dataset=None):
        self.model = BayesianGaussianMixture(n_components=n_components,
                                             max_iter=10000)
        self.n_components = n_components
        self.class_num = dataset.class_num
        self.data_num = dataset.data_num
        self.data = dataset.data
        self.label = dataset.label
        self.bestVBEM_k = 0
        self.model.fit(self.data)

    def draw(self):
        label = self.model.predict(self.data)
        self.bestVBEM_k = max(label) + 1
        data_2d = pd.DataFrame(self.data, columns=['x', 'y'])
        label_2d = pd.DataFrame(label, columns=['label'])
        label_names = np.unique(label)
        colors = [
            plt.cm.tab10(i / float(len(label_names)))
            for i in range(len(label_names))
        ]
        tmp_2d = pd.concat([data_2d, label_2d], axis=1)
        plt.figure()
        for i, label in enumerate(label_names):
            plt.scatter(tmp_2d.loc[tmp_2d.label == label].x,
                        tmp_2d.loc[tmp_2d.label == label].y,
                        s=5,
                        color=colors[i],
                        alpha=0.5)
        plt.title('Best GMM with VBEM_' + str(self.class_num) + '_' +
                  str(self.data_num))
        plt.savefig('res/GMM_VBEM_' + str(self.class_num) + '_' +
                    str(self.data_num) + '.jpg')
def test_bayesian_mixture_fit_predict_n_init():
    # Check that fit_predict is equivalent to fit.predict, when n_init > 1
    X = np.random.RandomState(0).randn(50, 5)
    gm = BayesianGaussianMixture(n_components=5, n_init=10, random_state=0)
    y_pred1 = gm.fit_predict(X)
    y_pred2 = gm.predict(X)
    assert_array_equal(y_pred1, y_pred2)
def genotype(cnvays):
    result = []
    n_com = 10 if cnvays.shape[1] >= 10 else cnvays.shape[1]
    n_init = 3
    for cnvay in cnvays:
        cnv = [[x] for x in cnvay]
        dpgmm = BayesianGaussianMixture(
            n_components=n_com,
            n_init=n_init,
            max_iter=10000,
            weight_concentration_prior_type='dirichlet_process').fit(cnv)
        labels = dpgmm.predict(cnv)
        normed_ay = np.arange(0, np.max(cnvay) + 0.5, 0.5)
        swlabels = {}
        for rawlabel in np.unique(labels):
            swlabels[rawlabel] = normed_ay[np.argmin(
                np.abs(normed_ay - np.median(cnvay[labels == rawlabel])))]
        newlabels = [swlabels[x] for x in labels]
        gtlabes = {0: 'dd', 0.5: 'Ad', 1: 'AA', 1.5: 'AB', 2: 'BB', 2.5: 'BC'}
        finalline = [gtlabes.get(x, 'M') for x in newlabels]
        if len(np.unique(finalline)) > 1:
            sc = silhouette_score(cnv, finalline, metric='euclidean')
            chs = calinski_harabaz_score(cnv, labels)
        else:
            sc = np.nan
            chs = np.nan
        llh = dpgmm.score(cnv)  # log-likelihood of the Gaussian mixture given X
        finalline += [sc, chs, llh]
        result.append(finalline)
    return result
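# Hypothetical smoke test for genotype(): rows are loci, columns are samples,
# values are noisy copy-number ratios. calinski_harabaz_score is the
# pre-0.20 sklearn spelling; newer releases use calinski_harabasz_score.
import numpy as np
from sklearn.metrics import silhouette_score
try:
    from sklearn.metrics import calinski_harabaz_score
except ImportError:
    from sklearn.metrics import calinski_harabasz_score as calinski_harabaz_score

cnvays = np.random.choice([0.0, 0.5, 1.0, 1.5, 2.0], size=(2, 60))
cnvays = cnvays + np.random.normal(0, 0.05, cnvays.shape)
rows = genotype(cnvays)
print(rows[0][-3:])  # silhouette, calinski-harabasz, log-likelihood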
def predict_cp_interval(self, n_components=30):
    '''
    Estimates the (phenotypical) levels of observed amplitudes, regardless
    of order. Each observed time point is then classified, and between each
    transition from one inferred level to another a change-point with
    uniform distribution is inferred.

    :param n_components: maximum number of components of the mixture model
        (default is 30)
    '''
    logging.warning("Predicting CP intervals")
    state_mix = BayesianGaussianMixture(
        n_components,
        n_init=10,
        weight_concentration_prior_type='dirichlet_distribution',
        verbose=1,
        max_iter=500,
        tol=1e-12,
    )
    observed = self.observation[~np.isnan(self.observation)].reshape(-1, 1)
    state_mix.fit(observed)

    classified = deepcopy(self.observation)
    predicted = state_mix.predict(classified[~np.isnan(classified)].reshape(-1, 1))
    classified[~np.isnan(classified)] = predicted

    # find the first classified (non-NaN) time point
    last = None
    begin = 0
    for i, c in enumerate(classified):
        if not np.isnan(c):
            last = c
            begin = i
            break

    # collect segments between level transitions
    segments = []
    for i in range(begin, classified.shape[0]):
        if not np.isnan(classified[i]):
            if classified[i] != last:
                s = np.max(np.argwhere(~np.isnan(classified[0:i - 1])))
                segments.append((s, i))
                last = classified[i]
                begin = i

    # calculate uniform distribution parameters
    result = []
    for segment in segments:
        a = segment[0]
        b = segment[1]
        distro = {'begin': a, 'end': b}
        result.append(distro)
    result = pd.DataFrame(result)
    return result, state_mix
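# Standalone sketch of the core step above (level classification with a
# Bayesian mixture) on a synthetic piecewise-constant trace; not the class
# method itself, just its mixture-fitting idea.
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

signal = np.concatenate([np.full(100, 0.0), np.full(100, 3.0), np.full(100, 1.0)])
signal += np.random.normal(0, 0.1, signal.size)
mix = BayesianGaussianMixture(
    n_components=10,
    weight_concentration_prior_type='dirichlet_distribution',
    n_init=3).fit(signal.reshape(-1, 1))
levels = mix.predict(signal.reshape(-1, 1))
# change-points are where the inferred level switches
print(np.where(np.diff(levels) != 0)[0])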
def clusteringBayGaussMixt(X, y, nclusters, paramlist):
    bgm = BayesianGaussianMixture(n_components=nclusters,
                                  covariance_type='full',
                                  tol=0.001,
                                  reg_covar=1e-06,
                                  max_iter=100,
                                  n_init=1,
                                  init_params='kmeans',
                                  weight_concentration_prior_type='dirichlet_process',
                                  weight_concentration_prior=None,
                                  mean_precision_prior=None,
                                  mean_prior=None,
                                  degrees_of_freedom_prior=None,
                                  covariance_prior=None,
                                  random_state=None,
                                  warm_start=False,
                                  verbose=0,
                                  verbose_interval=10)
    bgm.fit(X, y)  # y is accepted but ignored by the mixture model
    labels = bgm.predict(X)
    return labels
def convert_to_deciles(y, n=10, gmm=False):
    """
    By default converts to deciles; this can be changed via the choice of n.
    """
    if gmm:
        # this is experimental
        bgm = BayesianGaussianMixture(n_components=10)
        bgm.fit(y.reshape(-1, 1))
        return bgm.predict(y.reshape(-1, 1))
    return np.array(pd.cut(y, n, labels=range(n)))
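# Usage sketch for convert_to_deciles: the gmm path returns mixture-component
# ids (not ordered deciles), which is presumably why the original marks it
# experimental.
import numpy as np
import pandas as pd
from sklearn.mixture import BayesianGaussianMixture

y = np.random.randn(1000)
deciles = convert_to_deciles(y)               # ordered bins 0..9
components = convert_to_deciles(y, gmm=True)  # unordered component labels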
def bayesian_gaussian_mixture(latent):
    gauss_mix = BayesianGaussianMixture(
        n_components=N_COMPONENTS,
        covariance_type=COVARIANCE_TYPE,
        weight_concentration_prior_type=WEIGHT_CONCENTRATION_PRIOR_TYPE,
        weight_concentration_prior=WEIGHT_CONCENTRATION_PRIOR,
        max_iter=MAX_ITER,
        verbose=VERBOSE).fit(latent)
    labels = gauss_mix.predict(latent)
    return labels
def getBayesianGaussian(filename, targetname):
    # use Bayesian Gaussian model on tsne
    matrix = tsne(filename)

    # fit the model
    model = BayesianGaussianMixture(n_components=8).fit(matrix)
    label = model.predict(matrix)
    print(label)

    # generate graph
    getTsne(filename, targetname, label)
def cluster_embeddings(
    self,
    cluster_method,
    num_clusters,
    use_decomposed,
    additional_params=None,
):
    """
    @param cluster_method clustering method, one of {KMeans, Spectral,
        GaussianMix, BayesGaussMix}
    @param num_clusters number of clusters
    @param use_decomposed boolean, whether to use decomposed or raw
        representations
    @param additional_params {param_name: value} of parameters accepted by
        the sklearn clustering function. Cannot include n_components.
        Defaults to {"random_state": 0}.

    Predicts cluster assignments for data points on raw or decomposed
    embedding representations.
    """
    # avoid a mutable default argument, which would be shared across calls
    if additional_params is None:
        additional_params = {"random_state": 0}
    self.cluster_method = cluster_method
    self.num_clusters = num_clusters
    if use_decomposed:
        vec_df = self.decomposed_embedding_representation
    else:
        vec_df = self.embedding_representation

    if self.cluster_method == "KMeans":
        self.predicted_labels = KMeans(
            n_clusters=self.num_clusters, **additional_params
        ).fit_predict(vec_df)
    if self.cluster_method == "Spectral":
        self.predicted_labels = SpectralClustering(
            n_clusters=self.num_clusters,
            affinity="cosine",
            assign_labels="discretize",
            **additional_params,
        ).fit_predict(vec_df)
    if self.cluster_method == "GaussianMix":
        gm_model = GaussianMixture(
            n_components=self.num_clusters, **additional_params
        ).fit(vec_df)
        self.predicted_labels = gm_model.predict(vec_df)
    if self.cluster_method == "BayesGaussMix":
        bgm_model = BayesianGaussianMixture(
            n_components=self.num_clusters, **additional_params
        ).fit(vec_df)
        self.predicted_labels = bgm_model.predict(vec_df)

    # set num_clusters to the number of components actually used
    self.num_clusters = len(set(self.predicted_labels))
def embed_mixture_variational(xmaps_np, n_components):
    sample_by_feature = np.vstack([np_map.flatten()
                                   for dtag, np_map in xmaps_np.items()])

    # mixture = BayesianGaussianMixture()
    begin = time.time()

    # reduce dimensionality before fitting the mixture
    pca = PCA(n_components=50)
    sample_by_feature_pca = pca.fit_transform(sample_by_feature)
    print("shape is: {}".format(sample_by_feature_pca.shape))

    mixture = BayesianGaussianMixture(n_components,
                                      covariance_type="spherical",
                                      verbose=10,
                                      verbose_interval=2)
    mixture.fit(sample_by_feature_pca)

    finish = time.time()
    print(mixture)
    print("Finished in {}".format(finish - begin))
    print(mixture.predict(sample_by_feature_pca))
    print(mixture.weights_)

    clusters = mixture.predict(sample_by_feature_pca)
    probabilities = mixture.score_samples(sample_by_feature_pca)

    return mixture, pca, clusters, probabilities
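# Hypothetical driver for embed_mixture_variational: a dict of equally
# shaped toy maps stands in for the real xmaps_np input. PCA(n_components=50)
# needs at least 50 samples and 50 features, hence the sizes chosen here.
import time
import numpy as np
from sklearn.decomposition import PCA
from sklearn.mixture import BayesianGaussianMixture

xmaps = {"dtag_%d" % i: np.random.rand(8, 8) for i in range(60)}  # 60 maps, 64 voxels each
mixture, pca, clusters, probabilities = embed_mixture_variational(xmaps, n_components=5)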
def split_units(self, n_clusters):
    """Splits recovered spikes per unit into clusters."""
    self.n_clusters = n_clusters
    if self.cid is None:
        self.cid = []
        for i in tqdm(range(self.n_unit), 'Splitting Units'):
            f = self.features[i]
            f = f.reshape([f.shape[0], self.n_feat * self.n_main_chan])
            clustering = BayesianGaussianMixture(
                n_components=n_clusters, max_iter=500)
            clustering.fit(f)
            self.cid.append(clustering.predict(f))
    return self.cid
def bayesian_gaussian_mixture(vector: np.array, n: int, BIC_calculate=False):
    if BIC_calculate:
        np.random.seed(140597)
        mask = np.random.choice([False, True], len(vector), p=[0.70, 0.30])
        model_train = BayesianGaussianMixture(
            n_components=n, covariance_type='full').fit(vector[~mask])
        validation_score = model_train.score(vector[mask])
        train_score = model_train.score(vector[~mask])
        return validation_score, train_score
    else:
        np.random.seed(140597)
        mask = np.random.choice([False, True], len(vector), p=[0.70, 0.30])
        dpgmm = BayesianGaussianMixture(
            n_components=n, covariance_type='full',
            max_iter=900, tol=1e-4).fit(vector[~mask])
        cluster_label = dpgmm.predict(vector)
        return cluster_label
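# Usage sketch: `vector` must be 2-D (n_samples, n_features), e.g. a column
# vector of scalar observations. The BIC_calculate branch returns held-out
# and training average log-likelihoods for a given n.
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

vector = np.random.randn(500, 1)
val_score, train_score = bayesian_gaussian_mixture(vector, n=5, BIC_calculate=True)
labels = bayesian_gaussian_mixture(vector, n=5)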
def _dpgmm(fet, n_comp=8, max_iter=400):
    from sklearn.mixture import BayesianGaussianMixture as DPGMM
    dpgmm = DPGMM(
        n_components=n_comp,
        covariance_type='full',
        weight_concentration_prior=1e-3,
        weight_concentration_prior_type='dirichlet_process',
        init_params="kmeans",  # init can be "kmeans" or "random"
        max_iter=max_iter,
        random_state=0,
        verbose=0,
        verbose_interval=10)
    dpgmm.fit(fet)
    label = dpgmm.predict(fet)
    return label
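# Minimal call for _dpgmm on random features; with the sparse concentration
# prior (1e-3) most of the 8 components typically end up unused.
import numpy as np

fet = np.random.randn(400, 4)
label = _dpgmm(fet)
print(np.unique(label))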
def gmm_hyper(hyper_image, features, n_clusters):
    """Cluster pixel spectra with a Bayesian GMM and score the labeling."""
    gmm = BayesianGaussianMixture(n_components=n_clusters,
                                  covariance_type='spherical').fit(features)
    labels = gmm.predict(features)
    label_image = labels.reshape(hyper_image.shape[:-1])
    gmm_spectra = average_spectra(hyper_image, labels)
    score = calinski_harabaz_score(features, label_image.ravel())
    return label_image, gmm_spectra, score
def run_BayesianGaussianMixture(Y, K):
    """Cluster cells with a Bayesian Gaussian mixture model.

    Input
    -----
    Y: the expression matrix
    K: number of clusters

    Returns
    -------
    clusters assigned to each cell.
    """
    gmm = BayesianGaussianMixture(K, max_iter=1000)
    gmm.fit(Y)
    return gmm.predict(Y)
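# Usage sketch: rows of Y are cells, columns are expression features;
# K is an upper bound on the number of clusters actually used.
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

Y = np.random.rand(100, 10)
clusters = run_BayesianGaussianMixture(Y, K=4)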
def Bayesian_gmm_inference(self, data: Union[pd.DataFrame, np.ndarray],
                           **params) -> None:
    """
    Bayesian inference of the parameters via sklearn's variational EM
    algorithm. Accepts only numerical data; do any feature engineering
    before passing in the DataFrame.

    :param data: set of data to do the inference on
    :return: None
    """
    Bayesian_gmm = BayesianGaussianMixture(**params)
    Bayesian_gmm.fit(data)
    self.means = Bayesian_gmm.means_
    self.precision = Bayesian_gmm.precisions_
    self.weight = Bayesian_gmm.weights_
    self.label = Bayesian_gmm.predict(data)
    self.data = data
    self.nrb_comp = Bayesian_gmm.n_components
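# Hypothetical usage; `model` stands for an instance of the enclosing class,
# and **params is forwarded verbatim to the BayesianGaussianMixture constructor.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(300, 2), columns=["f1", "f2"])
model.Bayesian_gmm_inference(df, n_components=5, max_iter=500, random_state=0)
print(model.weight)  # fitted mixture weights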
ax1_min, ax1_max, ax2_min, ax2_max = plt.axis()
plt.xlim((x1_min, x1_max))
plt.ylim((x2_min, x2_max))
plt.title('GMM', fontsize=15)
plt.grid(b=True, ls=':', color='#606060')

# DPGMM
dpgmm = BayesianGaussianMixture(n_components=n_components,
                                covariance_type='full',
                                max_iter=1000,
                                n_init=5,
                                weight_concentration_prior_type='dirichlet_process',
                                weight_concentration_prior=0.1)
dpgmm.fit(x)
centers = dpgmm.means_
covs = dpgmm.covariances_
print('DPGMM means = \n', centers)
print('DPGMM covariances = \n', covs)
y_hat = dpgmm.predict(x)
print(y_hat)

ax = plt.subplot(212)
grid_hat = dpgmm.predict(grid_test)
grid_hat = grid_hat.reshape(x1.shape)
plt.pcolormesh(x1, x2, grid_hat, cmap=cm)
plt.scatter(x[:, 0], x[:, 1], s=20, c=y, cmap=cm, marker='o',
            edgecolors='#202020')
for i, cc in enumerate(zip(centers, covs)):
    if i not in y_hat:
        continue
    center, cov = cc
    value, vector = sp.linalg.eigh(cov)
    width, height = value[0], value[1]
    v = vector[0] / sp.linalg.norm(vector[0])
appl = 'WHE'

# Create vector with P and Q values and plot them
P = d[appl].P[init:end].values
Q = d[appl].Q[init:end].values
X = np.transpose([P, Q])
plt.plot(d[appl].P[init:end], d[appl].Q[init:end], 'o', alpha=0.1)

# Normalize X
sscl = StandardScaler().fit(X)
X = sscl.transform(X)

# Apply clusterer
bgm = BayesianGaussianMixture(n_components=33,
                              covariance_type='full',
                              weight_concentration_prior_type='dirichlet_distribution',
                              random_state=42).fit(X)
y_pred = bgm.predict(X)

# Plot clusters with X unnormalized
X = sscl.inverse_transform(X)
plt.figure()
plt.scatter(X[:, 0], X[:, 1], color=colors[y_pred])
means = sscl.inverse_transform(bgm.means_)
medians = get_medians(X, y_pred)
# plt.plot(means[:,0],means[:,1],'kx')
plt.plot(medians[:, 0], medians[:, 1], 'kx')

# TODO: Compare mean with ground truth
plt.figure()
plt.plot(P)
P_pred = means[y_pred][:, 0]
          data_thr.rateC, data_thr.rateCA]
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 1 corresponds to data_thr.rate and 4=5-1 to data_thr.rateC
w = w / np.sqrt(scaler.var_[1:])
# w = np.exp(-np.exp(3 * w.mean(axis=1)))
w = 1. / w.mean(axis=1) ** 2

Html_file = open("gmm_sklearn_files/gmm3_sklearn.html", "w")

gmm = BayesianGaussianMixture(n_components=3,
                              weight_concentration_prior=0.1,
                              mean_precision_prior=1,
                              n_init=5)
gmm.fit(X)  # , weights=w) not implemented in sklearn yet
preds = gmm.predict(X)
probs = gmm.predict_proba(X)
data_thr['preds'] = pd.Series(preds).astype("category")

color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"]  # Spectral9
color_key = color_key[:len(set(preds)) + 1]

covs = gmm.covariances_
means = gmm.means_

# transform cov for non-standardized data:
covs = np.array([np.dot(np.diag(np.sqrt(scaler.var_)),
                        np.dot(covs[j], np.diag(np.sqrt(scaler.var_))))
                 for j in range(covs.shape[0])])