Example #1
def test_bayesian_mixture_predict_predict_proba():
    # this is the same test as test_gaussian_mixture_predict_predict_proba()
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            Y = rand_data.Y
            bgmm = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type,
            )

            # Check that an informative error is raised if predict is called before fit
            msg = ("This BayesianGaussianMixture instance is not fitted yet. "
                   "Call 'fit' with appropriate arguments before using this "
                   "estimator.")
            with pytest.raises(NotFittedError, match=msg):
                bgmm.predict(X)

            bgmm.fit(X)
            Y_pred = bgmm.predict(X)
            Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)
            assert_array_equal(Y_pred, Y_pred_proba)
            assert adjusted_rand_score(Y, Y_pred) >= 0.95
Example #2
def partition_data(self, args):
    method, j = args
    if method == "vi":
        dp = BayesianGaussianMixture(n_components=self.K,
                                     weight_concentration_prior=self.alpha,
                                     max_iter=1, init_params='kmeans',
                                     weight_concentration_prior_type='dirichlet_process')
        dp.fit(self.X[self.U[j]])
        Z = dp.predict(self.X[self.U[j]]).astype(int)
        Z_star = dp.predict(self.X_star).astype(int)
    elif method == "gmm":  # was a bare `if`, which let the final `else` clobber the "vi" result
        Z, Z_star = self.uncollapsed_dp_partition_alt(j)
    elif method == "kmean":
        km = KMeans(n_clusters=self.K)
        Z = km.fit_predict(self.X[self.U[j]]).astype(int)
        Z_star = km.predict(self.X_star).astype(int)  # X_star is not minibatch-indexed (matches the "vi" branch)
    else:
        Z = np.random.choice(self.K, size=self.N_minibatch, replace=True)
        Z_star = np.random.choice(np.unique(Z), size=self.N_star, replace=True)
    le = LE()  # LabelEncoder
    le.fit(np.hstack((Z, Z_star)))
    Z = le.transform(Z)
    Z_star = le.transform(Z_star)
    if method == "vi":  # & (self.vi_partition):
        # Resample any test labels that never occur in the training partition.
        Z_diff = np.setdiff1d(Z_star, Z)
        if Z_diff.size > 0:
            idx = np.hstack([np.where(Z_star == k)[0] for k in Z_diff])
            unique_Z = np.unique(Z)
            post_Z = dp.predict_proba(self.X_star[idx])[:, unique_Z]
            Z_star[idx] = [np.random.choice(unique_Z, p=post_Z_i / post_Z_i.sum())
                           for post_Z_i in post_Z]
            assert np.setdiff1d(Z_star, Z).size == 0
    return Z, Z_star
Example #3
def fit_GMM(data,num_components):
    gmm = GMM(n_components=num_components)
    gmm.fit(data)

    predicted_class = gmm.predict(data)
    num_classes = np.unique(predicted_class).shape[0]  # reuse the labels instead of predicting twice

    return gmm,predicted_class,num_classes
Example #4
class VBEM(object):
    def __init__(self,
                 n_components=1,
                 verbose=2,
                 verbose_interval=1,
                 Data=None):
        '''
        :param n_components: cluster number
        :param verbose: whether to show training details
        :param verbose_interval: showing training details interval
        :param Data: dataset
        '''
        self.model = BayesianGaussianMixture(n_components=n_components,
                                             verbose=verbose,
                                             verbose_interval=verbose_interval)
        self.n_components = n_components
        if Data is None:
            self.dataset = Dataset()
            self.dataset.generate()
        else:
            self.dataset = Data
        self.data = self.dataset.data

    def train(self):
        self.model.fit(self.data)

    def show(self, n=None):
        '''
        show the result of the trained model
        :param n: only used to name the saved figure
        :return: None
        '''
        plt.figure()
        labels = self.model.predict(self.data)

        plt.scatter(self.data[:, 0], self.data[:, 1], c=labels, s=15)

        if n is None:
            plt.show()
        else:
            plt.savefig('report/demo/vbem_%d_%d' % (n, 4))

    def show_dis(self, dis=None):
        '''
        show the result of the trained model
        :param dis: only used to name the saved figure
        :return: None
        '''
        plt.figure()
        labels = self.model.predict(self.data)

        plt.scatter(self.data[:, 0], self.data[:, 1], c=labels, s=15)

        if dis is None:
            plt.show()
        else:
            plt.savefig('report/demo/dis_vbem_%d_%d' % (dis, 3))
Example #5
def extract_improved_cell_centroid(cell_subimg, cell_contour):
    cell_subimg = skimage.filters.median(cell_subimg)  
    # Ensure exterior of cell is set to zero.
    mask = np.zeros_like(cell_subimg)
    cv2.drawContours(mask, [cell_contour], -1, 255, -1)
    cell_subimg[mask == 0] = 0

    #visualize('cell_subimg', cell_subimg)

    # 1D GMM.
    X = cell_subimg[cell_subimg != 0].reshape(-1, 1)
    gmm = BayesianGaussianMixture(n_components=10)
    gmm.fit(X)
    gpred_1d = gmm.predict(cell_subimg.reshape(-1, 1)).reshape(cell_subimg.shape).astype(np.uint8)

    # Find maximum intensity label for 1D.
    label_1d = np.argmax(gmm.means_)

    # 3D GMM.
    xvals = np.arange(cell_subimg.shape[0])
    yvals = np.arange(cell_subimg.shape[1])
    xx, yy = np.meshgrid(xvals, yvals)
    S = np.vstack([xx.reshape(-1), yy.reshape(-1), cell_subimg.reshape(-1)]).T
    #gmm = GaussianMixture(n_components=COMP)
    gmm = BayesianGaussianMixture(n_components=3)
    gmm.fit(S)
    gpred_3d = gmm.predict(S).reshape(cell_subimg.shape)

    # Find maximum intensity label for 3D.
    label_3d = np.argmax(gmm.means_[:, 2])

    P = np.zeros_like(cell_subimg)

    P[np.logical_and(gpred_1d == label_1d, gpred_3d == label_3d)] = 1

    # Now compute the centroid.
    M = cv2.moments(P)

    try: 
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])

    # If unable to extract, choose the center of the bounding rectangle as the centroid.
    except ZeroDivisionError:
        x, y, w, h = cv2.boundingRect(cell_contour)
        cx, cy = (x + w) // 2, (y + h) // 2

    # Debug helper (unused in the return path).
    def plt_center():
        plt.plot(cx, cy, 'ro')

    return cx, cy
Example #6
class Mixtures(object):
    """All mixture model algorithms are implemened here."""

    def __init__(self, method, data, n_clusters=2, random_state=0):
        """
        Initialize all the parameters.
        method: Name of the algorithms (lower case joined by underscore)
        data: Data (2D Matrix)
        n_clusters: Number of clusters
        random_state: Random initial state
        """
        self.method = method
        self.data = data
        self.n_clusters = n_clusters
        np.random.seed(random_state)
        self.random_state = random_state
        self.init_params = "kmeans"
        self.cov = "full"
        self.max_iter = 500
        self.n_init = 5
        self.weight_concentration_prior_type = "dirichlet_process"
        return

    def setup(self, **keywords):
        """
        Setup the algorithms
        """
        for key in keywords.keys():
            setattr(self, key, keywords[key])
        if self.method == "gmm": self.obj = GaussianMixture(n_components=self.n_clusters,
                covariance_type=self.cov, max_iter=self.max_iter, random_state=self.random_state,
                n_init=self.n_init, init_params=self.init_params)
        if self.method == "bgmm": self.obj = BayesianGaussianMixture(n_components=self.n_clusters,
                covariance_type=self.cov, max_iter=self.max_iter, random_state=self.random_state,
                n_init=self.n_init, init_params=self.init_params, 
                weight_concentration_prior_type=self.weight_concentration_prior_type)
        return

    def run(self):
        """
        Run the models
        """
        if self.method == "gmm": 
            self.obj.fit(self.data)
            setattr(self.obj, "labels_", self.obj.predict(self.data))
        if self.method == "bgmm":
            self.obj.fit(self.data)
            setattr(self.obj, "labels_", self.obj.predict(self.data))
        return
Example #7
    def __init__(self, **kwargs):
        super().__init__(data_set=kwargs.pop('data_set', None), **kwargs)

        self.clf_ = kwargs.get('clf', None)
        if self.clf_ is None:
            raise ValueError("missing required keyword-only argument 'clf'")
        if not callable(getattr(self.clf_, 'fit', None)) or not callable(
                getattr(self.clf_, 'predict_proba', None)):
            raise TypeError(
                "'clf' must be an instance with the methods 'fit' and 'predict_proba'"
            )

        n_components = int(
            kwargs.pop('n_components', np.min([20, len(self.data_set_)])))
        if n_components < 1 or n_components > len(self.data_set_):  # the error below promises [1, n_samples]
            raise ValueError(
                "'n_components' must be an integer in the interval [1, n_samples]"
            )

        # fit Gaussian mixture model for pre-clustering
        gmm = BayesianGaussianMixture(n_components=n_components,
                                      covariance_type='spherical',
                                      max_iter=1000,
                                      random_state=self.random_state_)
        gmm.fit(self.data_set_.X_)
        self.y_cluster_ = gmm.predict(self.data_set_.X_)
        self.p_x_ = np.exp(gmm.score_samples(self.data_set_.X_))
Example #8
    def cluster(self,
                dim,
                method='dpgmm',
                max_n_clusters=80,
                max_iter=300,
                refresh=True):
        '''
        dim is the index of the feature dimensions used for clustering
        '''
        print('clustering DPGMM')
        from sklearn.mixture import BayesianGaussianMixture as DPGMM
        dpgmm = DPGMM(n_components=max_n_clusters,
                      covariance_type='full',
                      weight_concentration_prior=1e-3,
                      weight_concentration_prior_type='dirichlet_process',
                      init_params="kmeans",
                      max_iter=max_iter,
                      random_state=0,
                      verbose=1,
                      verbose_interval=10)  # init can be "kmeans" or "random"
        dpgmm.fit(self.fet[:, dim])
        label = dpgmm.predict(self.fet[:, dim])
        self.clu.membership = label
        self.clu.__construct__()
        self.clu.emit('cluster')

        if refresh is True:
            self.set_data(self.fet, self.clu)
        return label
Example #9
    def bayesian_gaussian_mixture(self, n_components, weight_concentration_prior_type, weight_concentration_prior,
                                mean_precision_prior, n_init, max_iter, init_params):
        '''Bayesian Gaussian Mixture clustering algorithm. A low weight_concentration_prior puts
        more weight on a few components; a high value allows more components to be active in the mixture.'''
        bgm = BayesianGaussianMixture(n_components=n_components,
                                      weight_concentration_prior_type=weight_concentration_prior_type,
                                      weight_concentration_prior=weight_concentration_prior,
                                      mean_precision_prior=mean_precision_prior,
                                      n_init=n_init,
                                      max_iter=max_iter,
                                      init_params=init_params)
        bgm.fit(self.X)
        self.labels = bgm.predict(self.X)

        unique, counts = np.unique(self.labels, return_counts=True)
        mydict = dict(zip(unique, counts))
        print(mydict)

        plt.bar(list(mydict.keys()), mydict.values(), color = 'g')
        plt.ylabel("Number of skews")
        plt.xlabel("Cluster")
        plt.title(weight_concentration_prior_type)

        plt.gcf().text(0.05, 0.05, "Parameters initialized using: "+init_params)
        plt.gcf().text(0.05, 0.01, "Weight concentration prior: "+str(weight_concentration_prior))
        plt.gcf().text(0.7, 0.05, "Mean precision prior: "+str(mean_precision_prior))
        plt.gcf().text(0.7, 0.01, "Lower bound (ELBO): "+str("%.2f"%bgm.lower_bound_))
        #plt.show()

        print("Weights: "+str(bgm.weights_))
        print("Converged: "+str(bgm.converged_))
        print("Number of iterations to reach convergence: "+str(bgm.n_iter_))
        print("Lower bound value on likelihood: "+str(bgm.lower_bound_))
        print("Bayesian Gaussian mixture complete")
Example #10
class VBEM(object):
    def __init__(self,
                 n_components=1,
                 verbose=2,
                 verbose_interval=1,
                 data=None):
        self.model = BayesianGaussianMixture(n_components=n_components,
                                             verbose=verbose,
                                             verbose_interval=verbose_interval)
        self.n_components = n_components
        if data is None:
            self.dataset = Dataset()
            self.dataset.generate()
        else:
            self.dataset = data
        self.data = self.dataset.data

    def train(self):
        self.model.fit(self.data)

    def show(self, n=None):
        plt.figure()
        self.model.fit(self.data)
        labels = self.model.predict(self.data)
        plt.scatter(self.data[:, 0], self.data[:, 1], c=labels, s=10)
        if n is None:
            plt.show()
        else:
            plt.savefig('Pro2/vbem_%d_%d' % (n, 4))
Example #11
def test_bayesian_mixture_predict_predict_proba():
    # this is the same test as test_gaussian_mixture_predict_predict_proba()
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            Y = rand_data.Y
            bgmm = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type)

            # Check that an informative error is raised if predict is called before fit
            assert_raise_message(
                NotFittedError, "This BayesianGaussianMixture instance"
                " is not fitted yet. Call 'fit' with "
                "appropriate arguments before using "
                "this method.", bgmm.predict, X)

            bgmm.fit(X)
            Y_pred = bgmm.predict(X)
            Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)
            assert_array_equal(Y_pred, Y_pred_proba)
            assert_greater_equal(adjusted_rand_score(Y, Y_pred), .95)
Example #12
def recluster_node(dataset, node=None, idx=None, label=None, n_clusters=4):
    selector = match_one(dataset, label=label, idx=idx, node=node)

    # Get the node you want to recluster and flatten it
    selected_data = dataset.select(selector).flatten(1)
    if len(selected_data) >= 100:
        cluster_on = tsne_time(selected_data, pcs=6, t_scale=2 * 60 * 60.0)
    else:
        cluster_on = PCA(
            n_components=min(6, len(selected_data))).fit_transform(
                selected_data.waveforms)

    n_clusters = min(n_clusters, len(cluster_on))

    weight = np.array([node.count for node in selected_data.nodes])

    # kmeans = KMeans(n_clusters=n_clusters).fit(cluster_on, sample_weight=weight)
    # labels = kmeans.predict(cluster_on, sample_weight=weight)
    if len(cluster_on) < 2:
        labels = np.arange(len(cluster_on))
    else:
        gmm = BayesianGaussianMixture(n_components=n_clusters).fit(cluster_on)
        labels = gmm.predict(cluster_on)
    reclustered = selected_data.cluster(labels)

    new_dataset = dataset.select(np.logical_not(selector), child=False)
    return add_nodes(new_dataset, *reclustered.nodes)
Example #13
    def do_bgm(self, n_components=6, seed=42):
        """Bayesian Gaussian Mixture.

        Infer the effective number of components in a Gaussian Mixture Model via variational Bayesian estimation.

        n_effective_components < n_components if the model sets some weights close to 0.

        Args:
            n_components (int): Number of components in GMM.
            seed (int): Random seed.

        Returns:
            bgm_output (dict): Labels and probabilities.

        """

        np.random.seed(seed)
        bgm = BayesianGaussianMixture(n_components=n_components, covariance_type='full',
                                      weight_concentration_prior=1e-2,
                                      weight_concentration_prior_type='dirichlet_process',
                                      mean_precision_prior=1e-2, init_params='random',
                                      max_iter=100, random_state=seed)

        bgm.fit(self.X)
        bgm_labels = bgm.predict(self.X)
        bgm_prob = bgm.predict_proba(self.X)[:, 0]  # posterior probability of component 0 only

        bgm_output = {'bgm_labels': bgm_labels, 'bgm_prob': bgm_prob}

        return bgm_output
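A short synthetic sketch (assumption: two well-separated clusters, not from the original project) of how the effective number of components mentioned in the docstring can be read off the fitted weights:

import numpy as np
from sklearn.mixture import BayesianGaussianMixture

X = np.random.RandomState(42).randn(300, 2)
X[:150] += 5  # two true clusters
bgm = BayesianGaussianMixture(n_components=6, random_state=42).fit(X)
n_effective = int(np.sum(bgm.weights_ > 1e-2))  # components with non-negligible weight
print(bgm.weights_.round(3), n_effective)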
Example #14
def airmass_labels(z, P, T, H2O, O3, n_airmass=5, labels=None):
    cH2O = mf2mol_cum(H2O, P, T)
    cO3 = mf2mol_cum(O3, P, T)
    T_surf = T[:, z < 3].mean(axis=1)
    T_grad = np.diff(T[:, z < 6], axis=1).mean(axis=1)
    H2O_tot = cH2O[:, -1]
    O3_tot = cO3[:, -1]
    f = lambda x: (x - x.mean()) / x.std()
    features = np.vstack((f(T_surf), f(T_grad), f(H2O_tot), f(O3_tot))).T
    if labels is None:
        pdf = BayesianGaussianMixture(n_components=n_airmass,
                                      covariance_type='full',
                                      max_iter=25000)
        pdf.fit(features)
        labels = pdf.predict(features)
    plt.figure()
    for ii in range(n_airmass):
        ix = labels == ii
        plt.subplot(1, 3, 1)
        plt.plot(T_surf[ix], H2O_tot[ix], '.')
        plt.xlabel('Mean T (z<3km) [K]')
        plt.ylabel('Total H2O [mol]')
        plt.subplot(1, 3, 2)
        plt.plot(T_surf[ix], 1e6 * O3_tot[ix], '.')
        plt.xlabel('Mean T (z<3km) [K]')
        plt.ylabel('Total O3 [µmol]')
        plt.subplot(1, 3, 3)
        plt.plot(H2O_tot[ix], 1e6 * O3_tot[ix], '.')
        plt.xlabel('Total H2O [mol]')
        plt.ylabel('Total O3 [µmol]')
    return labels
Example #15
def test_bayesian_mixture_fit_predict_n_init():
    # Check that fit_predict is equivalent to fit.predict, when n_init > 1
    X = np.random.RandomState(0).randn(1000, 5)
    gm = BayesianGaussianMixture(n_components=5, n_init=10, random_state=0)
    y_pred1 = gm.fit_predict(X)
    y_pred2 = gm.predict(X)
    assert_array_equal(y_pred1, y_pred2)
Example #16
class VBEM(object):
    def __init__(self, n_components=5, dataset=None):
        self.model = BayesianGaussianMixture(n_components=n_components,
                                             max_iter=10000)
        self.n_components = n_components
        self.class_num = dataset.class_num
        self.data_num = dataset.data_num
        self.data = dataset.data
        self.label = dataset.label
        self.bestVBEM_k = 0
        self.model.fit(self.data)

    def draw(self):
        label = self.model.predict(self.data)
        self.bestVBEM_k = max(label) + 1
        data_2d = pd.DataFrame(self.data, columns=['x', 'y'])
        label_2d = pd.DataFrame(label, columns=['label'])
        label_names = np.unique(label)
        colors = [
            plt.cm.tab10(i / float(len(label_names)))
            for i in range(len(label_names))
        ]
        tmp_2d = pd.concat([data_2d, label_2d], axis=1)

        plt.figure()
        for i, label in enumerate(label_names):
            plt.scatter(tmp_2d.loc[tmp_2d.label == label].x,
                        tmp_2d.loc[tmp_2d.label == label].y,
                        s=5,
                        color=colors[i],  # was cmap=, which scatter ignores when c is not an array
                        alpha=0.5)
        plt.title('Best GMM with VBEM_' + str(self.class_num) + '_' +
                  str(self.data_num))
        plt.savefig('res/GMM_VBEM_' + str(self.class_num) + '_' +
                    str(self.data_num) + '.jpg')
Example #17
def test_bayesian_mixture_fit_predict_n_init():
    # Check that fit_predict is equivalent to fit.predict, when n_init > 1
    X = np.random.RandomState(0).randn(50, 5)
    gm = BayesianGaussianMixture(n_components=5, n_init=10, random_state=0)
    y_pred1 = gm.fit_predict(X)
    y_pred2 = gm.predict(X)
    assert_array_equal(y_pred1, y_pred2)
Example #18
def test_bayesian_mixture_predict_predict_proba():
    # this is the same test as test_gaussian_mixture_predict_predict_proba()
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            Y = rand_data.Y
            bgmm = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type)

            # Check that an informative error is raised if predict is called before fit
            assert_raise_message(NotFittedError,
                                 "This BayesianGaussianMixture instance"
                                 " is not fitted yet. Call 'fit' with "
                                 "appropriate arguments before using "
                                 "this method.", bgmm.predict, X)

            bgmm.fit(X)
            Y_pred = bgmm.predict(X)
            Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)
            assert_array_equal(Y_pred, Y_pred_proba)
            assert_greater_equal(adjusted_rand_score(Y, Y_pred), .95)
Example #19
def genotype(cnvays):
    result = []
    n_com = 10 if cnvays.shape[1] >= 10 else cnvays.shape[1]
    n_init = 3
    for cnvay in cnvays:
        cnv = [[x] for x in cnvay]
        dpgmm = BayesianGaussianMixture(
            n_components=n_com,
            n_init=n_init,
            max_iter=10000,
            weight_concentration_prior_type='dirichlet_process').fit(cnv)
        labels = dpgmm.predict(cnv)
        normed_ay = np.arange(0, np.max(cnvay) + 0.5, 0.5)
        swlabels = {}
        for rawlabel in np.unique(labels):
            swlabels[rawlabel] = normed_ay[np.argmin(
                np.abs(normed_ay - np.median(cnvay[labels == rawlabel])))]
        newlabels = [swlabels[x] for x in labels]
        gtlabes = {0: 'dd', 0.5: 'Ad', 1: 'AA', 1.5: 'AB', 2: 'BB', 2.5: 'BC'}
        finalline = [gtlabes.get(x, 'M') for x in newlabels]
        if len(np.unique(finalline)) > 1:
            sc = silhouette_score(cnv, finalline,
                                  metric='euclidean')  # silhouette_score
            chs = calinski_harabaz_score(cnv, labels)  # renamed calinski_harabasz_score in newer sklearn
        else:
            sc = np.nan
            chs = np.nan
        llh = dpgmm.score(
            cnv)  # Log likelihood of the Gaussian mixture given X
        finalline += [sc, chs, llh]
        result.append(finalline)
    return result
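A hedged mini-example (made-up inputs) of the label-normalization step above: each raw mixture label is replaced by the half-integer copy-number level nearest to the median of its members before the genotype lookup.

import numpy as np

cnvay = np.array([0.0, 0.1, 1.0, 0.9, 2.1])
labels = np.array([0, 0, 1, 1, 2])          # raw dpgmm labels
normed_ay = np.arange(0, np.max(cnvay) + 0.5, 0.5)
swlabels = {k: normed_ay[np.argmin(np.abs(normed_ay - np.median(cnvay[labels == k])))]
            for k in np.unique(labels)}
print(swlabels)  # {0: 0.0, 1: 1.0, 2: 2.0} -> 'dd', 'AA', 'BB'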
Example #20
 def predict_cp_interval(self, n_components=30):
     '''
     Estimates the (phenotypical) levels of the observed amplitudes, regardless of order, and
     classifies each observed time point accordingly. Between each transition from one inferred
     level to another, a change-point with uniform distribution is inferred.

     :param n_components: maximum number of components of the mixture model (default is 30)
     '''
     
     logging.warning("Predicting CP intervals")
      
     state_mix = BayesianGaussianMixture(
         n_components, 
         n_init = 10,
         weight_concentration_prior_type = 'dirichlet_distribution',
         verbose = 1,
         max_iter = 500,
         tol=1e-12
     )
     
     observed = self.observation[~np.isnan(self.observation)].reshape(-1, 1)
     
     state_mix.fit(observed)
     
     classified = deepcopy(self.observation)
     predicted = state_mix.predict(classified[~np.isnan(classified)].reshape(-1, 1))
     classified[~np.isnan(classified)] = predicted
     
     last = None
     begin = 0
     for i, c in enumerate(classified):
         if not np.isnan(c):
             last = c
             begin = i
             break
     
     segments = []
     for i in range(begin, classified.shape[0]):
         if not np.isnan(classified[i]):
             if classified[i] != last:
                 s = np.max(np.argwhere(~np.isnan(classified[0:i-1])))
                 segments.append((s, i))
             last = classified[i]
             begin = i
     
     
     # calculate uniform distribution parameters
     result = []
     for segment in segments:
         a = segment[0]
         b = segment[1]
         distro = {
             'begin': a,
             'end': b
         }
         result.append(distro)
     result = pd.DataFrame(result)
     
     return result, state_mix
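A minimal synthetic sketch (not from the original project) of the same idea: classify each time point by amplitude level, then read change-points off the label transitions.

import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.RandomState(1)
obs = np.r_[np.full(80, 0.0), np.full(80, 4.0)] + 0.2 * rng.randn(160)

mix = BayesianGaussianMixture(n_components=5, n_init=5, random_state=1).fit(obs.reshape(-1, 1))
levels = mix.predict(obs.reshape(-1, 1))
change_points = np.flatnonzero(np.diff(levels) != 0) + 1
print(change_points)  # ideally a single transition near index 80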
Example #21
def clusteringBayGaussMixt(X, y, nclusters, paramlist):
    bgm = BayesianGaussianMixture(n_components=nclusters, covariance_type='full', tol=0.001,
                                  reg_covar=1e-06, max_iter=100, n_init=1, init_params='kmeans',
                                  weight_concentration_prior_type='dirichlet_process',
                                  weight_concentration_prior=None, mean_precision_prior=None,
                                  mean_prior=None, degrees_of_freedom_prior=None,
                                  covariance_prior=None, random_state=None, warm_start=False,
                                  verbose=0, verbose_interval=10)
    bgm.fit(X, y)  # y is accepted but ignored by mixture models
    labels = bgm.predict(X)
    return labels
Example #22
def convert_to_deciles(y, n=10, gmm=False):
    """
    By default converts to deciles; can be changed via the choice of n.
    """
    if gmm:
        # this is experimental
        bgm = BayesianGaussianMixture(n_components=10)
        bgm.fit(y.reshape(-1, 1))
        return bgm.predict(y.reshape(-1, 1))
    # pd.qcut yields quantile (decile) bins; pd.cut would give equal-width bins
    return np.array(pd.qcut(y, n, labels=range(n)))
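A hedged usage sketch with synthetic data (y is assumed to be a 1-D numpy array):

import numpy as np

y = np.random.RandomState(0).lognormal(size=500)
print(convert_to_deciles(y)[:10])             # decile bin labels 0..9
print(convert_to_deciles(y, gmm=True)[:10])   # mixture-component labels (experimental)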
Example #23
def bayesian_gaussian_mixture(latent):
    gauss_mix = BayesianGaussianMixture(
        n_components=N_COMPONENTS,
        covariance_type=COVARIANCE_TYPE,
        weight_concentration_prior_type=WEIGHT_CONCENTRATION_PRIOR_TYPE,
        weight_concentration_prior=WEIGHT_CONCENTRATION_PRIOR,
        max_iter=MAX_ITER,
        verbose=VERBOSE).fit(latent)
    labels = gauss_mix.predict(latent)
    return labels
Example #24
def getBayesianGaussian(filename, targetname):
    # run a Bayesian Gaussian mixture on the t-SNE embedding
    matrix = tsne(filename)

    # fit the model
    model = BayesianGaussianMixture(n_components=8).fit(matrix)
    label = model.predict(matrix)
    print(label)

    # generate graph
    getTsne(filename, targetname, label)
Example #25
    def cluster_embeddings(
        self,
        cluster_method,
        num_clusters,
        use_decomposed,
        additional_params={"random_state": 0},
    ):
        """
        @param cluster_method clustering method, one of {KMeans, Spectral, GaussianMix, BayesGaussMix}
        @param num_clusters number of clusters
        @param use_decomposed boolean, whether to use decomposed or raw representations
        @param additional_params {param_name: value} of parameters accepted by the sklearn clustering function.
            Cannot include n_components.

        Predicts cluster assignments for data points on raw or decomposed embedding representations.
        """
        self.cluster_method = cluster_method
        self.num_clusters = num_clusters

        if use_decomposed:
            vec_df = self.decomposed_embedding_representation
        else:
            vec_df = self.embedding_representation

        if self.cluster_method == "KMeans":
            self.predicted_labels = KMeans(
                n_clusters=self.num_clusters, **additional_params
            ).fit_predict(vec_df)

        if self.cluster_method == "Spectral":
            self.predicted_labels = SpectralClustering(
                n_clusters=self.num_clusters,
                affinity="cosine",
                assign_labels="discretize",
                **additional_params,
            ).fit_predict(vec_df)

        if self.cluster_method == "GaussianMix":
            gm_model = GaussianMixture(
                n_components=self.num_clusters, **additional_params
            ).fit(vec_df)

            self.predicted_labels = gm_model.predict(vec_df)

        if self.cluster_method == "BayesGaussMix":
            bgm_model = BayesianGaussianMixture(
                n_components=self.num_clusters, **additional_params
            ).fit(vec_df)

            self.predicted_labels = bgm_model.predict(vec_df)
            self.num_clusters = len(
                set(self.predicted_labels)
            )  # set num_clusters to actual num components
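A hedged usage sketch (the owning instance, here called embedder, and its fitted embedding representations are assumed from context):

# embedder is assumed to be an instance of the class defining cluster_embeddings,
# with its embedding representations already computed.
embedder.cluster_embeddings(
    cluster_method="BayesGaussMix",
    num_clusters=12,
    use_decomposed=True,
    additional_params={"random_state": 0, "max_iter": 500},
)
print(embedder.num_clusters)  # may be smaller than 12 if some components stay empty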
Example #26
def embed_mixture_variational(xmaps_np,
                              n_components,
                              ):
    sample_by_feature = np.vstack([np_map.flatten()
                                   for dtag, np_map
                                   in xmaps_np.items()
                                   ]
                                  )

    # mixture = BayesianGaussianMixture()

    begin = time.time()

    pca = PCA(n_components=50)
    sample_by_feature_pca = pca.fit_transform(sample_by_feature)

    print("shape is: {}".format(sample_by_feature_pca.shape))

    mixture = BayesianGaussianMixture(n_components,
                                      covariance_type="spherical",
                                      verbose=10,
                                      verbose_interval=2,
                                      )
    mixture.fit(sample_by_feature_pca)

    finish = time.time()

    print(mixture)

    print("Finished in {}".format(finish - begin))

    print(mixture.predict(sample_by_feature_pca))

    print(mixture.weights_)

    clusters = mixture.predict(sample_by_feature_pca)

    probabilities = mixture.score_samples(sample_by_feature_pca)

    return mixture, pca, clusters, probabilities
Example #27
 def split_units(self, n_clusters):
     """Splits recovered spikes per units into clusters."""
     self.n_clusters = n_clusters
     if self.cid is None:
         self.cid = []
         for i in tqdm(range(self.n_unit), 'Splitting Units'):
             f = self.features[i]
             f = f.reshape([f.shape[0], self.n_feat * self.n_main_chan])
             clustering = BayesianGaussianMixture(
                 n_components=n_clusters, max_iter=500)
             clustering.fit(f)
             self.cid.append(clustering.predict(f))
     return self.cid
Example #28
def bayesian_gaussian_mixture(vector: np.ndarray, n: int, BIC_calculate=False):
    if BIC_calculate:
        # despite the flag name, this returns held-out and train log-likelihoods;
        # BayesianGaussianMixture has no .bic method
        np.random.seed(140597)
        mask = np.random.choice([False, True], len(vector), p=[0.70, 0.30])
        model_train = BayesianGaussianMixture(n_components=n, covariance_type='full').fit(vector[~mask])
        validation_score = model_train.score(vector[mask])
        train_score = model_train.score(vector[~mask])
        return validation_score, train_score
    else:
        np.random.seed(140597)
        mask = np.random.choice([False, True], len(vector), p=[0.70, 0.30])
        dpgmm = BayesianGaussianMixture(n_components=n, covariance_type='full', max_iter=900, tol=1e-4).fit(vector[~mask])
        cluster_label = dpgmm.predict(vector)
        return cluster_label
Example #29
 def _dpgmm(fet, n_comp=8, max_iter=400):
     from sklearn.mixture import BayesianGaussianMixture as DPGMM
     dpgmm = DPGMM(n_components=n_comp,
                   covariance_type='full',
                   weight_concentration_prior=1e-3,
                   weight_concentration_prior_type='dirichlet_process',
                   init_params="kmeans",
                   max_iter=max_iter,  # use the argument (was hard-coded to 100)
                   random_state=0,
                   verbose=0,
                   verbose_interval=10)  # init can be "kmeans" or "random"
     dpgmm.fit(fet)
     label = dpgmm.predict(fet)
     return label
Example #30
def gmm_hyper(hyper_image, features, n_clusters):
    """Cluster hyperspectral pixels with a Bayesian Gaussian mixture.

    Returns the label image, the average spectrum per cluster, and the
    Calinski-Harabasz score of the clustering.
    """

    gmm = BayesianGaussianMixture(n_components=n_clusters,
                                  covariance_type='spherical').fit(features)
    labels = gmm.predict(features)

    label_image = labels.reshape(hyper_image.shape[:-1])

    gmm_spectra = average_spectra(hyper_image, labels)

    score = calinski_harabaz_score(features, label_image.ravel())

    return label_image, gmm_spectra, score
Example #31
File: sccaf.py  Project: lhqing/ALLCools
def run_BayesianGaussianMixture(Y, K):
    """
    Bayesian Gaussian mixture clustering.

    Input
    -----
    Y: the expression matrix
    K: number of clusters

    return
    -----
    clusters assigned to each cell.
    """
    gmm = BayesianGaussianMixture(K, max_iter=1000)
    gmm.fit(Y)
    return gmm.predict(Y)
Example #32
    def Bayesian_gmm_inference(self, data: Union[pd.DataFrame, np.ndarray],
                               **params) -> None:
        """
        Bayesian inference of parameters by the EM algorithms of sklearn,
        accept only DataFrame with numerical data, please do the feature engineering before enter the Dataframe
        :param data: Set of data to do the inference
        :return: None
        """
        Bayesian_gmm = BayesianGaussianMixture(**params)
        Bayesian_gmm.fit(data)

        self.means = Bayesian_gmm.means_
        self.precision = Bayesian_gmm.precisions_
        self.weight = Bayesian_gmm.weights_
        self.label = Bayesian_gmm.predict(data)
        self.data = data
        self.nrb_comp = Bayesian_gmm.n_components
Example #33
    ax1_min, ax1_max, ax2_min, ax2_max = plt.axis()
    plt.xlim((x1_min, x1_max))
    plt.ylim((x2_min, x2_max))
    plt.title('GMM', fontsize=15)
    plt.grid(b=True, ls=':', color='#606060')

    # DPGMM
    dpgmm = BayesianGaussianMixture(n_components=n_components, covariance_type='full', max_iter=1000, n_init=5,
                                    weight_concentration_prior_type='dirichlet_process', weight_concentration_prior=0.1)
    dpgmm.fit(x)
    centers = dpgmm.means_
    covs = dpgmm.covariances_
    print('DPGMM means = \n', centers)
    print('DPGMM covariances = \n', covs)
    y_hat = dpgmm.predict(x)
    print(y_hat)

    ax = plt.subplot(212)
    grid_hat = dpgmm.predict(grid_test)
    grid_hat = grid_hat.reshape(x1.shape)
    plt.pcolormesh(x1, x2, grid_hat, cmap=cm)
    plt.scatter(x[:, 0], x[:, 1], s=20, c=y, cmap=cm, marker='o', edgecolors='#202020')

    for i, cc in enumerate(zip(centers, covs)):
        if i not in y_hat:
            continue
        center, cov = cc
        value, vector = sp.linalg.eigh(cov)
        width, height = value[0], value[1]
        v = vector[0] / sp.linalg.norm(vector[0])
Example #34
appl = 'WHE'

# Create vector with P and Q values and plot them
P = d[appl].P[init:end].values
Q = d[appl].Q[init:end].values
X = np.transpose([P, Q])

plt.plot(d[appl].P[init:end], d[appl].Q[init:end],'o', alpha=0.1)

# Normalize X
sscl = StandardScaler().fit(X)
X = sscl.transform(X)

# Apply clusterer
bgm = BayesianGaussianMixture(n_components=33, covariance_type='full', weight_concentration_prior_type='dirichlet_distribution', random_state=42).fit(X)
y_pred = bgm.predict(X)

# Plot clusters with X unnormalized
X = sscl.inverse_transform(X)
plt.figure()
plt.scatter(X[:,0],X[:,1], color=colors[y_pred])
means = sscl.inverse_transform(bgm.means_)
medians = get_medians(X, y_pred)
# plt.plot(means[:,0],means[:,1],'kx')
plt.plot(medians[:,0],medians[:,1],'kx')


# TODO: Compare mean with ground truth
plt.figure()
plt.plot(P)
P_pred = means[y_pred][:,0]
Example #35
          data_thr.rateC, data_thr.rateCA]

scaler = StandardScaler()
X = scaler.fit_transform(X)

# 1 corresponds to data_thr.rate and 4=5-1 to data_thr.rateC
w = w / np.sqrt(scaler.var_[1:])
# w = np.exp(-np.exp(3 * w.mean(axis=1)))
w = 1. / w.mean(axis=1) ** 2

Html_file = open("gmm_sklearn_files/gmm3_sklearn.html", "w")

# alpha_prior/beta_prior were pre-release names; released sklearn uses
# weight_concentration_prior and mean_precision_prior for these priors
gmm = BayesianGaussianMixture(n_components=3, weight_concentration_prior=0.1,
                              mean_precision_prior=1, n_init=5)
gmm.fit(X)  # , weights=w) not implemented in sklearn yet
preds = gmm.predict(X)
probs = gmm.predict_proba(X)

data_thr['preds'] = pd.Series(preds).astype("category")

color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"]  # Spectral9
color_key = color_key[:len(set(preds))+1]

covs = gmm.covariances_
means = gmm.means_

# transform cov for non-standardizeed data:
covs = np.array([np.dot(np.diag(np.sqrt(scaler.var_)),
                        np.dot(covs[j], np.diag(np.sqrt(scaler.var_))))
                 for j in range(covs.shape[0])])