Example #1
    def test_log_responsibilities(self):
        """
        Test the log responsibilities with the help of Sklearn.
        """
        N = 16384
        S = 2048
        D = 128

        means = torch.randn(S, D)
        covs = torch.rand(S)
        x = torch.randn(N, D)
        prior = torch.rand(S)
        prior /= prior.sum()
        mixture = GaussianMixture(S, covariance_type='spherical')
        mixture.means_ = means.numpy()
        mixture.precisions_cholesky_ = np.sqrt(1 / covs.numpy())
        mixture.weights_ = prior.numpy()

        # pylint: disable=protected-access
        _, expected = mixture._estimate_log_prob_resp(x.numpy())
        expected = torch.from_numpy(expected)

        probs = log_normal(x, means, covs, 'spherical')
        predicted = log_responsibilities(probs, prior)

        self.assertTrue(
            torch.allclose(expected, predicted, atol=1e-03, rtol=1e-05))
    def log_prob(self, x, t, feature, samples):
        observations = x[:, :t]
        p_s_past = {}  #p(s_t-1|X_{0:t-1})
        for st in self.states:
            p_s_past[st], _, _ = fwd_bkw(observations, self.states,
                                         self.start_probability,
                                         self.transition_probability,
                                         self.emission_probability, st)

        p_currstate_past = {}
        for s in self.states:
            p_currstate_past[s] = 0.

        for curr_state in self.states:
            for st in self.states:
                p_currstate_past[curr_state] += self.transition_probability[
                    curr_state][st] * p_s_past[st]

        gmm = GaussianMixture(n_components=len(self.states),
                              covariance_type='full')
        gmm.fit(np.random.randn(10, observations.shape[0]))
        gmm.weights_ = np.array(list(p_currstate_past.values()))
        gmm.means_ = np.array(self.mean)
        gmm.covariances_ = np.array(self.cov)
        # precisions_cholesky_ must factor the precision (inverse covariance), not the covariance
        gmm.precisions_ = np.array(
            [np.linalg.inv(cov) for cov in gmm.covariances_])
        gmm.precisions_cholesky_ = np.array(
            [np.linalg.cholesky(prec) for prec in gmm.precisions_])

        return gmm.score_samples(samples)
Example #3
def sample_gaussian_mixture(pis, sigmas, mus, num_samples, n_features):
    """
    return: array of size (batch_size, num_samples*n_features) containing samples
    taken from the gaussian mixture parameterized by pis, sigmas, mus
        e.g.
        input 1 [[ s1_f1,s1_f2,s1_f3 | s2_f1, s2_f2, s2_f3 |.... ],
        input 2  [ s1_f1,s1_f2,s1_f3 | s2_f1, s2_f2, s2_f3 |.... ],
        input 3  [ s1_f1,s1_f2,s1_f3 | s2_f1, s2_f2, s2_f3 |.... ],
        .        [...............................................],
        input n  [ s1_f1,s1_f2,s1_f3 | s2_f1, s2_f2, s2_f3 |.... ]]
    """
    # Gaussian PDF parameters
    batch_size = pis.shape[0]
    num_mixtures = pis.shape[1]
    samples = np.zeros((batch_size, num_samples * n_features))
    gmm = GaussianMixture(n_components=num_mixtures,
                          covariance_type='spherical')
    gmm.fit(np.random.rand(10, 1))  # Now it thinks it is trained
    for i in range(batch_size):
        gmm.weights_ = pis[i]
        gmm.means_ = mus[i].reshape(num_mixtures, n_features)
        gmm.covariances_ = np.expand_dims(sigmas[i], axis=1)**2
        sample = gmm.sample(num_samples)
        samples[i] = np.ravel(sample[0])
    return Variable(torch.from_numpy(samples))
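A minimal usage sketch for the sampler above. The shapes are assumptions inferred from the reshaping in the body: pis and sigmas of shape (batch_size, num_mixtures), mus flattened to (batch_size, num_mixtures * n_features); the function itself still needs numpy, torch and sklearn available as in the original snippet.

import numpy as np

batch_size, num_mixtures, n_features = 4, 3, 2
pis = np.full((batch_size, num_mixtures), 1.0 / num_mixtures)    # equal mixture weights
mus = np.random.randn(batch_size, num_mixtures * n_features)     # flattened component means
sigmas = 0.5 + np.abs(np.random.randn(batch_size, num_mixtures)) # spherical std deviations

out = sample_gaussian_mixture(pis, sigmas, mus, num_samples=5, n_features=n_features)
print(out.shape)  # torch.Size([4, 10]), i.e. (batch_size, num_samples * n_features)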
Example #4
def get_P_of_Data_Given_Param(means,
                              covs,
                              weights,
                              X,
                              method='scipy'):  # P(Data | Param)
    samples = list(enumerate(X))  # materialize so the pairs can be iterated more than once
    p = {}
    if method == 'scipy':
        g = [
            multivariate_normal(mean=means[k],
                                cov=covs[k],
                                allow_singular=False)
            for k in range(0, len(weights))
        ]
        gaussians = {}
        for index, x in samples:
            gaussians[index] = np.array([g_k.pdf(x) for g_k in g])
        for index, x in samples:
            probabilities = np.multiply(gaussians[index], weights)
            probabilities = probabilities / np.sum(probabilities)
            p[index] = probabilities
    else:
        # fitting on X marks the model as fitted; the parameters are then overridden below
        gmm = GaussianMixture(n_components=len(weights),
                              covariance_type='diag').fit(X)
        gmm.means_ = means
        gmm.covariances_ = covs
        gmm.weights_ = weights
        # for 'diag' covariances the precision Cholesky factors are simply 1 / sqrt(variance)
        gmm.precisions_cholesky_ = 1.0 / np.sqrt(covs)

        for index, x in samples:
            x = x.reshape(1, -1)
            likelihood_ratio = gmm.predict_proba(x)
            # likelihood_ratio = likelihood_ratio / np.sum(likelihood_ratio)
            p[index] = likelihood_ratio
    return p
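The sklearn branch above uses a pattern that recurs throughout these examples: give a GaussianMixture the attributes of a fitted model and then overwrite them by hand. Below is a minimal self-contained sketch of that pattern with invented numbers; it never calls fit() because setting weights_, means_, covariances_ and precisions_cholesky_ is enough for predict_proba and score_samples.

import numpy as np
from sklearn.mixture import GaussianMixture

means = np.array([[0.0, 0.0], [3.0, 3.0]])       # (n_components, n_features)
variances = np.array([[1.0, 1.0], [0.5, 0.5]])   # per-dimension variances ('diag')
weights = np.array([0.6, 0.4])

gmm = GaussianMixture(n_components=2, covariance_type='diag')
gmm.weights_ = weights
gmm.means_ = means
gmm.covariances_ = variances
# for 'diag' covariances the precision Cholesky factors are simply 1 / sqrt(variance)
gmm.precisions_cholesky_ = 1.0 / np.sqrt(variances)

x = np.array([[0.1, -0.2], [2.9, 3.1]])
print(gmm.predict_proba(x))   # posterior responsibilities, each row sums to 1
print(gmm.score_samples(x))   # per-sample log-likelihood under the mixture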
Example #5
 def toGaussianMixture(self):
     g = GaussianMixture(self.n)
     g.fit(np.random.rand(2 * self.n).reshape((-1, 1)))
     g.weights_ = np.array(self.weights)
     g.means_ = np.array(self.means)[:, np.newaxis]
     g.covariances_ = np.array(self.covariances)[:, np.newaxis, np.newaxis]
     # keep the precision factors consistent with the overridden 1-D 'full' covariances
     g.precisions_cholesky_ = 1.0 / np.sqrt(g.covariances_)
     return g
Example #6
def cause(n, k, p1, p2):
    g = GaussianMixture(k, covariance_type='spherical')
    g.means_ = p1 * np.random.randn(k, 1)
    # spherical models keep one variance per component in a 1-D covariances_ array
    g.covariances_ = np.power(abs(p2 * np.random.randn(k, 1) + 1), 2).ravel()
    g.weights_ = abs(np.random.rand(k))
    g.weights_ = g.weights_ / sum(g.weights_)
    g.precisions_cholesky_ = 1.0 / np.sqrt(g.covariances_)
    # GaussianMixture.sample returns (samples, component_labels)
    return scale(g.sample(n)[0])
def get_3d_grid_gmm(subdivisions=[5, 5, 5], variance=0.04):
    """
    Compute the weights, means and covariances of a GMM placed on a 3D grid
    :param subdivisions: 3-element list with the number of subdivisions of the 3D space along each axis
    :param variance: scalar variance shared by all the spherical Gaussians
    :return gmm: instance of a sklearn GaussianMixture (GMM) object
    """
    # n_gaussians = reduce(lambda x, y: x*y,subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [
        1.0 / (subdivisions[0]), 1.0 / (subdivisions[1]),
        1.0 / (subdivisions[2])
    ]

    means = np.mgrid[step[0] - 1:1.0 - step[0]:complex(0, subdivisions[0]),
                     step[1] - 1:1.0 - step[1]:complex(0, subdivisions[1]),
                     step[2] - 1:1.0 - step[2]:complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    # in scikit-learn >= 0.22 this private helper moved to sklearn.mixture._gaussian_mixture
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
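A short usage sketch for the grid GMM above; the point cloud is hypothetical and only assumes coordinates roughly inside the [-1, 1] cube covered by the grid.

import numpy as np

gmm = get_3d_grid_gmm(subdivisions=[3, 3, 3], variance=0.04)
print(gmm.means_.shape, gmm.weights_.sum())    # (27, 3) and 1.0

points = np.random.uniform(-1, 1, size=(1000, 3))  # stand-in point cloud
log_density = gmm.score_samples(points)            # per-point log-likelihood
print(log_density.shape)                            # (1000,)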
Example #8
    def test_fit(self):
        expected_means = np.array([-55., 0., 7.])
        expected_stds = np.array([3., .5, 1.])

        from sklearn.mixture import GaussianMixture
        gmm = GaussianMixture(n_components=3, means_init=expected_means)
        gmm.means_ = expected_means[..., None]
        # note: covariances_ holds variances, so expected_stds are effectively used as variances here
        gmm.covariances_ = np.array(expected_stds[..., None, None])
        gmm.weights_ = np.array([1 / 3, 1 / 3, 1 / 3])

        obs = gmm.sample(100000 + np.random.randint(-3, 3))[0].squeeze()

        init = deeptime.markov.hmm.init.gaussian.from_data(obs,
                                                           n_hidden_states=3,
                                                           reversible=True)
        hmm_est = deeptime.markov.hmm.MaximumLikelihoodHMM(init)
        hmm = hmm_est.fit(obs).fetch_model()

        np.testing.assert_array_almost_equal(
            hmm.transition_model.transition_matrix, np.eye(3), decimal=3)
        m = hmm.output_model
        for mean, sigma in zip(m.means, m.sigmas):
            # find the mean closest to this one (order might have changed)
            mean_ix = np.argmin(np.abs(expected_means - mean))
            np.testing.assert_almost_equal(mean,
                                           expected_means[mean_ix],
                                           decimal=1)
            np.testing.assert_almost_equal(sigma * sigma,
                                           expected_stds[mean_ix],
                                           decimal=1)
Example #9
def cluster(data, num_of_clusters):
    x = data[[0, 1]]
    y = data['label']
    means = []
    for clus in range(num_of_clusters):
        clus_x = data[(data['label'] == clus)]
        #print 'clus: %d' % clus
        #print clus_x
        means.append([clus_x[0].mean(), clus_x[1].mean()])
    # print 'x'
    # print x
    # print 'y'
    # print y
    # clusterer = KMeans(n_clusters=num_of_clusters)
    # clusterer = GaussianMixture(n_components=num_of_clusters, means_init=means)
    clusterer = GaussianMixture(n_components=num_of_clusters)
    clusterer.fit(x, y)
    print('clusterer means: ')
    print(clusterer.means_)
    for clus in range(num_of_clusters):
        xx, yy = clusterer.means_[clus]
        plt.scatter([xx], [yy], c=colors[clus], marker="s", s=200, alpha=0.5)
    means = np.array(means)
    print('computed means:')
    print(means)
    #print pd.DataFrame(means)
    clusterer.means_ = means
    print('clusterer new means: ')
    print(clusterer.means_)
    for clus in range(num_of_clusters):
        xx, yy = clusterer.means_[clus]
        plt.scatter([xx], [yy], c=colors[clus], marker="d", s=200, alpha=0.5)
    preds = clusterer.predict(x)
    data['pred'] = preds
    draw_features_cluster(data, num_of_clusters)
Example #10
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = GaussianMixture(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    g.covariances_ = make_covar_matrix(covariance_type, n_mix, n_features)
    g.weights_ = normalized(prng.rand(n_mix))
    return g
Example #11
def gmm_scale(gmm, shift=None, scale=None, reverse=False, params=None):
    """
    Apply scaling factors to GMM instances.

    Parameters
    ----------
    gmm : GaussianMixture
        GMM instance to be scaled.
    shift : int, float, optional
        Shift for the entire model. Default is None (no shift).
    scale : int, float, optional
        Scale for all components. Default is None (no scaling).
    reverse : bool, optional
        Whether the GMM should be reversed.
    params
        GaussianMixture params for initialization of new instance.

    Returns
    -------
    GaussianMixture
        Modified GMM instance.

    """

    # Fetch parameters if not supplied
    if params is None:
        # noinspection PyUnresolvedReferences
        params = gmm.get_params()

    # Instantiate new GMM
    gmm_new = GaussianMixture(**params)

    # Create scaled fitted GMM model
    gmm_new.weights_ = gmm.weights_

    # Apply shift if set
    # copy so the in-place scaling/reversal below never mutates the input model
    gmm_new.means_ = gmm.means_ + shift if shift is not None else gmm.means_.copy()

    # Apply scale
    if scale is not None:
        gmm_new.means_ /= scale

    gmm_new.covariances_ = gmm.covariances_ / scale ** 2 if scale is not None else gmm.covariances_
    gmm_new.precisions_ = np.linalg.inv(gmm_new.covariances_) if scale is not None else gmm.precisions_
    gmm_new.precisions_cholesky_ = np.linalg.cholesky(gmm_new.precisions_) if scale is not None \
        else gmm.precisions_cholesky_

    # Reverse if set
    if reverse:
        gmm_new.means_ *= -1

    # Copy the converged attribute if available
    if hasattr(gmm, 'converged_'):
        gmm_new.converged_ = gmm.converged_

    # Return scaled GMM
    return gmm_new
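A usage sketch for gmm_scale, assuming a model fitted with the default covariance_type='full' (which is what the np.linalg.inv / np.linalg.cholesky calls above expect); the data and the shift/scale values are invented.

import numpy as np
from sklearn.mixture import GaussianMixture

data = np.concatenate([np.random.normal(0.0, 1.0, 500),
                       np.random.normal(5.0, 2.0, 500)]).reshape(-1, 1)
gmm = GaussianMixture(n_components=2, covariance_type='full').fit(data)

# e.g. shift by -2 and rescale by a factor of 10 into normalized units
gmm_scaled = gmm_scale(gmm, shift=-2.0, scale=10.0)
print(gmm_scaled.means_.ravel())
print(gmm_scaled.covariances_.ravel())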
def computeProb(mfcc):
    # sil
    sil_mean = np.loadtxt('sil_mean.txt')
    sil_variance = np.loadtxt('sil_variance.txt')
    sil_weight = np.loadtxt('sil_weight.txt')
    sil_gmm = GaussianMixture(128, covariance_type="diag")
    sil_precisions_cholesky_ = gaussian_mixture._compute_precision_cholesky(
        sil_variance, "diag")

    sil_gmm.means_ = sil_mean
    sil_gmm.weights_ = sil_weight
    sil_gmm.precisions_cholesky_ = sil_precisions_cholesky_
    sil_result = np.dot(sil_gmm.predict_proba(mfcc), sil_weight.reshape(-1, 1))

    # speech
    speech_mean = np.loadtxt('speech_mean.txt')
    speech_variance = np.loadtxt('speech_variance.txt')
    speech_weight = np.loadtxt('speech_weight.txt')
    speech_gmm = GaussianMixture(128, covariance_type="diag")
    speech_precisions_cholesky_ = gaussian_mixture._compute_precision_cholesky(
        speech_variance, "diag")

    speech_gmm.means_ = speech_mean
    speech_gmm.weights_ = speech_weight
    speech_gmm.precisions_cholesky_ = speech_precisions_cholesky_
    speech_result = np.dot(speech_gmm.predict_proba(mfcc),
                           speech_weight.reshape(-1, 1))

    # noise
    noise_mean = np.loadtxt('noise_mean.txt')
    noise_variance = np.loadtxt('noise_variance.txt')
    noise_weight = np.loadtxt('noise_weight.txt')
    noise_gmm = GaussianMixture(128, covariance_type="diag")
    noise_precisions_cholesky_ = gaussian_mixture._compute_precision_cholesky(
        noise_variance, "diag")

    noise_gmm.means_ = noise_mean
    noise_gmm.weights_ = noise_weight
    noise_gmm.precisions_cholesky_ = noise_precisions_cholesky_
    noise_result = np.dot(noise_gmm.predict_proba(mfcc),
                          noise_weight.reshape(-1, 1))

    return sil_result, speech_result, noise_result
def gmm_cause(points, k=2, p1=3, p2=4):
    """Init a root cause with a Gaussian Mixture Model w/ a spherical covariance type."""
    g = GMM(k, covariance_type="spherical")
    g.fit(np.random.randn(300, 1))

    g.means_ = p1 * np.random.randn(k, 1)
    # GaussianMixture stores the spherical covariances in covariances_ (one variance per component)
    g.covariances_ = np.power(abs(p2 * np.random.randn(k, 1) + 1), 2).ravel()
    g.weights_ = abs(np.random.rand(k))
    g.weights_ = g.weights_ / sum(g.weights_)
    return g.sample(points)[0].reshape(-1)
Example #14
    def return_copy(self):
        '''If anything goes wrong, make sure that the means and weights
            are assigned as copies rather than as references.
        '''
        copy_tmp = GMM(n_components=self.n_components)

        copy_tmp.covariances_ = self.covariances_  #_get_covars()
        copy_tmp.means_ = self.means_
        copy_tmp.weights_ = self.weights_

        return copy_tmp
def multimod_emd_from_gmm(means, sigmas, weights):
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    gmm_vars = 2 * sigmas_stacked * sigmas_stacked
    precisions_cholesky = _compute_precision_cholesky(gmm_vars, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = gmm_vars
    y_sampled, _ = gmm.sample(1000)
    return wemd_from_pred_samples(y_sampled)
def get_multimodality_score(means, sigmas, weights):
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    vars = 2 * sigmas_stacked * sigmas_stacked
    precisions_cholesky = _compute_precision_cholesky(vars, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = vars
    gmm_uni = GaussianMixture(n_components=1, covariance_type='diag')
    argmax = np.argmax(gmm.weights_)
    gmm_uni.means_ = gmm.means_[argmax, :].reshape([1, 2])
    gmm_uni.covariances_ = gmm.covariances_[argmax, :].reshape([1, 2])
    gmm_uni.precisions_cholesky_ = gmm.precisions_cholesky_[argmax, :].reshape(
        [1, 2])
    gmm_uni.weights_ = np.array([1]).reshape([1])
    Z_uni = compute_histogram_gmm(gmm_uni)
    Z = compute_histogram_gmm(gmm)
    ratio = computeWEMD(Z, Z_uni)
    return ratio
Example #17
def create_sklearn_gmm(weights, mean_tensor, cov_tensor, random_state=0):
    n_components = len(weights)
    gmm = GaussianMixture(n_components=n_components,
                          covariance_type='full',
                          random_state=random_state)
    gmm.weights_ = weights.numpy()
    gmm.means_ = mean_tensor.numpy()
    gmm.covariances_ = cov_tensor.numpy()
    gmm.precisions_ = np.array(
        [np.linalg.inv(cov) for cov in gmm.covariances_])
    gmm.precisions_cholesky_ = np.array(
        [np.linalg.cholesky(prec) for prec in gmm.precisions_])
    return gmm
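A small usage sketch for create_sklearn_gmm; it assumes the weights, means and covariances arrive as torch tensors (hence the .numpy() calls above), with one full covariance matrix per component.

import torch

weights = torch.tensor([0.5, 0.5])
means = torch.tensor([[0.0, 0.0, 0.0],
                      [4.0, 4.0, 4.0]])
covs = torch.stack([torch.eye(3), 2.0 * torch.eye(3)])  # (2, 3, 3)

gmm = create_sklearn_gmm(weights, means, covs)
samples, labels = gmm.sample(100)   # 100 points plus their component labels
print(samples.shape, labels.shape)  # (100, 3) (100,)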
Example #18
    def jsd_diss(self, w1, mu1, cov1, w2, mu2, cov2, sample_count=10000):
        """
        Calculates a Monte Carlo estimate of the Jensen-Shannon divergence between two GMMs
        :param w1, mu1, cov1: weights, means and covariances of the first mixture
        :param w2, mu2, cov2: weights, means and covariances of the second mixture
        :param sample_count: number of Monte Carlo samples to use
        :return: Jensen-Shannon divergence estimate
        """
        gmm_p = GaussianMixture(n_components=len(w1),
                                covariance_type="full")
        gmm_p.weights_ = w1
        gmm_p.covariances_ = cov1
        gmm_p.means_ = mu1
        gmm_p.precisions_cholesky_ = _compute_precision_cholesky(cov1, "full")

        gmm_q = GaussianMixture(n_components=len(w2),
                                covariance_type="full")
        gmm_q.weights_ = w2
        gmm_q.covariances_ = cov2
        gmm_q.means_ = mu2
        gmm_q.precisions_cholesky_ = _compute_precision_cholesky(cov2, "full")

        X = gmm_p.sample(sample_count)[0]
        log_p_X = gmm_p.score_samples(X)
        log_q_X = gmm_q.score_samples(X)
        log_mix_X = np.logaddexp(log_p_X, log_q_X)

        Y = gmm_q.sample(sample_count)[0]
        log_p_Y = gmm_p.score_samples(Y)
        log_q_Y = gmm_q.score_samples(Y)
        log_mix_Y = np.logaddexp(log_p_Y, log_q_Y)

        # Monte Carlo estimate of JSD = (KL(P||M) + KL(Q||M)) / 2 with M = (P + Q) / 2,
        # using log m(x) = logaddexp(log p(x), log q(x)) - log(2)
        return (log_p_X.mean() -
                (log_mix_X.mean() - np.log(2)) + log_q_Y.mean() -
                (log_mix_Y.mean() - np.log(2))) / 2
Example #19
def generate_equal_weight_GMM(H_mu, H_var, covariance_type='diag'):
    n_components = len(H_mu)
    weights_init = len(H_mu) * [1. / len(H_mu)]
    GMM = GaussianMixture(n_components=n_components,
                          covariance_type=covariance_type, n_init=1, weights_init=None, means_init=None,
                          precisions_init=None, random_state=None, warm_start=True, verbose=0,
                          verbose_interval=10)
    GMM.weights_ = weights_init
    GMM.means_ = H_mu
    GMM.covariances_ = H_var
    GMM.precisions_cholesky_ = _compute_precision_cholesky(
        H_var, covariance_type)

    return GMM
Example #20
def gmm_loglik(y, pi, mu, sigma, K):
    model = GaussianMixture(K, covariance_type='diag')
    model.fit(y)
    N = np.shape(mu)[0]
    N_test = np.shape(y)[0]
    ll_test = np.zeros(N)
    for i in range(N):
        model.means_ = mu[i, :]
        model.covariances_ = sigma[i, :]**2
        model.precisions_ = 1 / (sigma[i, :]**2)
        model.weights_ = pi[i, :]
        model.precisions_cholesky_ = _compute_precision_cholesky(model.covariances_, model.covariance_type)
        ll_test[i] = model.score(y)
    return ll_test * N_test
Example #21
def generate_params_gmm(weight, mean, cov, use_cdf=False):
    gmm = GaussianMixture(n_components=weight.size)
    gmm.weights_ = weight
    gmm.means_ = mean
    gmm.covariances_ = cov
    # Pass the fit check
    gmm.precisions_cholesky_ = None
    params = gmm.sample()[0][0]
    if use_cdf:
        params = ndtr(params)
    else:
        params = np.clip(params, 0, 1)
    params = params.tolist()
    return params
Example #22
def visualize_latent_space(file_name,
                           z_mean,
                           z_std,
                           x_label='$\\mathbf{z}$',
                           y_label='pdf',
                           show=False):
    """Visualizes approximation ability.

    Args:
        file_name: File name without extension.
        z_mean: ndarray (N, dZ) with the means of the latent states.
        z_std: ndarray (N, dZ) with the standard deviations of the latent states.
        x_label: Label of the x-axis.
        y_label: Label of the y-axis.
        show: Display generated plot. This is a blocking operation.

    """
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax1.set_xlabel(x_label)
    ax1.set_ylabel(y_label)
    ax1.grid(linestyle=':')

    N, dZ = z_mean.shape

    xs = np.linspace(-3, 3, 1000).reshape(1000, 1)
    plt.plot(xs,
             sp.stats.norm.pdf(xs),
             color="black",
             linestyle=":",
             label='$\\mathcal{N}(0,1)$')
    for dim in range(dZ):
        # Fit GMM by hand
        gmm = GaussianMixture(N)
        gmm.means_ = z_mean[:, dim].reshape(N, 1)
        gmm.precisions_cholesky_ = (1 / z_std[:, dim]).reshape(N, 1, 1)
        gmm.weights_ = np.ones(N) / N
        ax1.plot(xs,
                 np.exp(gmm.score_samples(xs)),
                 linewidth=1,
                 label='$\\mathbf{z}[%d]$' % dim)

    ax1.legend()

    if file_name is not None:
        fig.savefig(file_name + ".pdf", bbox_inches='tight', pad_inches=0)
    if show:
        plt.show()
    plt.close(fig)
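A usage sketch for visualize_latent_space on made-up latent states (50 samples of a 2-D latent space); it writes latent_space.pdf next to the script and assumes matplotlib, scipy and numpy are imported as in the original module.

import numpy as np

z_mean = 0.2 * np.random.randn(50, 2)   # hypothetical latent means
z_std = np.full((50, 2), 0.8)           # hypothetical latent std deviations

visualize_latent_space('latent_space', z_mean, z_std, show=False)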
Example #23
  def _sample_rows_same(self, X):
    """ uses efficient sklearn implementation to sample from gaussian mixture -> only works if all rows of X are the same"""
    weights, locs, scales = self._get_mixture_components(np.expand_dims(X[0], axis=0))

    # make sure the weights sum to 1
    weights = weights.astype(np.float64)
    weights = weights / np.sum(weights)

    gmm = GaussianMixture(n_components=self.n_centers, covariance_type='diag', max_iter=5, tol=1e-1)
    gmm.fit(np.random.normal(size=(100,self.ndim_y))) # just pretending a fit
    # overriding the GMM parameters with own params
    gmm.converged_ = True
    gmm.weights_ = weights[0]
    gmm.means_ = locs[0]
    gmm.covariances_ = scales[0]
    y_sample, _ = gmm.sample(X.shape[0])
    assert y_sample.shape == (X.shape[0], self.ndim_y)
    return X, y_sample
Example #24
    def fit(self, df):
        if len(df.columns) > 2:
            print('error: data should have at most 2 dimensions')
            return

        self.data = df
        if self.use_kmeans_init:
            # pass the k-means centroids via means_init so that fit() actually uses them
            gm = GaussianMixture(n_components=self.no_clusters,
                                 means_init=init_k_means(df, no_clusters=self.no_clusters),
                                 random_state=0)
            print(f'k-means clustering initialize: {gm.means_init}')
            gm.fit(df)
        else:
            gm = GaussianMixture(n_components=self.no_clusters,
                                 random_state=0).fit(df)
            print('clustering without k-means initialization')
        self.means = gm.means_
        self.variance = gm.covariances_
        self.proportions = gm.weights_
        return self.means, self.variance, self.proportions
Example #25
def read_pred():
    means = readFloat('%s-mixture_distribution_means.float3' %
                      predition_path)  # shape (4, 2)
    sigmas = readFloat('%s-mixture_distribution_sigmas.float3' %
                       predition_path)  # shape (4, 2)
    weights = readFloat('%s-mixture_distribution_weights.float3' %
                        predition_path)  # shape (4)

    sigmas = 2 * sigmas * sigmas

    gmm = GaussianMixture(n_components=4, covariance_type='diag')

    precisions_cholesky = _compute_precision_cholesky(sigmas, 'diag')
    gmm.weights_ = weights
    gmm.means_ = means
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = sigmas

    return gmm
Example #26
def EM_Process(data, n, covt):
    '''
    data: array-like data to cluster
    n: the number of mixture components
    covt: covariance_type, one of {'full', 'tied', 'diag', 'spherical'}
    '''

    GM = GaussianMixture(n_components=n,
                         covariance_type=covt,
                         max_iter=600,
                         random_state=3)
    GM.fit(data)
    clusters = GM.predict(data)

    return clusters
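A brief usage sketch for EM_Process on made-up 3-D data, clustering two well-separated blobs with full covariances.

import numpy as np

data = np.vstack([np.random.randn(200, 3),
                  np.random.randn(200, 3) + 5.0])
labels = EM_Process(data, n=2, covt='full')
print(labels.shape, np.unique(labels))  # (400,) [0 1]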
Example #27
    def train(self, train_data):
        # 1. Create a GMM object and specify the number of components (classes) in the object
        # 2. Fit the model to our training data. NOTE: You may need to reshape with np.reshape(-1,1)
        # 3. Return None
        data = np.array(train_data).reshape(-1, 1)
        gmm = GaussianMixture(n_components=2)
        gmm.fit(data)

        # sort components by mean so that class 0 is always the lower-mean component
        order = gmm.means_.argsort(axis=0)[:, 0]
        gmm.means_ = gmm.means_[order, :]
        gmm.covariances_ = gmm.covariances_[order, :]
        gmm.weights_ = gmm.weights_[order]
        # keep the cached precision factors aligned with the reordered parameters
        gmm.precisions_cholesky_ = gmm.precisions_cholesky_[order]

        self.__model = gmm

        return
Example #28
def test_once_by_random_features():
    Xtrain = numpy.random.random_sample((5000)).reshape(-1, 10)
    Xtest = numpy.random.random_sample((500)).reshape(-1, 10)

    gmm_orig = GaussianMixture(n_components=8, random_state=1)
    gmm_copy = GaussianMixture(n_components=8)  # match the component count of the copied model

    gmm_orig.fit(Xtrain)

    gmm_copy.weights_ = gmm_orig.weights_
    gmm_copy.means_ = gmm_orig.means_
    gmm_copy.covariances_ = gmm_orig.covariances_
    gmm_copy.precisions_ = gmm_orig.precisions_
    gmm_copy.precisions_cholesky_ = gmm_orig.precisions_cholesky_
    gmm_copy.converged_ = gmm_orig.converged_
    gmm_copy.n_iter_ = gmm_orig.n_iter_
    gmm_copy.lower_bound_ = gmm_orig.lower_bound_

    y_orig = gmm_orig.score_samples(Xtest)
    y_copy = gmm_copy.score_samples(Xtest)

    return all(y_orig == y_copy)
Example #29
 def test__estimate_log_prob_resp_spherical_shared_compression(self):
     rs = np.random.RandomState(11)
     cov_type = 'spherical'
     gmm = GaussianMixture(n_components=3,
                           num_feat_full=5,
                           num_feat_comp=3,
                           num_feat_shared=3,
                           num_samp=4,
                           transform=None,
                           mask=None,
                           D_indices=None,
                           covariance_type=cov_type,
                           random_state=rs)
     gmm.fit_sparsifier(X=self.td.X)
     means = rs.rand(gmm.n_components, gmm.num_feat_full)
     covariances = rs.rand(gmm.n_components)
     weights = rs.rand(gmm.n_components)
     weights /= weights.sum()
     log_prob_test, log_resp_test, log_prob_norm_test = gmm._estimate_log_prob_resp(
         weights, means, covariances, cov_type)
     # find skl's values, pretty ugly to do.
     precisions = _compute_precision_cholesky(covariances, cov_type)
     gmm_skl = GMSKL(n_components=3, covariance_type=cov_type)
     # we need the mask to be shared so that we can use mask[0] on all means
     gmm_skl.means_ = means[:, gmm.mask[0]]
     gmm_skl.precisions_cholesky_ = precisions
     gmm_skl.weights_ = weights
     gmm_skl.covariance_type_ = cov_type
     log_prob_norm_true, log_resp_true = gmm_skl._estimate_log_prob_resp(
         gmm.RHDX)
     # if anything is bad later this overwrite with mean seems suspect:
     log_prob_norm_true = log_prob_norm_true.mean()
     # now get the log_prob from another function
     log_prob_true = _estimate_log_gaussian_prob(gmm.RHDX, gmm_skl.means_,
                                                 precisions, cov_type)
     # run the tests
     self.assertArrayEqual(log_prob_test, log_prob_true)
     self.assertArrayEqual(log_prob_norm_true, log_prob_norm_test)
     self.assertArrayEqual(log_resp_true, log_resp_test)
Example #30
def fit_markov_chain(y,plot=False):
	y_0 = y[:-1]
	y_1 = y[1:]
	grad_0 = np.gradient(y_0)
	grad_1 = np.gradient(y_1)
	state_1 = grad_1[np.where(grad_0 < 0)] # instances where previous gradient was negative
	state_2 = grad_1[np.where(grad_0 > 0)] # instances where previous gradient was positive
	mean_1,std_1 = stats.norm.fit(state_1)
	mean_2,std_2 = stats.norm.fit(state_2)
	# Reshaping parameters to be suitable for sklearn.GaussianMixture
	means = np.array([mean_1,mean_2])
	means = means.reshape(2,1)
	y_GM = np.concatenate((state_2.reshape(-1,1),state_1.reshape(-1,1)))
	# spherical parameters: covariances_ holds variances, precisions_cholesky_ holds 1/std
	variances = np.array([std_1**2, std_2**2])
	GM = GaussianMixture(n_components=2, covariance_type='spherical')
	GM.weights_ = np.array([0.5, 0.5])
	GM.means_ = means
	GM.covariances_ = variances
	GM.precisions_ = 1 / variances
	GM.precisions_cholesky_ = 1 / np.array([std_1, std_2])
	GM.converged_ = True
	if(plot):
		samples = GM.sample(5000)[0]
		fig,ax_list = plt.subplots(3,1)
		fig.set_size_inches(20,20)
		ax_list[0].hist(state_1,bins=70)
		ax_list[1].hist(state_2,bins=70)
		lnspc_1 = np.linspace(state_1.min(),state_1.max(),y.shape[0])
		gauss_1 = stats.norm.pdf(lnspc_1, mean_1, std_1)
		lnspc_2 = np.linspace(state_2.min(),state_2.max(),y.shape[0])
		gauss_2 = stats.norm.pdf(lnspc_2, mean_2, std_2)
		ax_list[0].plot(lnspc_1,gauss_1)
		ax_list[1].plot(lnspc_2,gauss_2)
		ax_list[0].scatter(mean_1,30)
		ax_list[1].scatter(mean_2,30)
		ax_list[2].hist(samples,bins=100)
		plt.show()
	return GM
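A usage sketch for fit_markov_chain on a synthetic noisy oscillation (purely illustrative data); the returned GaussianMixture can be sampled directly.

import numpy as np

t = np.linspace(0, 20 * np.pi, 2000)
y = np.sin(t) + 0.1 * np.random.randn(t.size)   # hypothetical signal

GM = fit_markov_chain(y)
samples, labels = GM.sample(1000)   # draw from the two-state mixture
print(samples.shape)                # (1000, 1)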
Example #31
def fit_gmm_to_points(points,
                      n_components,
                      mdl,
                      ps=[],
                      num_iter=100,
                      covariance_type='full',
                      min_covar=0.001,
                      init_centers=[],
                      force_radii=-1.0,
                      force_weight=-1.0,
                      mass_multiplier=1.0):
    """fit a GMM to some points. Will return the score and the Akaike score.
    Akaike information criterion for the current model fit. It is a measure
    of the relative quality of the GMM that takes into account the
    parsimony and the goodness of the fit.
    if no particles are provided, they will be created

    points:            list of coordinates (python)
    n_components:      number of gaussians to create
    mdl:               IMP Model
    ps:                list of particles to be decorated. if empty, will add
    num_iter:          number of EM iterations
    covariance_type:   covar type for the gaussians. options: 'full', 'diag', 'spherical'
    min_covar:         assign a minimum value to covariance term. That is used to have more spherical
                       shaped gaussians
    init_centers:      initial coordinates of the GMM
    force_radii:       fix the radii (spheres only)
    force_weight:      fix the weights
    mass_multiplier:   multiply the weights of all the gaussians by this value
    dirichlet:         use the DGMM fitting (can reduce number of components, takes longer)
    """


    new_sklearn = False
    try:
        from sklearn.mixture import GMM
    except ImportError:
        from sklearn.mixture import GaussianMixture
        new_sklearn = True

    print('creating GMM with n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
    if new_sklearn:
        # aic() calls size() on points, so it needs to be a numpy array, not a list
        points = np.array(points)
        weights_init = precisions_init = None
        if force_radii != -1.0:
            print('warning: radii can no longer be forced, but setting '
                  'initial values to ', force_radii)
            precisions_init = np.array([[1./force_radii]*3
                                       for i in range(n_components)])
        if force_weight != -1.0:
            print('warning: weights can no longer be forced, but setting '
                  'initial values to ', force_weight)
            weights_init = np.array([force_weight]*n_components)

        gmm = GaussianMixture(n_components=n_components,
                              max_iter=num_iter,
                              covariance_type=covariance_type,
                              weights_init=weights_init,
                              precisions_init=precisions_init,
                              means_init=None if init_centers==[]
                                              else init_centers)
    else:
        params='m'
        init_params='m'
        if force_radii==-1.0:
            params+='c'
            init_params+='c'
        else:
            covariance_type='spherical'
            print('forcing spherical with radii',force_radii)

        if force_weight==-1.0:
            params+='w'
            init_params+='w'
        else:
            print('forcing weights to be',force_weight)

        gmm = GMM(n_components=n_components, n_iter=num_iter,
                  covariance_type=covariance_type, min_covar=min_covar,
                  params=params, init_params=init_params)
        if force_weight!=-1.0:
            gmm.weights_=np.array([force_weight]*n_components)
        if force_radii!=-1.0:
            gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
        if init_centers!=[]:
            gmm.means_=init_centers
    print('fitting')
    model=gmm.fit(points)
    score=gmm.score(points)
    akaikescore=model.aic(points)
    #print('>>> GMM score',gmm.score(points))

    ### convert format to core::Gaussian
    if new_sklearn:
        covars = gmm.covariances_
    else:
        covars = gmm.covars_
    for ng in range(n_components):
        covar=covars[ng]
        if covar.size==3:
            covar=np.diag(covar).tolist()
        else:
            covar=covar.tolist()
        center=list(gmm.means_[ng])
        weight=mass_multiplier*gmm.weights_[ng]
        if ng>=len(ps):
            ps.append(IMP.Particle(mdl))
        shape=IMP.algebra.get_gaussian_from_covariance(covar,IMP.algebra.Vector3D(center))
        g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
        IMP.atom.Mass.setup_particle(ps[ng],weight)
        IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))

    return (score,akaikescore)