Example #1
def get_P_of_Data_Given_Param(means,
                              covs,
                              weights,
                              X,
                              method='scipy'):  # P(Data | Param)
    samples = list(enumerate(X))  # materialize: the list is iterated more than once
    p = {}
    if method == 'scipy':
        g = [
            multivariate_normal(mean=means[k],
                                cov=covs[k],
                                allow_singular=False)
            for k in range(0, len(weights))
        ]
        gaussians = {}
        for index, x in samples:
            gaussians[index] = np.array([g_k.pdf(x) for g_k in g])
        for index, x in samples:
            probabilities = np.multiply(gaussians[index], weights)
            probabilities = probabilities / np.sum(probabilities)
            p[index] = probabilities
    else:
        gmm = GaussianMixture(n_components=len(weights),
                              covariance_type='diag').fit(X)
        gmm.means_ = means
        gmm.covariances_ = covs
        gmm.weights_ = weights
        # recompute the precision Cholesky factors so they match the injected
        # parameters (assumes covs is in 'diag' layout, shape (n_components,
        # n_features)); leaving a placeholder value would silently skew predict_proba
        from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky
        gmm.precisions_cholesky_ = _compute_precision_cholesky(covs, 'diag')

        for index, x in samples:
            x = x.reshape(1, -1)
            # predict_proba already returns responsibilities normalized over components
            likelihood_ratio = gmm.predict_proba(x)
            p[index] = likelihood_ratio
    return p
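A quick sanity check for the function above, as a hypothetical sketch (the 2-component parameters and data are made up; numpy and scipy.stats.multivariate_normal are assumed imported as the snippet implies): with equal weights and well-separated components, each point's posterior should concentrate on its nearest component.

import numpy as np
from scipy.stats import multivariate_normal

means = [np.zeros(2), 5.0 * np.ones(2)]
covs = [np.eye(2), np.eye(2)]
weights = np.array([0.5, 0.5])
X = np.array([[0.1, -0.2], [5.2, 4.9]])

p = get_P_of_Data_Given_Param(means, covs, weights, X, method='scipy')
assert p[0][0] > 0.99 and p[1][1] > 0.99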
Example #2
    def log_prob(self, x, t, feature, samples):
        observations = x[:, :t]
        p_s_past = {}  #p(s_t-1|X_{0:t-1})
        for st in self.states:
            p_s_past[st], _, _ = fwd_bkw(observations, self.states,
                                         self.start_probability,
                                         self.transition_probability,
                                         self.emission_probability, st)

        p_currstate_past = {}
        for s in self.states:
            p_currstate_past[s] = 0.

        for curr_state in self.states:
            for st in self.states:
                p_currstate_past[curr_state] += self.transition_probability[
                    curr_state][st] * p_s_past[st]

        gmm = GaussianMixture(n_components=len(self.states),
                              covariance_type='full')
        gmm.fit(np.random.randn(10, observations.shape[0]))
        gmm.weights_ = list(p_currstate_past.values())
        gmm.means_ = np.array(self.mean)
        gmm.covariances_ = np.array(self.cov)
        for i in range(len(self.states)):
            gmm.precisions_[i] = np.linalg.inv(gmm.covariances_[i])
            # the Cholesky factor must come from the precision, not the covariance
            gmm.precisions_cholesky_[i] = np.linalg.cholesky(
                gmm.precisions_[i])

        return gmm.score_samples(samples)
Example #3
def get_3d_grid_gmm(subdivisions=[5, 5, 5], variance=0.04):
    """
    Compute the weight, mean and covariance of a gmm placed on a 3D grid
    :param subdivisions: 2 element list of number of subdivisions of the 3D space in each axes to form the grid
    :param variance: scalar for spherical gmm.p
    :return gmm: gmm: instance of sklearn GaussianMixture (GMM) object Gauassian mixture model
    """
    # n_gaussians = reduce(lambda x, y: x*y,subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [
        1.0 / (subdivisions[0]), 1.0 / (subdivisions[1]),
        1.0 / (subdivisions[2])
    ]

    means = np.mgrid[step[0] - 1:1.0 - step[0]:complex(0, subdivisions[0]),
                     step[1] - 1:1.0 - step[1]:complex(0, subdivisions[1]),
                     step[2] - 1:1.0 - step[2]:complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    # private sklearn helper; lived in sklearn.mixture.gaussian_mixture before 0.22
    from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
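A minimal usage sketch (hypothetical values): the returned model is a regular GaussianMixture, so it can score arbitrary 3D points even though it was never fit.

import numpy as np

gmm = get_3d_grid_gmm(subdivisions=[5, 5, 5], variance=0.04)
points = np.random.uniform(-1, 1, size=(1000, 3))  # grid means live inside (-1, 1)^3
log_density = gmm.score_samples(points)  # one log-density value per point
print(log_density.shape)  # (1000,)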
Example #4
def sample_gaussian_mixture(pis, sigmas, mus, num_samples, n_features):
    """
    return: array of size (batch_size, num_samples*n_features) containing samples
    taken from the gaussian mixture parameterized by pis, sigmas, mus,
        e.g.
        input 1 [[ s1_f1,s1_f2,s1_f3 | s2_f1, s2_f2, s2_f3 |.... ],
        input 2  [ s1_f1,s1_f2,s1_f3 | s2_f1, s2_f2, s2_f3 |.... ],
        input 3  [ s1_f1,s1_f2,s1_f3 | s2_f1, s2_f2, s2_f3 |.... ],
        .        [...............................................],
        input n  [ s1_f1,s1_f2,s1_f3 | s2_f1, s2_f2, s2_f3 |.... ]]
    """
    # Gaussian PDF parameters
    batch_size = pis.shape[0]
    num_mixtures = pis.shape[1]
    samples = np.zeros((batch_size, num_samples * n_features))
    gmm = GaussianMixture(n_components=num_mixtures,
                          covariance_type='spherical')
    gmm.fit(np.random.rand(10, 1))  # Now it thinks it is trained
    for i in range(batch_size):
        gmm.weights_ = pis[i]
        gmm.means_ = mus[i].reshape(num_mixtures, n_features)
        gmm.covariances_ = np.expand_dims(sigmas[i], axis=1)**2
        sample = gmm.sample(num_samples)
        samples[i] = np.ravel(sample[0])
    return Variable(torch.from_numpy(samples))
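A hedged usage sketch with made-up parameters (a batch of 2 inputs, 3 mixture components, a single feature; torch and Variable are assumed imported at module level as the snippet implies); each row of the result holds num_samples draws.

import numpy as np

pis = np.array([[0.2, 0.5, 0.3],
                [0.6, 0.3, 0.1]])        # mixture weights per input
mus = np.array([[-1.0, 0.0, 1.0],
                [-2.0, 0.0, 2.0]])       # component means per input
sigmas = np.array([[0.1, 0.2, 0.3],
                   [0.3, 0.2, 0.1]])     # component stds per input
samples = sample_gaussian_mixture(pis, sigmas, mus, num_samples=5, n_features=1)
print(samples.shape)  # torch.Size([2, 5])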
Example #5
    def test_fit(self):
        expected_means = np.array([-55., 0., 7.])
        expected_stds = np.array([3., .5, 1.])

        from sklearn.mixture import GaussianMixture
        gmm = GaussianMixture(n_components=3, means_init=expected_means)
        gmm.means_ = expected_means[..., None]
        gmm.covariances_ = np.array(expected_stds[..., None, None])
        gmm.weights_ = np.array([1 / 3, 1 / 3, 1 / 3])

        obs = gmm.sample(100000 + np.random.randint(-3, 3))[0].squeeze()

        init = deeptime.markov.hmm.init.gaussian.from_data(obs,
                                                           n_hidden_states=3,
                                                           reversible=True)
        hmm_est = deeptime.markov.hmm.MaximumLikelihoodHMM(init)
        hmm = hmm_est.fit(obs).fetch_model()

        np.testing.assert_array_almost_equal(
            hmm.transition_model.transition_matrix, np.eye(3), decimal=3)
        m = hmm.output_model
        for mean, sigma in zip(m.means, m.sigmas):
            # find the mean closest to this one (order might have changed)
            mean_ix = np.argmin(np.abs(expected_means - mean))
            np.testing.assert_almost_equal(mean,
                                           expected_means[mean_ix],
                                           decimal=1)
            np.testing.assert_almost_equal(sigma * sigma,
                                           expected_stds[mean_ix],
                                           decimal=1)
Example #6
    def toGaussianMixture(self):
        g = GaussianMixture(self.n)
        g.fit(np.random.rand(2 * self.n).reshape((-1, 1)))
        g.weights_ = np.array(self.weights)
        g.means_ = np.array(self.means)[:, np.newaxis]
        g.covariances_ = np.array(self.covariances)[:, np.newaxis, np.newaxis]
        return g
Example #7
def gmm_scale(gmm, shift=None, scale=None, reverse=False, params=None):
    """
    Apply scaling factors to GMM instances.

    Parameters
    ----------
    gmm : GaussianMixture
        GMM instance to be scaled.
    shift : int, float, optional
        Shift for the entire model. Default is 0 (no shift).
    scale : int, float, optional
        Scale for all components. Default is 1 (no scale).
    reverse : bool, optional
        Whether the GMM should be reversed.
    params
        GaussianMixture params for initialization of new instance.

    Returns
    -------
    GaussianMixture
        Modified GMM instance.

    """

    # Fetch parameters if not supplied
    if params is None:
        # noinspection PyUnresolvedReferences
        params = gmm.get_params()

    # Instantiate new GMM
    gmm_new = GaussianMixture(**params)

    # Create scaled fitted GMM model
    gmm_new.weights_ = gmm.weights_

    # Apply shift if set (copy so the original model's means are never mutated in place)
    gmm_new.means_ = gmm.means_ + shift if shift is not None else gmm.means_.copy()

    # Apply scale
    if scale is not None:
        gmm_new.means_ /= scale

    gmm_new.covariances_ = gmm.covariances_ / scale ** 2 if scale is not None else gmm.covariances_
    gmm_new.precisions_ = np.linalg.inv(gmm_new.covariances_) if scale is not None else gmm.precisions_
    gmm_new.precisions_cholesky_ = np.linalg.cholesky(gmm_new.precisions_) if scale is not None \
        else gmm.precisions_cholesky_

    # Reverse if set
    if reverse:
        gmm_new.means_ *= -1

    # Add converged attribute if available
    if hasattr(gmm, "converged_"):
        gmm_new.converged_ = gmm.converged_

    # Return scaled GMM
    return gmm_new
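A small self-check sketch (hypothetical 1D data): after scaling, the new model's means should sit at (mu + shift) / scale, and samples drawn from it should be distributed like (x + shift) / scale.

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
x = np.concatenate([rng.normal(-2, 0.5, 500), rng.normal(3, 1.0, 500)])[:, None]
gmm = GaussianMixture(n_components=2, random_state=0).fit(x)

gmm_scaled = gmm_scale(gmm, shift=1.0, scale=10.0)
print(gmm.means_.ravel())         # roughly [-2, 3]
print(gmm_scaled.means_.ravel())  # roughly [(-2 + 1) / 10, (3 + 1) / 10]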
Example #8
    def return_copy(self):
        '''If anything goes wrong, make sure that the means and weights
        are assigned by copy rather than by reference.
        '''
        copy_tmp = GMM(n_components=self.n_components)

        copy_tmp.covariances_ = self.covariances_  #_get_covars()
        copy_tmp.means_ = self.means_
        copy_tmp.weights_ = self.weights_

        return copy_tmp
Example #9
def multimod_emd_from_gmm(means, sigmas, weights):
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    gmm_vars = 2 * sigmas_stacked * sigmas_stacked
    precisions_cholesky = _compute_precision_cholesky(gmm_vars, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = gmm_vars
    y_sampled, _ = gmm.sample(1000)
    return wemd_from_pred_samples(y_sampled)
Example #10
def get_multimodality_score(means, sigmas, weights):
    means_stacked = np.concatenate(means, axis=0)[:, :, 0, 0]
    sigmas_stacked = np.concatenate(sigmas, axis=0)[:, :, 0, 0]
    weights_stacked = np.concatenate(weights, axis=0)[:, 0, 0, 0]
    gmm = GaussianMixture(n_components=4, covariance_type='diag')
    vars = 2 * sigmas_stacked * sigmas_stacked
    precisions_cholesky = _compute_precision_cholesky(vars, 'diag')
    gmm.weights_ = weights_stacked
    gmm.means_ = means_stacked
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = vars
    gmm_uni = GaussianMixture(n_components=1, covariance_type='diag')
    argmax = np.argmax(gmm.weights_)
    gmm_uni.means_ = gmm.means_[argmax, :].reshape([1, 2])
    gmm_uni.covariances_ = gmm.covariances_[argmax, :].reshape([1, 2])
    gmm_uni.precisions_cholesky_ = gmm.precisions_cholesky_[argmax, :].reshape(
        [1, 2])
    gmm_uni.weights_ = np.array([1]).reshape([1])
    Z_uni = compute_histogram_gmm(gmm_uni)
    Z = compute_histogram_gmm(gmm)
    ratio = computeWEMD(Z, Z_uni)
    return ratio
Example #11
def create_sklearn_gmm(weights, mean_tensor, cov_tensor, random_state=0):
    n_components = len(weights)
    gmm = GaussianMixture(n_components=n_components,
                          covariance_type='full',
                          random_state=random_state)
    gmm.weights_ = weights.numpy()
    gmm.means_ = mean_tensor.numpy()
    gmm.covariances_ = cov_tensor.numpy()
    gmm.precisions_ = np.array(
        [np.linalg.inv(cov) for cov in gmm.covariances_])
    gmm.precisions_cholesky_ = np.array(
        [np.linalg.cholesky(prec) for prec in gmm.precisions_])
    return gmm
Example #12
    def jsd_diss(self, w1, mu1, cov1, w2, mu2, cov2, sample_count=10000):
        """
        Calculates the Jensen-Shannon divergence between two GMMs,
        estimated with Monte Carlo samples.
        :param w1, mu1, cov1: weights, means and covariances of the first GMM
        :param w2, mu2, cov2: weights, means and covariances of the second GMM
        :param sample_count: number of Monte Carlo samples to use
        :return: Jensen-Shannon divergence
        """
        gmm_p = GaussianMixture(n_components=len(w1),
                                covariance_type="full")
        gmm_p.weights_ = w1
        gmm_p.covariances_ = cov1
        gmm_p.means_ = mu1
        gmm_p.precisions_cholesky_ = _compute_precision_cholesky(cov1, "full")

        gmm_q = GaussianMixture(n_components=len(w2),
                                covariance_type="full")
        gmm_q.weights_ = w2
        gmm_q.covariances_ = cov2
        gmm_q.means_ = mu2
        gmm_q.precisions_cholesky_ = _compute_precision_cholesky(cov2, "full")

        X = gmm_p.sample(sample_count)[0]
        log_p_X = gmm_p.score_samples(X)
        log_q_X = gmm_q.score_samples(X)
        log_mix_X = np.logaddexp(log_p_X, log_q_X)

        Y = gmm_q.sample(sample_count)[0]
        log_p_Y = gmm_p.score_samples(Y)
        log_q_Y = gmm_q.score_samples(Y)
        log_mix_Y = np.logaddexp(log_p_Y, log_q_Y)

        # JSD(P, Q) = (KL(P||M) + KL(Q||M)) / 2 with M = (P + Q) / 2,
        # where log M(x) = logaddexp(log p(x), log q(x)) - log 2
        return (log_p_X.mean() -
                (log_mix_X.mean() - np.log(2)) + log_q_Y.mean() -
                (log_mix_Y.mean() - np.log(2))) / 2
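A standalone sanity check of the same Monte Carlo recipe (hypothetical data): for two identical mixtures, log p(x) - (log M(x) - log 2) vanishes pointwise, so the KL term must come out at about zero up to floating-point error.

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
gmm_p = GaussianMixture(n_components=2, covariance_type="full",
                        random_state=0).fit(rng.normal(size=(500, 2)))
gmm_q = gmm_p  # identical distribution

X = gmm_p.sample(10000)[0]
log_p_X, log_q_X = gmm_p.score_samples(X), gmm_q.score_samples(X)
log_mix_X = np.logaddexp(log_p_X, log_q_X)
half_kl_p = log_p_X.mean() - (log_mix_X.mean() - np.log(2))
print(half_kl_p)  # ~0: logaddexp(a, a) - log 2 == a when p == q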
Example #13
def generate_params_gmm(weight, mean, cov, use_cdf=False):
    gmm = GaussianMixture(n_components=weight.size)
    gmm.weights_ = weight
    gmm.means_ = mean
    gmm.covariances_ = cov
    # the attribute only needs to exist to pass sklearn's fitted check;
    # sample() itself does not use it
    gmm.precisions_cholesky_ = None
    params = gmm.sample()[0][0]
    if use_cdf:
        params = ndtr(params)
    else:
        params = np.clip(params, 0, 1)
    params = params.tolist()
    return params
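A hypothetical call with a 2-component, 2-dimensional mixture; with use_cdf=True every coordinate is pushed through the standard normal CDF, which maps it smoothly into (0, 1) instead of hard-clipping at the boundaries.

import numpy as np

weight = np.array([0.5, 0.5])
mean = np.array([[0.0, 0.0], [1.0, 1.0]])
cov = np.array([np.eye(2) * 0.1, np.eye(2) * 0.1])
params = generate_params_gmm(weight, mean, cov, use_cdf=True)
print(params)  # e.g. [0.53, 0.97]: one value per dimension, inside (0, 1)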
Example #14
def generate_equal_weight_GMM(H_mu, H_var, covariance_type='diag'):
    n_components = len(H_mu)
    weights_init = len(H_mu) * [1. / len(H_mu)]
    GMM = GaussianMixture(n_components=n_components,
                          covariance_type=covariance_type, n_init=1, weights_init=None, means_init=None,
                          precisions_init=None, random_state=None, warm_start=True, verbose=0,
                          verbose_interval=10)
    GMM.weights_ = weights_init
    GMM.means_ = H_mu
    GMM.covariances_ = H_var
    GMM.precisions_cholesky_ = _compute_precision_cholesky(
        H_var, covariance_type)

    return GMM
Example #15
def gmm_loglik(y,pi,mu,sigma,K):
    model = GaussianMixture(K, covariance_type = 'diag')
    model.fit(y)
    N = np.shape(mu)[0]
    N_test = np.shape(y)[0]
    ll_test = np.zeros(N)
    for i in range(N):
        model.means_ = mu[i,:]
        model.covariances_ = sigma[i,:]**2
        model.precisions_ = 1/(sigma[i,:]**2)
        model.weights_ = pi[i,:]
        model.precisions_cholesky_ = _compute_precision_cholesky(model.covariances_, model.covariance_type)
        ll_test[i] = model.score(y) 
    return ll_test*N_test
Example #16
  def _sample_rows_same(self, X):
    """ uses efficient sklearn implementation to sample from gaussian mixture -> only works if all rows of X are the same"""
    weights, locs, scales = self._get_mixture_components(np.expand_dims(X[0], axis=0))

    # normalize the weights so they sum to 1
    weights = weights.astype(np.float64)
    weights = weights / np.sum(weights)

    gmm = GaussianMixture(n_components=self.n_centers, covariance_type='diag', max_iter=5, tol=1e-1)
    gmm.fit(np.random.normal(size=(100,self.ndim_y))) # just pretending a fit
    # overriding the GMM parameters with own params
    gmm.converged_ = True
    gmm.weights_ = weights[0]
    gmm.means_ = locs[0]
    gmm.covariances_ = scales[0]
    y_sample, _ = gmm.sample(X.shape[0])
    assert y_sample.shape == (X.shape[0], self.ndim_y)
    return X, y_sample
Example #17
def read_pred():
    means = readFloat('%s-mixture_distribution_means.float3' %
                      predition_path)  # shape (4, 2)
    sigmas = readFloat('%s-mixture_distribution_sigmas.float3' %
                       predition_path)  # shape (4, 2)
    weights = readFloat('%s-mixture_distribution_weights.float3' %
                        predition_path)  # shape (4)

    sigmas = 2 * sigmas * sigmas

    gmm = GaussianMixture(n_components=4, covariance_type='diag')

    precisions_cholesky = _compute_precision_cholesky(sigmas, 'diag')
    gmm.weights_ = weights
    gmm.means_ = means
    gmm.precisions_cholesky_ = precisions_cholesky
    gmm.covariances_ = sigmas

    return gmm
Example #18
def EM_Process(data, n, covt):
    '''
    data: array-like of shape (n_samples, n_features).
    n: the number of components.
    covt: covariance_type, one of
    {'full', 'tied', 'diag', 'spherical'}.
    '''

    GM = GaussianMixture(n_components=n,
                         covariance_type=covt,
                         max_iter=600,
                         random_state=3)
    # note: fit() below re-estimates the parameters, so these two
    # assignments have no effect on the final model
    GM.means_ = np.zeros(3)
    GM.covariances_ = np.identity(3)
    GM.fit(data)
    clusters = GM.predict(data)

    return clusters
Example #19
    def train(self, train_data):
        # 1. Create a GMM object and specify the number of components (classes) in the object
        # 2. Fit the model to our training data. NOTE: You may need to reshape with np.reshape(-1,1)
        # 3. Return None
        data = np.array(train_data).reshape(-1, 1)
        gmm = GaussianMixture(n_components=2)
        fit = gmm.fit(data)

        sort_indices = gmm.means_.argsort(axis=0)
        order = sort_indices[:, 0]
        gmm.means_ = gmm.means_[order, :]
        gmm.covariances_ = gmm.covariances_[order, :]
        gmm.weights_ = gmm.weights_[order]

        self.__model = gmm

        return
Example #20
def test_once_by_random_features():
    Xtrain = numpy.random.random_sample((5000)).reshape(-1, 10)
    Xtest = numpy.random.random_sample((500)).reshape(-1, 10)

    gmm_orig = GaussianMixture(n_components=8, random_state=1)
    gmm_copy = GaussianMixture()

    gmm_orig.fit(Xtrain)

    gmm_copy.weights_ = gmm_orig.weights_
    gmm_copy.means_ = gmm_orig.means_
    gmm_copy.covariances_ = gmm_orig.covariances_
    gmm_copy.precisions_ = gmm_orig.precisions_
    gmm_copy.precisions_cholesky_ = gmm_orig.precisions_cholesky_
    gmm_copy.converged_ = gmm_orig.converged_
    gmm_copy.n_iter_ = gmm_orig.n_iter_
    gmm_copy.lower_bound_ = gmm_orig.lower_bound_

    y_orig = gmm_orig.score_samples(Xtest)
    y_copy = gmm_copy.score_samples(Xtest)

    return all(y_orig == y_copy)
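The attributes copied above are exactly the fitted state that sklearn's check looks for; a minimal sketch packing the same idea into a reusable helper (the function name and attribute list are this listing's convention, not a sklearn API):

import copy
from sklearn.mixture import GaussianMixture

def clone_fitted_gmm(gmm_orig):
    """Clone a fitted GaussianMixture without refitting it."""
    gmm_copy = GaussianMixture(**gmm_orig.get_params())
    for attr in ("weights_", "means_", "covariances_", "precisions_",
                 "precisions_cholesky_", "converged_", "n_iter_", "lower_bound_"):
        setattr(gmm_copy, attr, copy.deepcopy(getattr(gmm_orig, attr)))
    return gmm_copy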
Example #21
File: em.py Project: mIXs222/neuode
    def __init__(self, n_components, x, gmm_kwargs=dict()):
        # fit mixture model
        if isinstance(x, torch.Tensor):
            x = x.detach().numpy()
        bsize = x.shape[0]
        g = GaussianMixture(n_components=n_components, **gmm_kwargs) \
            .fit(x.reshape(bsize, -1))
        # g.covariances_ = [np.eye(x.shape[-1])] * bsize
        if len(g.covariances_.shape) == 2:
            g.covariances_ = np.array([np.diag(var) for var in g.covariances_])

        # extract means and covariances to compute the pdf later
        self.log_weights = torch.Tensor(np.log(g.weights_))
        self.mnormals = [
            MultivariateNormal(
                torch.Tensor(mu),
                torch.Tensor(var)) for mu,
            var in zip(
                g.means_,
                g.covariances_)]
        self.means = torch.Tensor(g.means_)
        self.covariances = torch.Tensor(g.covariances_)
Example #22
def fit_markov_chain(y,plot=False):
	y_0 = y[:-1]
	y_1 = y[1:]
	grad_0 = np.gradient(y_0)
	grad_1 = np.gradient(y_1)
	state_1 = grad_1[np.where(grad_0 < 0)] # instances where previous gradient was negative
	state_2 = grad_1[np.where(grad_0 > 0)] # instances where previous gradient was positive
	mean_1,std_1 = stats.norm.fit(state_1)
	mean_2,std_2 = stats.norm.fit(state_2)
	# Reshaping parameters to be suitable for sklearn.GaussianMixture
	means = np.array([mean_1,mean_2])
	means = means.reshape(2,1)
	y_GM = np.concatenate((state_2.reshape(-1,1),state_1.reshape(-1,1)))
	precisions = [1/(std_1**2), 1/(std_2**2)]
	GM = GaussianMixture(n_components=2, covariance_type='spherical')
	GM.weights_ = [0.5, 0.5]
	GM.means_ = means
	# spherical covariances are one variance per component, not a std
	GM.covariances_ = [std_1**2, std_2**2]
	GM.precisions_ = precisions
	# for 'spherical', precisions_cholesky_ holds sqrt(precision) = 1/std
	GM.precisions_cholesky_ = [1/std_1, 1/std_2]
	GM.converged_ = True
	if(plot):
		samples = GM.sample(5000)[0]
		fig,ax_list = plt.subplots(3,1)
		fig.set_size_inches(20,20)
		ax_list[0].hist(state_1,bins=70)
		ax_list[1].hist(state_2,bins=70)
		lnspc_1 = np.linspace(state_1.min(),state_1.max(),y.shape[0])
		gauss_1 = stats.norm.pdf(lnspc_1, mean_1, std_1)
		lnspc_2 = np.linspace(state_2.min(),state_2.max(),y.shape[0])
		gauss_2 = stats.norm.pdf(lnspc_2, mean_2, std_2)
		ax_list[0].plot(lnspc_1,gauss_1)
		ax_list[1].plot(lnspc_2,gauss_2)
		ax_list[0].scatter(mean_1,30)
		ax_list[1].scatter(mean_2,30)
		ax_list[2].hist(samples,bins=100)
		plt.show()
	return GM
Example #23
def load_ubm(path):
    """
    Load UBM stored with save_ubm, returning
    GMM object and normalization vectors

    Parameters:
        path (str): Where to load UBM from
    Returns:
        ubm (sklearn.mixture.GaussianMixture): Trained GMM model
        means, stds (ndarray): Means and stds of variables to
            be stored along UBM for normalization purposes
    """
    data = np.load(path)
    n_components = data["ubm_means"].shape[0]
    cov_type = "diag" if data["ubm_covariances"].ndim == 2 else "full"
    ubm = GaussianMixture(n_components=n_components, covariance_type=cov_type)
    ubm.means_ = data["ubm_means"]
    ubm.weights_ = data["ubm_weights"]
    ubm.covariances_ = data["ubm_covariances"]
    ubm.precisions_ = data["ubm_precisions"]
    ubm.precisions_cholesky_ = data["ubm_precisions_cholesky"]
    means = data["means"]
    stds = data["stds"]
    return ubm, means, stds
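The docstring references save_ubm, which is not shown here; a plausible counterpart inferred from the keys load_ubm reads (hypothetical, the real project's function may differ):

import numpy as np

def save_ubm(path, ubm, means, stds):
    """Store a fitted GaussianMixture plus normalization vectors (assumed layout)."""
    np.savez(path,
             ubm_means=ubm.means_,
             ubm_weights=ubm.weights_,
             ubm_covariances=ubm.covariances_,
             ubm_precisions=ubm.precisions_,
             ubm_precisions_cholesky=ubm.precisions_cholesky_,
             means=means,
             stds=stds)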
Example #24
def _create_gmm(k, means, weights, precisions=None, covariances=None):
    if covariances is None:
        precisions = np.array(precisions)
        covariances = np.linalg.pinv(precisions)
    elif precisions is None:
        covariances = np.array(covariances)
        precisions = np.linalg.pinv(covariances)

    gmm = GaussianMixture(n_components=k,
                          weights_init=weights,
                          means_init=means,
                          reg_covar=1e-2,
                          precisions_init=precisions,
                          max_iter=1,
                          warm_start=True)

    try:
        gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances,
                                                               'full')
    except Exception:
        c2 = covariances.copy()
        covariances = _singular_prevent_multiple(covariances)
        precisions = np.linalg.pinv(covariances)
        try:
            gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances,
                                                                   'full')
        except Exception:
            c2.dump('cov.npy')
            raise Exception('Problem with the covariance matrix! Dumped to cov.npy')

    gmm.weights_ = weights
    gmm.means_ = means
    gmm.covariances_ = covariances
    gmm.precisions_ = precisions

    return gmm
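A hypothetical usage of _create_gmm: build a scoring-ready 2-component model in 2D directly from covariances, with the precisions derived internally.

import numpy as np

weights = np.array([0.4, 0.6])
means = np.array([[0.0, 0.0], [3.0, 3.0]])
covariances = [np.eye(2), 0.5 * np.eye(2)]
gmm = _create_gmm(k=2, means=means, weights=weights, covariances=covariances)
print(gmm.score_samples(means))  # densities evaluated at the component means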
Example #25
    def _estimate_GMM(self, x, n_component_lim=[1, 3]):
        """
        Find the GMM that best fit the data in x using Bayesian information criterion.
        """
        min_comp = n_component_lim[0]
        max_comp = n_component_lim[1]

        lowest_BIC = np.inf

        counter = 0
        for i_comp in range(min_comp, max_comp + 1):
            GMM = GaussianMixture(i_comp)
            GMM.fit(x)
            BIC = GMM.bic(x)
            if BIC < lowest_BIC:
                lowest_BIC = BIC
                best_GMM = GaussianMixture(i_comp)
                best_GMM.weights_ = GMM.weights_
                best_GMM.means_ = GMM.means_
                best_GMM.covariances_ = GMM.covariances_
                # copy the precision Cholesky factors too, so the returned
                # model can be used for scoring without refitting
                best_GMM.precisions_cholesky_ = GMM.precisions_cholesky_

            counter += 1

        return best_GMM
Example #26
# create the 1D GMM profile for mag. susc.
means_init_mag = gmmref.means_[:, 1].reshape(3, 1)
cov_init_mag = np.array([gmmref.covariances_[:, 1]]).reshape((3, 1, 1))
clfmag = GaussianMixture(
    n_components=3,
    means_init=means_init_mag,
    precisions_init=cov_init_mag,
    n_init=1,
    max_iter=2,
    tol=np.inf,
)
# random fit, we set values after.
clfmag.fit(np.random.randn(10, 1))
clfmag.means_ = means_init_mag
clfmag.covariances_ = cov_init_mag
clfmag.precisions_cholesky_ = _compute_precision_cholesky(
    clfmag.covariances_, clfmag.covariance_type)
clfmag.weights_ = gmmref.weights_
testXplot_mag = np.linspace(-0.01, 1.2, 1000)[:, np.newaxis]
score_mag = clfmag.score_samples(testXplot_mag)
ax3.plot(np.exp(score_mag), testXplot_mag, linewidth=3.0, c="k")

ax3.set_xlim([0.0, 50])
ax3.set_xlabel("1D Probability Density values",
               fontsize=labelsize,
               rotation=-45,
               labelpad=0,
               x=0.5)
ax2.set_xlabel("Density (g/cc)", fontsize=labelsize)
ax3.set_ylabel("Magnetic Susceptibility (SI)", fontsize=labelsize)
Example #27
def objective_2():
    # objective 2
    filenames = glob.glob('Lab 7/Objective 2/data/*.csv')
    subjects = get_subjects(filenames)

    trials = [str(x) for x in range(1, 6)]

    for subject in subjects:
        subjects_validate = subject
        subjects_train = [x for x in subjects if x != subject]
        # print(subjects_validate, subjects_train)

        # 2
        gmm_validate_t, gmm_validate_ir = load_files(subjects_validate, trials,
                                                     filenames, 25)
        gmm_train_t, gmm_train_ir = load_files(subjects_train, trials,
                                               filenames, 25)

        # 3
        title = 'Training with ' + subjects_validate + ' Left out'
        filename = 'training_data_' + subjects_validate + '.png'
        plot(gmm_train_t,
             gmm_train_ir,
             title=title,
             filename=filename,
             slice_window=None)

        # 4
        title = 'Histogram with ' + subjects_validate + ' Left out'
        filename = 'hist_training_' + subjects_validate + '.png'
        hist(gmm_train_t, gmm_train_ir, title=title, filename=filename)

        # 5
        data_train = np.array(gmm_train_ir).reshape(-1, 1)
        data_validate = np.array(gmm_validate_ir).reshape(-1, 1)

        # create gmm and fit
        gmm = GaussianMixture(n_components=2)
        fit = gmm.fit(data_train)

        # sort the order of means
        # https://stackoverflow.com/questions/37008588/sklearn-gmm-classification-prediction-component-assignment-order
        sort_indices = gmm.means_.argsort(axis=0)
        order = sort_indices[:, 0]
        gmm.means_ = gmm.means_[order, :]
        gmm.covariances_ = gmm.covariances_[order, :]
        gmm.weights_ = gmm.weights_[order]

        title = 'IR Signal Histogram \n Individual with ' + subjects_validate + ' Left out'
        filename = 'hist_individual_' + subjects_validate + '.png'
        hist_gmm(data_train,
                 gmm,
                 plot_sum=False,
                 title=title,
                 filename=filename)

        title = 'IR Signal Histogram \n Sum with ' + subjects_validate + ' Left out'
        filename = 'hist_sum_' + subjects_validate + '.png'
        hist_gmm(data_train,
                 gmm,
                 plot_sum=True,
                 title=title,
                 filename=filename)

        #6
        predictions_train = gmm.predict(data_train)
        predictions_validate = gmm.predict(data_validate)

        # print(predictions_train)
        # print(predictions_validate)

        plt.ion()

        plt.cla()
        plt.plot(gmm_train_t[window],
                 data_train[window],
                 color='green',
                 label='Voltage')
        plt.plot(gmm_train_t[window],
                 predictions_train[window],
                 color='red',
                 label='Prediction')
        title = 'Prediction of Training Set \n with ' + subjects_validate + ' Left out'
        plt.title(title)
        plt.legend(loc='lower left', fontsize='small')
        plt.xlabel('Time')
        plt.ylabel('Reading')
        plt.pause(0.5)
        plt.show()
        filename = 'gmm_train_labeled_' + subjects_validate + '.png'
        plt.savefig(filename)

        plt.cla()
        plt.plot(gmm_validate_t[window],
                 data_validate[window],
                 color='green',
                 label='Voltage')
        plt.plot(gmm_validate_t[window],
                 predictions_validate[window],
                 color='red',
                 label='Prediction')
        title = 'Prediction of Validation Set \n with ' + subjects_validate + ' Left out'
        plt.title(title)
        plt.legend(loc='lower left', fontsize='small')
        plt.xlabel('Time')
        plt.ylabel('Reading')
        plt.pause(0.5)
        plt.show()
        filename = 'gmm_validate_labeled_' + subjects_validate + '.png'
        plt.savefig(filename)
Example #28
    def fit(self, data):
        #train drifted data
        #print("fitting.......")
        best_gmm = self.trainBestModel(data)
        if (self.initialized == False):
            self.weights_ = best_gmm.weights_
            self.covariances_ = best_gmm.covariances_  # self.covariances_ = gmm._get_covars()
            self.means_ = best_gmm.means_
            self.n_components = best_gmm.n_components
            self.precisions_cholesky_ = _compute_precision_cholesky(
                best_gmm.covariances_, "full")
            #backup for later
            self.backup_weights_ = best_gmm.weights_
            self.backup_covariances_ = best_gmm.covariances_  # self.covariances_ = gmm._get_covars()
            self.backup_means_ = best_gmm.means_
            self.backup_n_components = best_gmm.n_components
            self.backup_precisions_cholesky_ = self.precisions_cholesky_
            self.initialized = True

        else:
            #print("adapted.........",self.backup_weights_)
            w_all = np.concatenate((self.backup_weights_, best_gmm.weights_),
                                   axis=None)
            #w_all = w_all/np.sum(w_all)
            mu_all = np.concatenate((self.backup_means_, best_gmm.means_),
                                    axis=0)
            cov_all = np.concatenate(
                (self.backup_covariances_, best_gmm.covariances_), axis=0)

            #n_comp =5
            n_components_range = range(self.max_components + 1,
                                       self.min_components, -1)
            bicreduced = []
            lowest_bic = np.inf  # np.infty was removed in NumPy 2.0
            jumlahSample = 1 * len(data)
            if (jumlahSample <= 100):
                jumlahSample = 100

            currentSample = self.sample(jumlahSample)[0]
            Xall = np.concatenate((currentSample, data), axis=0)

            for n_components in n_components_range:
                w, m, c = self.mixture_reduction(w_all,
                                                 mu_all,
                                                 cov_all,
                                                 n_components,
                                                 isomorphic=True,
                                                 verbose=False,
                                                 optimization=False)
                gmm_p = GaussianMixture(n_components=n_components,
                                        covariance_type="full")
                gmm_p.weights_ = w
                gmm_p.covariances_ = c
                gmm_p.means_ = m
                gmm_p.precisions_cholesky_ = _compute_precision_cholesky(
                    c, "full")
                bic_ = gmm_p.bic(data)
                bicreduced.append(bic_)
                if self.verbose:
                    print('REDUCED BIC components {0} = {1}'.format(
                        n_components, bic_))
                if bic_ < lowest_bic:
                    lowest_bic = bic_
                    best_gmm = gmm_p

            #print(best_gmm.n_components)

            self.weights_ = best_gmm.weights_ / np.sum(best_gmm.weights_)
            self.means_ = best_gmm.means_
            self.covariances_ = best_gmm.covariances_
            self.n_components = best_gmm.n_components
            #Compute the Cholesky decomposition of the precisions.
            self.precisions_cholesky_ = _compute_precision_cholesky(
                best_gmm.covariances_, self.covariance_type)
Example #29
def main(args):
    file_name = os.path.join("BetaCostFunction", args.dist + "_" + args.file)
    n_iter = args.n_iter
    n_samples = args.samples

    if args.dist == "beta1":
        # Define the true distribution
        dist = st.beta(a=2, b=2, loc=-1, scale=2)
    elif args.dist == "beta2":
        # Define the true distribution
        dist = st.beta(a=5, b=5, loc=-1, scale=2)
    elif args.dist == "beta3":
        # Define the true distribution
        dist = st.beta(a=2, b=5, loc=-1, scale=2)
    elif "gmm" in args.dist:
        if args.dist == "gmm1":
            mu1 = -0.5
            sigma1 = 0.15
            mu2 = 0.4
            sigma2 = 0.3
            p = 0.6
        elif args.dist == "gmm2":
            mu1 = -0.1
            sigma1 = 0.3
            mu2 = 0.4
            sigma2 = 0.1
            p = 0.7
        dist = GaussianMixture(n_components=2, covariance_type="spherical")
        dist.weights_ = np.array([p, 1 - p])
        dist.means_ = np.array([[mu1], [mu2]])
        dist.covariances_ = np.array([sigma1**2, sigma2**2])
        # for covariance_type='spherical', precisions_cholesky_ is one value
        # per component: sqrt(1 / variance) = 1 / sigma
        dist.precisions_cholesky_ = np.array([1.0 / sigma1, 1.0 / sigma2])

    chosen_functions = [0, 1, 2, 4, 5, 7, 9, 15, 16, 18]
    parameter_bounds = None
    #parameter_bounds =((1,None),(1,None),(None,None),(None,None))
    cost_coefs = pd.read_csv(args.coef_file)

    N = [10, 25, 50]
    try:
        df = pd.read_csv(file_name, index_col=0)
    except FileNotFoundError:
        df = pd.DataFrame(
            columns=['w_star', 'method', 'function', 'N', "distribution"])
        df.to_csv(file_name)

    for z in range(len(chosen_functions)):
        coefs = cost_coefs.iloc[chosen_functions[z]].values
        new_f = lambda x, y: f_mean(x, y, coefs[0], coefs[1], coefs[2])
        saa = optimiser.SAA(new_f)
        bagging = optimiser.BaggingSolver(400, objective_function=new_f)
        if "gmm" in args.dist:
            mle = optimiser.GMMSolver(False,
                                      2,
                                      objective_function=new_f,
                                      n_to_sample=n_samples)
            bayes = optimiser.GMMSolver(True,
                                        2,
                                        objective_function=new_f,
                                        n_to_sample=n_samples)
        else:
            mle = optimiser.MLESolver(st.beta,
                                      parameter_bounds=parameter_bounds,
                                      objective_function=new_f,
                                      n_to_sample=n_samples,
                                      floc=-1,
                                      fscale=2)
            bayes = optimiser.BetaBayesianSolver(objective_function=new_f,
                                                 n_to_sample=n_samples)
        kde = optimiser.KDESolver(n_samples, objective_function=new_f)
        methods = [bagging, mle, saa, kde, bayes]
        for k in range(n_iter):
            for j in range(len(N)):
                if "gmm" in args.dist:
                    samples = dist.sample(N[j])[0].reshape(-1)
                else:
                    samples = dist.rvs(N[j])
                for ele in methods:
                    w_star = ele.solve(samples, initial_conditions=0.0)[0]
                    results = {
                        'w_star': w_star,
                        'method': ele.__str__(),
                        'function': chosen_functions[z] + 1,
                        'N': N[j],
                        "distribution": args.dist
                    }
                    df = df.append(results, ignore_index=True)
            # save the results so far
            df.to_csv(file_name)
Example #30
    def _fit(self, X: np.ndarray) -> np.ndarray:
        pred = self._cluster_and_decide(X)
        self.children: Tuple["DivisiveCluster"] = cast(
            Tuple["DivisiveCluster"], tuple())

        uni_labels = np.unique(pred)
        labels = pred.reshape((-1, 1)).copy()
        if len(uni_labels) > 1:
            for ul in uni_labels:
                inds = pred == ul
                new_X = X[inds]
                dc = DivisiveCluster(
                    cluster_method=self.cluster_method,
                    max_components=self.max_components,
                    min_split=self.min_split,
                    max_level=self.max_level,
                    cluster_kws=self.cluster_kws,
                    delta_criter=self.delta_criter,
                )
                dc.parent = self
                if (len(new_X) > self.max_components
                        and len(new_X) >= self.min_split
                        and self.depth + 1 < self.max_level):
                    child_labels = dc._fit(new_X)
                    while labels.shape[1] <= child_labels.shape[1]:
                        labels = np.column_stack(
                            (labels, np.zeros((len(X), 1), dtype=int)))
                    labels[inds, 1:child_labels.shape[1] + 1] = child_labels
                else:
                    # make a "GaussianMixture" model for clusters
                    # that were not fitted
                    if self.cluster_method == "gmm":
                        cluster_idx = len(dc.parent.children) - 1
                        parent_model = dc.parent.model_
                        model = GaussianMixture()
                        model.weights_ = np.array([1])
                        model.means_ = parent_model.means_[
                            cluster_idx].reshape(1, -1)
                        model.covariance_type = parent_model.covariance_type
                        if model.covariance_type == "tied":
                            model.covariances_ = parent_model.covariances_
                            model.precisions_ = parent_model.precisions_
                            model.precisions_cholesky_ = (
                                parent_model.precisions_cholesky_)
                        else:
                            cov_types = ["spherical", "diag", "full"]
                            n_features = model.means_.shape[-1]
                            cov_shapes = [
                                (1, ),
                                (1, n_features),
                                (1, n_features, n_features),
                            ]
                            cov_shape_idx = cov_types.index(
                                model.covariance_type)
                            model.covariances_ = parent_model.covariances_[
                                cluster_idx].reshape(cov_shapes[cov_shape_idx])
                            model.precisions_ = parent_model.precisions_[
                                cluster_idx].reshape(cov_shapes[cov_shape_idx])
                            model.precisions_cholesky_ = (
                                parent_model.precisions_cholesky_[cluster_idx].
                                reshape(cov_shapes[cov_shape_idx]))

                        dc.model_ = model

        return labels