Example #1
    def classify(self, points):
        """ Classify the points by computing log probabilities
            for each class and return the most probable labels. """
        try:
            if len(self.states.mean) == len(self.states.var):
                mu = np.array(self.states.mean)
                cv = np.array(self.states.var)

                logProb = mixture.log_multivariate_normal_density(points, mu, cv, 'full')

                # the index of the highest probability gives the class label
                ndx = logProb.argmax(axis=1)

                est_labels = []
                for n in ndx:
                    for k, v in self.states.labels.items():
                        if v == n:
                            est_labels.append(k)

                return est_labels, logProb
            else:
                raise NameError('ERROR_SIZE')

        except NameError:
            print('BayesClassifier.classify :: Array size mismatch.\n')
            traceback.print_exc(file=sys.stdout)
            sys.exit(0)
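Note that `log_multivariate_normal_density` was removed from scikit-learn long ago (it was deprecated together with the old `GMM` class). A minimal, self-contained sketch of what the 'full'-covariance call above computes, using `scipy.stats` instead, with made-up shapes:

# Sketch (made-up data): one log density per (sample, component) pair.
import numpy as np
from scipy.stats import multivariate_normal

points = np.random.rand(5, 2)                 # 5 samples, 2 features
mu = np.array([[0.0, 0.0], [1.0, 1.0]])       # one mean per class
cv = np.array([np.eye(2), np.eye(2)])         # one full covariance per class

logProb = np.column_stack([multivariate_normal.logpdf(points, mean=m, cov=c)
                           for m, c in zip(mu, cv)])  # shape (5, 2)
labels = logProb.argmax(axis=1)               # most probable class per sample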
Example #2
def forcedAlignment(lmfcc, phoneHMMs, phoneTrans, filename):
    """ forcedAlignment: aligns a phonetic transcription at the state level

    Args:
       lmfcc: NxD array of MFCC feature vectors (N vectors of dimension D)
              computed the same way as for the training of phoneHMMs
       phoneHMMs: set of phonetic Gaussian HMM models
       phoneTrans: list of phonetic symbols to be aligned including initial and
                   final silence

    Returns:
       list of strings in the form phoneme_index specifying, for each time step,
       the state from phoneHMMs corresponding to the Viterbi path.
    """
    utteranceHMM = concatHMMs(phoneHMMs, phoneTrans, digit=filename[:-1])
    # nstates maps each phone to its number of HMM states (module-level dict)
    stateTrans = [
        phone + '_' + str(stateid) for phone in phoneTrans
        for stateid in range(nstates[phone])
    ]

    from sklearn.mixture import log_multivariate_normal_density
    import warnings
    warnings.simplefilter('ignore')
    obsloglik = log_multivariate_normal_density(lmfcc, utteranceHMM['means'],
                                                utteranceHMM['covars'], 'diag')
    _, viterbiStateIdTrans = viterbi(
        obsloglik, np.log(utteranceHMM['startprob']),
        np.log(utteranceHMM['transmat'][:-1, :-1]))
    return [stateTrans[idx] for idx in viterbiStateIdTrans]
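A hypothetical call, assuming the lab's `phoneHMMs` models, a pronunciation dictionary `prondict`, and an `lmfcc` feature array are already loaded (none of these are defined on this page):

# Hypothetical usage; phoneHMMs, prondict and lmfcc come from the lab setup.
wordTrans = ['z', '4', '3']                                   # made-up utterance
phoneTrans = ['sil'] + [ph for w in wordTrans for ph in prondict[w]] + ['sil']
alignment = forcedAlignment(lmfcc, phoneHMMs, phoneTrans, filename='z43a')
# alignment[t] is a string like 'ay_1': phone 'ay', HMM state 1, at frame t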
Example #3
    def _compute_log_weighted_gaussian_densities(self, X, i_comp):
        cur_means = self.means_[i_comp]
        cur_covs = self.covars_[i_comp]
        if self.covariance_type == 'spherical':
            cur_covs = cur_covs[:, np.newaxis]
        log_cur_weights = np.log(self.weights_[i_comp])

        return log_multivariate_normal_density(
            X, cur_means, cur_covs, self.covariance_type) + log_cur_weights
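This method (apparently from a GMM-HMM implementation) returns one weighted log density per mixture component; such values are typically reduced to a single per-sample log likelihood with a log-sum-exp. A self-contained sketch of that reduction, with made-up data:

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

X = np.random.rand(4, 2)                    # 4 samples, 2 features
means = np.array([[0.0, 0.0], [1.0, 1.0]])  # 2 mixture components
weights = np.array([0.3, 0.7])

# per-component weighted log densities, same shape as the method's result
log_weighted = np.column_stack([multivariate_normal.logpdf(X, mean=m, cov=np.eye(2))
                                for m in means]) + np.log(weights)
loglik = logsumexp(log_weighted, axis=1)    # mixture log likelihood per sample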
Example #4
def gaussian_log_likelihood(x, mean, covariance):
    # keep only the diagonal of the covariance matrix, as required by 'diag'
    covariance_diag = np.diag(np.asarray(covariance))
    likelihood = log_multivariate_normal_density(np.array([x]),
                                                 np.array([mean]),
                                                 np.array([covariance_diag]),
                                                 covariance_type='diag')
    return likelihood[0][0]
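A hypothetical spot check with made-up numbers; since the covariance passed in is diagonal, the result should agree with the full-covariance log density from scipy:

from scipy.stats import multivariate_normal

x, mean, cov = [0.5, -1.0], [0.0, 0.0], [[2.0, 0.0], [0.0, 0.5]]
print(gaussian_log_likelihood(x, mean, cov))
print(multivariate_normal.logpdf(x, mean, cov))  # should match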
Example #5
    def dup__div__(self, other):
        x = np.array([self._semantics])
        means = np.array([other._semantics])
        m, n = x.shape
        covars = np.ones((1, n))
        return log_multivariate_normal_density(x, means, covars)
Example #6
def derGMMmodel(GMMmodel, UB):
    """
    Compute derivatives of the GMM model with respect to each corner as:
             sum(W*N(x,\mu,\Sigma)*(\mu - x).T inv(\Sigma))
    f'(x) =  ----------------------------------------------
                       sum(W*N(x,\mu,\Sigma))
    """
    outUB = UB.copy()  # work on a copy so the input array is not modified
    U = UB[0:2]
    B = UB[2:4]
    #--- Compute derivative with respect to the upper corner
    denU = np.exp(GMMmodel['Upper'].score(U.reshape(1, -1)))
    numU = np.sum(
            np.exp(
                mixture.log_multivariate_normal_density(
                    U.reshape(1, -1),
                    GMMmodel['Upper'].means_,
                    GMMmodel['Upper'].covars_,
                    GMMmodel['Upper'].covariance_type)
                )
            * GMMmodel['Upper'].weights_
            * (GMMmodel['Upper'].means_ - U).T
            * np.linalg.inv(GMMmodel['Upper'].covars_),
            axis=0
            )
    outUB[0:2] = numU / denU

    #--- Compute derivative with respect to the bottom corner
    denB = np.exp(GMMmodel['Bottom'].score(B.reshape(1, -1)))
    numB = np.sum(
            np.exp(
                mixture.log_multivariate_normal_density(
                    B.reshape(1, -1),
                    GMMmodel['Bottom'].means_,
                    GMMmodel['Bottom'].covars_,
                    GMMmodel['Bottom'].covariance_type)
                )
            * GMMmodel['Bottom'].weights_
            * (GMMmodel['Bottom'].means_ - B).T
            * np.linalg.inv(GMMmodel['Bottom'].covars_),
            axis=0
            )
    outUB[2:4] = numB / denB

    return outUB
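The fraction in the docstring is the gradient of the GMM log density, so a finite-difference comparison against `score` (which returns per-sample log likelihoods in the old `sklearn.mixture.GMM` API) is a reasonable sanity test. A hypothetical sketch, assuming a fitted `GMMmodel` dict and a corner vector `UB`:

# Hypothetical check; GMMmodel and UB are assumed to exist as above.
import numpy as np

eps = 1e-5
U = UB[0:2]
for d in range(2):
    dU = U.copy()
    dU[d] += eps
    num = (GMMmodel['Upper'].score(dU.reshape(1, -1))
           - GMMmodel['Upper'].score(U.reshape(1, -1))) / eps
    print(d, num)  # compare against derGMMmodel(GMMmodel, UB)[d]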
Example #8
def pca_enc_score(pca_predy, pca_y, pca_cov):
    samples = pca_predy.shape[0]
    if samples != pca_y.shape[0]:
        raise RuntimeError('pca_predy and pca_y need to have the same number of samples')
    log_likelihood = np.empty((samples,))
    for i in range(samples):
        log_likelihood[i] = log_multivariate_normal_density(
            pca_y[i][None, :], pca_predy[i][None, :],
            pca_cov[None, :, :], covariance_type='full')

    return log_likelihood
Example #9
def test_lmvnpdf_spherical():
    n_features, n_components, n_samples = 2, 3, 10

    mu = rng.randint(10) * rng.rand(n_components, n_features)
    spherecv = rng.rand(n_components, 1)**2 + 1
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    cv = np.tile(spherecv, (n_features, 1))
    reference = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = mixture.log_multivariate_normal_density(X, mu, spherecv, 'spherical')
    assert_array_almost_equal(lpr, reference)
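These tests compare against a helper `_naive_lmvnpdf_diag` that is not shown on this page; a sketch consistent with its use (per-feature univariate normal log densities summed across features, one column per component) would be:

import numpy as np
from scipy import stats

def _naive_lmvnpdf_diag(X, mu, cv):
    # one column per component: sum of per-feature univariate log densities
    ref = np.empty((len(X), len(mu)))
    stds = np.sqrt(cv)
    for i, (m, std) in enumerate(zip(mu, stds)):
        ref[:, i] = stats.norm.logpdf(X, m, std).sum(axis=1)
    return ref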
Example #11
    def posterior_prob(self, obs, with_noise=False):
        """posterior probabilities for data under the model

        :type obs: ndarray
        :param obs: observations to be evaluated [n, tf, nc]
        :type with_noise: bool
        :param with_noise: if True, include the noise cluster as component
            in the mixture.
            Default=False
        :rtype: ndarray
        :returns: matrix with per component posterior probabilities [n, c]
        """

        # check obs
        obs = sp.atleast_2d(obs)
        if len(obs) == 0:
            raise ValueError('no observations passed!')
        data = []
        if obs.ndim == 2:
            if obs.shape[1] != self._tf * self._nc:
                raise ValueError('data dimensions not compatible with model')
            for i in range(obs.shape[0]):
                data.append(obs[i])
        elif obs.ndim == 3:
            if obs.shape[1:] != (self._tf, self._nc):
                raise ValueError('data dimensions not compatible with model')
            for i in range(obs.shape[0]):
                data.append(mcvec_to_conc(obs[i]))
        data = sp.asarray(data, dtype=sp.float64)

        # build comps
        comps = self.get_template_set(mc=False)
        if with_noise:
            comps = sp.vstack((comps, sp.zeros((self._tf * self._nc))))
        comps = comps.astype(sp.float64)
        if len(comps) == 0:
            return sp.zeros((len(obs), 1))

        # build priors
        prior = sp.array([self._lpr_s] * len(comps), dtype=sp.float64)
        if with_noise:
            prior[-1] = self._lpr_n

        # get sigma
        try:
            sigma = self._ce.get_cmx(tf=self._tf).astype(sp.float64)
        except Exception:
            return sp.zeros((len(obs), 1))

        # calc log probs
        lpr = log_multivariate_normal_density(data, comps, sigma,
                                              'tied') + prior
        logprob = logsumexp(lpr, axis=1)
        return sp.exp(lpr - logprob[:, sp.newaxis])
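The last two lines are the standard responsibility computation, `exp(lpr - logsumexp(lpr))`; a tiny self-contained example with made-up numbers:

import numpy as np
from scipy.special import logsumexp

lpr = np.log(np.array([[0.2, 0.6],
                       [0.5, 0.5]]))  # made-up per-component joint densities
posteriors = np.exp(lpr - logsumexp(lpr, axis=1)[:, np.newaxis])
print(posteriors)  # rows sum to 1: [[0.25, 0.75], [0.5, 0.5]]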
Example #12
def test_lmvnpdf_full():
    n_features, n_components, n_samples = 2, 3, 10

    mu = rng.randint(10) * rng.rand(n_components, n_features)
    cv = (rng.rand(n_components, n_features) + 1.0) ** 2
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    fullcv = np.array([np.diag(x) for x in cv])

    reference = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = mixture.log_multivariate_normal_density(X, mu, fullcv, 'full')
    assert_array_almost_equal(lpr, reference)
Example #13
def test_lmvnpdf_diag():
    # test a slow and naive implementation of lmvnpdf and
    # compare it to the vectorized version (mixture.lmvnpdf) to test
    # for correctness
    n_features, n_components, n_samples = 2, 3, 10
    mu = rng.randint(10) * rng.rand(n_components, n_features)
    cv = (rng.rand(n_components, n_features) + 1.0)**2
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    ref = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = mixture.log_multivariate_normal_density(X, mu, cv, 'diag')
    assert_array_almost_equal(lpr, ref)
Example #15
def getGMMProbs(GMM, argument, embeddings):
    X = np.array([embeddings[argument]])

    # per-component densities only; add np.log(GMM.weights_) to lpr for the
    # weighted mixture version
    lpr = mixture.log_multivariate_normal_density(X, GMM.means_, GMM.covars_,
                                                  GMM.covariance_type)
    probs = np.exp(lpr)

    return probs[0]
Example #17
def main():

    a = concatHMMs(phoneHMMs, namelist=prondict['4'])
    data = np.load('lab2_data.npz')['data'][10]
    #loglik=example['obsloglik']
    #print()
    fakelog = log_multivariate_normal_density(data['lmfcc'], a['means'],
                                              a['covars'])
    #fakelog=log_multivariate_normal_density(example['lmfcc'],a['means'],a['covars'])
    #print(fakelog)
    #plt.pcolormesh(example['lmfcc'])
    #plt.show()

    #4.1
    #plt.pcolormesh(fakelog.transpose())
    #plt.colorbar()
    #plt.show()

    #4.2
    log_alpha = forward(fakelog, np.log(a['startprob']), np.log(a['transmat']))

    #print(log_alpha)
    #4.3
    x = viterbi(fakelog, np.log(a['startprob']), np.log(a['transmat']))

    #4.4
    log_beta = backward(fakelog, np.log(a['startprob']), np.log(a['transmat']))
    #print(log_beta)
    #5.1
    # print(log_alpha)
    # print("------------------------------")
    # print(log_beta)
    log_gamma = statePosteriors(log_alpha, log_beta)
    #print(log_gamma)
    #5.2
    mu, covar = updateMeanAndVar(data['lmfcc'], log_gamma)

    #new_fake_log=log_multivariate_normal_density(data['lmfcc'],mu,covar)
    new_fake_log = fakelog

    for x in range(1, 15):

        log_alpha = forward(new_fake_log, np.log(a['startprob']),
                            np.log(a['transmat']))
        log_beta = backward(new_fake_log, np.log(a['startprob']),
                            np.log(a['transmat']))

        log_gamma = statePosteriors(log_alpha, log_beta)
        mu, covar = updateMeanAndVar(data['lmfcc'], log_gamma)
        #covar=a['covars']
        new_fake_log = log_multivariate_normal_density_diag(
            data['lmfcc'], mu, covar)
Example #18
    def logPdf(self, x):
        # log_multivariate_normal_density expects 2-D inputs, hence ndmin=2
        x = np.array([x], ndmin=2)
        return log_multivariate_normal_density(x, self.mean, self.covar)
Example #19
# imports assumed by this snippet: numpy, matplotlib, sklearn.mixture as skm,
# and the lab module proto2
import numpy as np
import matplotlib.pyplot as plt
import sklearn.mixture as skm
from importlib import reload

import proto2
reload(proto2)

tidigits = np.load('lab2_tidigits.npz')['tidigits']
models = np.load('lab2_models.npz')['models']
example = np.load('lab2_example.npz')['example'].item()

plt.figure(1)

# Plot MFCC
plt.subplot(511)
plt.imshow(example['mfcc'].transpose(), origin='lower', interpolation='nearest', aspect='auto')

# Compute 4: Multivariate Gaussian Density
gmm_obsloglik = skm.log_multivariate_normal_density(example['mfcc'],
                                models[0]['gmm']['means'],
                                models[0]['gmm']['covars'], 'diag')

hmm_obsloglik = skm.log_multivariate_normal_density(example['mfcc'],
                                models[0]['hmm']['means'],
                                models[0]['hmm']['covars'], 'diag')

plt.subplot(512)
plt.imshow(gmm_obsloglik.transpose(), origin='lower', interpolation='nearest', aspect='auto')
plt.subplot(513)
plt.imshow(hmm_obsloglik.transpose(), origin='lower', interpolation='nearest', aspect='auto')

# Compute 5 : GMM Likelihood and Recognition
# Retrieve example['gmm_loglik']
gmmloglik = proto2.gmmloglik(gmm_obsloglik, models[0]['gmm']['weights'])
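`proto2.gmmloglik` itself is not shown on this page; a sketch consistent with how it is called (log-sum-exp over components per frame, summed over frames) would be:

import numpy as np
from scipy.special import logsumexp

def gmmloglik_sketch(log_emlik, weights):
    # log_emlik: (n_frames, n_components) observation log likelihoods
    return np.sum(logsumexp(log_emlik + np.log(weights), axis=1))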
Example #20
# imports assumed by this snippet: numpy and matplotlib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import log_multivariate_normal_density

plt.rcParams['image.cmap'] = 'jet'

tidigits = np.load('lab2_tidigits.npz', encoding='latin1')['tidigits']
models = np.load('lab2_models.npz', encoding='latin1')['models']

example = np.load('lab2_example.npz', encoding='latin1')['example'].item()
# plt.pcolormesh(example['hmm_obsloglik'].T)
# plt.show()

# (( 4 ))

# example using hmm: probabilities of the Gaussians in the mixture given samples
hmm_obsloglik = log_multivariate_normal_density(example['mfcc'],
                                                models[0]['hmm']['means'],
                                                models[0]['hmm']['covars'])
print(np.sum(hmm_obsloglik - example['hmm_obsloglik']))
# example using gmm: probabilities of the Gaussians in the mixture given samples
gmm_obsloglik = log_multivariate_normal_density(example['mfcc'],
                                                models[0]['gmm']['means'],
                                                models[0]['gmm']['covars'])
print(np.sum(gmm_obsloglik - example['gmm_obsloglik']))

# print(tidigits[0].keys())
# print(tidigits[10]['digit'])
# print(models[5]['digit'])
# Utterances and model corresponding to digit 'Four': probabilities of the Gaussians in the mixture given samples
# hmm_obsloglik4 = log_multivariate_normal_density(tidigits[10]['mfcc'], models[5]['hmm']['means'],models[5]['hmm']['covars'])
# gmm_obsloglik4 = log_multivariate_normal_density(tidigits[10]['mfcc'], models[5]['gmm']['means'],models[5]['gmm']['covars'])
#
Example #21
wm_wcereb.set_index(df.index,inplace=True)
wm_wcereb.columns = ['wm_wcereb']

wm_vs_summary = wm_wcereb.merge(summary_wcereb, left_index=True, right_index=True)

# 1D GMM
input_df = summary_wcereb
g_1 = GMM(n_components=2, 
        covariance_type='full',
        tol=1e-6,
        n_iter=700,
        params='wmc', 
        init_params='wmc')
g_1.fit(input_df)
# calculate component probabilities, disregarding the mixture weights
lpr = log_multivariate_normal_density(input_df, g_1.means_, g_1.covars_,
                                      g_1.covariance_type)
logprob = logsumexp(lpr, axis=1)
responsibilities = np.exp(lpr - logprob[:, np.newaxis])
probs = pd.DataFrame(responsibilities)
probs.set_index(input_df.index, inplace=True)
probs.columns = ['prob_0', 'prob_1']
probs.loc[:, 'color'] = 'k'
probs.loc[probs.prob_0 >= 0.90, 'color'] = 'r'
probs.loc[probs.prob_1 >= 0.90, 'color'] = 'b'
# plot 1D GMM
delta = 0.0001
x = np.arange(0.5, 1.2, delta)
mu_1, sigma_1 = (g_1.means_[0][0], np.sqrt(g_1.covars_[0][0]))
mu_2, sigma_2 = (g_1.means_[1][0], np.sqrt(g_1.covars_[1][0]))
intervals_1 = norm.interval(0.95, loc=mu_1, scale=sigma_1)
intervals_2 = norm.interval(0.95, loc=mu_2, scale=sigma_2)
Example #22
    def _compute_log_likelihood(self, X):
        return log_multivariate_normal_density(X, self.means_, self._covars_,
                                               self.covariance_type)
Example #23
        s_val = np.vstack((s_val, (classifier.weights_ * temp_val).sum(axis=1)))
        
        x_train = pd.DataFrame(s_train.T)
        x_test = pd.DataFrame(s_test.T)
        x_val = pd.DataFrame(s_val.T)
    x_train = x_train.drop(x_train.columns[[0]],axis = 1)
    x_test = x_test.drop(x_test.columns[[0]],axis = 1)
    x_val = x_val.drop(x_val.columns[[0]],axis = 1)
    return x_train, x_test, x_val         
        
        
s = classifier.score(X_train.todense())

from sklearn.utils.extmath import logsumexp
from sklearn import mixture
lpr = (mixture.log_multivariate_normal_density(X_train.todense(), classifier.means_,
                                               classifier.covars_, classifier.covariance_type)
       + np.log(classifier.weights_))  # log probabilities of components
logprob = logsumexp(lpr, axis=1) # logsum to get probability of GMM
probs = np.exp(logprob) # 0 < probs < 1 

p = prior(y_train)
x = x_train + np.log(p)
x = x.astype(float)
z = x
d = x.as_matrix()          # define d before taking its row-wise logsumexp
r = logsumexp(d, axis=1)
x['logsum'] = r
z = np.exp(x.subtract(x['logsum'], axis=0).drop('logsum', 1))



checkAccuracy(z,y_train)
Example #24
    tw = open(dir_dataset + "topic_sanders_twitter.txt", "r")
    for l in tw:
        labels.append(l)
    i = 0
    num_topics = dict()
    for tp in set(labels):
        num_topics[tp] = i
        i = i + 1

    labels = [num_topics[x] for x in labels]
    words = [w for w, v in model.vocab.items()]
    word_vectors = model.syn0
    gmm_model = mix.GMM(n_components=k, n_iter=1000, covariance_type='diag')
    gmm_model.fit(word_vectors)

    log_probs = mix.log_multivariate_normal_density(word_vectors, gmm_model.means_,
                                                    gmm_model.covars_,
                                                    gmm_model.covariance_type)
    word_topic = list()
    _, num_col = log_probs.shape
    for col in range(num_col):
        top_n = 10
        log_component_probs = (log_probs[:, col]).T
        sorted_indexes = np.argsort(log_component_probs)[::-1][:top_n]
        ordered_word_probs = [(model.index2word[idx], log_component_probs[idx]) for idx in sorted_indexes]
        word_topic.append([model.index2word[idx] for idx in sorted_indexes])

        print('---')
        print("Topic {0}".format(col + 1))
        print("Total prob:" + str(sum(log_component_probs)))
        print(", ".join(["{w}: {p}".format(w = w, p = p) for w, p in ordered_word_probs]))

Example #25
    def _compute_log_likelihood(self, obs):
        return log_multivariate_normal_density(
            obs, self._means_, self._covars_, self._covariance_type)
Example #26
def adjust(x, mu):
    # count the rows of x whose log density under N(mu, 0.01*I) exceeds -0.5
    means = np.array([mu])
    m, n = x.shape
    covars = 0.01 * np.ones((1, n))
    p = log_multivariate_normal_density(x, means, covars)
    return np.sum(p > -0.5)
Example #27
def select(x, mu):
    # boolean mask of rows whose log density under N(mu, 0.01*I) exceeds -0.5
    means = np.array([mu])
    m, n = x.shape
    covars = 0.01 * np.ones((1, n))
    p = log_multivariate_normal_density(x, means, covars)
    return p > -0.5
Example #28
def normal(x, mu):
    # log density of each row of x under N(mu, 0.01*I)
    means = np.array([mu])
    m, n = x.shape
    covars = 0.01 * np.ones((1, n))
    return log_multivariate_normal_density(x, means, covars)
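A hypothetical way to combine the helpers from Examples 26 and 27, with made-up data (`select` masks rows, `adjust` counts them):

import numpy as np

x = np.random.rand(100, 3)
mu = x.mean(axis=0)
mask = select(x, mu)        # boolean mask of shape (100, 1)
kept = x[mask.ravel()]
print(adjust(x, mu), 'rows pass the log-density threshold')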