def classify(self, points):
    """Classify the points by computing probabilities for each class
    and return the most probable label."""
    try:
        if len(self.states.mean) == len(self.states.var):
            mu = np.array(self.states.mean)
            cv = np.array(self.states.var)
            logProb = mixture.log_multivariate_normal_density(points, mu, cv, 'full')
            # get index of highest probability; this gives the class label
            ndx = logProb.argmax(axis=1)
            est_labels = []
            for n in ndx:
                for k, v in self.states.labels.items():
                    if v == n:
                        est_labels.append(k)
            return est_labels, logProb
        else:
            raise NameError('ERROR_SIZE')
    except NameError:
        print('BayesClassifier.classify :: Array size mismatch.\n')
        traceback.print_exc(file=sys.stdout)
        sys.exit(0)
def forcedAlignment(lmfcc, phoneHMMs, phoneTrans, filename):
    """forcedAlignment: aligns a phonetic transcription at the state level

    Args:
       lmfcc: NxD array of MFCC feature vectors (N vectors of dimension D)
              computed the same way as for the training of phoneHMMs
       phoneHMMs: set of phonetic Gaussian HMM models
       phoneTrans: list of phonetic symbols to be aligned, including
              initial and final silence

    Returns:
       list of strings in the form phoneme_index specifying, for each time
       step, the state from phoneHMMs corresponding to the Viterbi path.
    """
    utteranceHMM = concatHMMs(phoneHMMs, phoneTrans, digit=filename[:-1])
    stateTrans = [phone + '_' + str(stateid)
                  for phone in phoneTrans
                  for stateid in range(nstates[phone])]

    from sklearn.mixture import log_multivariate_normal_density
    import warnings
    warnings.simplefilter('ignore')

    obsloglik = log_multivariate_normal_density(
        lmfcc, utteranceHMM['means'], utteranceHMM['covars'], 'diag')
    _, viterbiStateIdTrans = viterbi(
        obsloglik,
        np.log(utteranceHMM['startprob']),
        np.log(utteranceHMM['transmat'][:-1, :-1]))
    return [stateTrans[idx] for idx in viterbiStateIdTrans]
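The core pattern here is frame-wise Gaussian log-likelihoods followed by log-domain Viterbi decoding. Below is a minimal, self-contained sketch of that pattern on a toy two-state model; the helper name viterbi_path and all the toy numbers are illustrative assumptions, not part of forcedAlignment.

# Sketch: Gaussian frame log-likelihoods + log-domain Viterbi on toy data.
import numpy as np

def viterbi_path(obsloglik, log_startprob, log_transmat):
    """Return the most likely state sequence for a log-likelihood matrix."""
    N, M = obsloglik.shape
    delta = np.empty((N, M))
    psi = np.zeros((N, M), dtype=int)
    delta[0] = log_startprob + obsloglik[0]
    for t in range(1, N):
        scores = delta[t - 1][:, None] + log_transmat
        psi[t] = scores.argmax(axis=0)
        delta[t] = scores.max(axis=0) + obsloglik[t]
    path = np.empty(N, dtype=int)
    path[-1] = delta[-1].argmax()
    for t in range(N - 2, -1, -1):
        path[t] = psi[t + 1, path[t + 1]]
    return path

# Toy example: 2 states, 1-D observations.
rng = np.random.RandomState(0)
obs = np.concatenate([rng.normal(0, 1, 20), rng.normal(5, 1, 20)])[:, None]
means, var = np.array([[0.0], [5.0]]), 1.0
obsloglik = -0.5 * (np.log(2 * np.pi * var) + (obs - means.T) ** 2 / var)
path = viterbi_path(obsloglik,
                    np.log([0.5, 0.5]),
                    np.log([[0.9, 0.1], [0.1, 0.9]]))
print(path)  # mostly 0s followed by 1s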
def _compute_log_weighted_gaussian_densities(self, X, i_comp):
    cur_means = self.means_[i_comp]
    cur_covs = self.covars_[i_comp]
    if self.covariance_type == 'spherical':
        cur_covs = cur_covs[:, np.newaxis]
    log_cur_weights = np.log(self.weights_[i_comp])

    return log_multivariate_normal_density(
        X, cur_means, cur_covs, self.covariance_type) + log_cur_weights
def gaussian_log_likelihood(x, mean, covariance):
    # keep only the diagonal of the covariance matrix
    covariance_diag = [covariance[i][i] for i in range(len(mean))]
    likelihood = log_multivariate_normal_density(np.array([x]),
                                                 np.array([mean]),
                                                 np.array([covariance_diag]),
                                                 covariance_type='diag')
    return likelihood[0][0]
def dup__div__(self, other):
    x = np.array([self._semantics])
    means = np.array([other._semantics])
    m, n = x.shape
    covars = np.ones((1, n))
    return log_multivariate_normal_density(x, means, covars)
def derGMMmodel(GMMmodel, UB):
    """
    Compute derivatives of the GMM model with respect to each corner as:

               sum(W * N(x, \mu, \Sigma) * (x - \mu).T inv(\Sigma))
      f'(x) = -----------------------------------------------------
               sum(W * N(x, \mu, \Sigma))
    """
    outUB = UB
    U = UB[0:2]
    B = UB[2:4]

    # --- Derivative with respect to the Upper corner
    denU = np.exp(GMMmodel['Upper'].score(U.reshape(1, -1)))
    numU = np.sum(
        np.exp(mixture.log_multivariate_normal_density(
            U.reshape(1, -1),
            GMMmodel['Upper'].means_,
            GMMmodel['Upper'].covars_,
            GMMmodel['Upper'].covariance_type))
        * GMMmodel['Upper'].weights_
        * (GMMmodel['Upper'].means_ - U).T
        * np.linalg.inv(GMMmodel['Upper'].covars_),
        axis=0)
    outUB[0:2] = numU / denU

    # --- Derivative with respect to the Bottom corner
    denB = np.exp(GMMmodel['Bottom'].score(B.reshape(1, -1)))
    numB = np.sum(
        np.exp(mixture.log_multivariate_normal_density(
            B.reshape(1, -1),
            GMMmodel['Bottom'].means_,
            GMMmodel['Bottom'].covars_,
            GMMmodel['Bottom'].covariance_type))
        * GMMmodel['Bottom'].weights_
        * (GMMmodel['Bottom'].means_ - B).T
        * np.linalg.inv(GMMmodel['Bottom'].covars_),
        axis=0)
    outUB[2:4] = numB / denB

    return outUB
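The ratio in the docstring is, up to sign, the gradient of the GMM log-density at x. Below is a hedged reference implementation of that gradient with scipy, assuming full covariance matrices; the function name and the toy check are mine, not part of derGMMmodel.

# Sketch: gradient of log(sum_k w_k N(x; mu_k, Sigma_k)) with respect to x.
import numpy as np
from scipy.stats import multivariate_normal

def gmm_logpdf_grad(x, weights, means, covars):
    """Gradient of the GMM log-density at point x."""
    dens = np.array([w * multivariate_normal.pdf(x, mean=m, cov=c)
                     for w, m, c in zip(weights, means, covars)])
    grads = np.array([d * np.linalg.solve(c, m - x)
                      for d, m, c in zip(dens, means, covars)])
    return grads.sum(axis=0) / dens.sum()

# Toy check: two symmetric components around the query point.
w = np.array([0.5, 0.5])
mu = np.array([[0.0, 0.0], [2.0, 2.0]])
cov = np.array([np.eye(2), np.eye(2)])
print(gmm_logpdf_grad(np.array([1.0, 1.0]), w, mu, cov))  # ~[0, 0] by symmetry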
def pca_enc_score(pca_predy, pca_y, pca_cov):
    samples = pca_predy.shape[0]
    if samples != pca_y.shape[0]:
        raise RuntimeError('X and y need to have the same number of samples')
    log_likelihood = np.empty((samples,))
    for i in range(samples):
        log_likelihood[i] = log_multivariate_normal_density(
            pca_y[i][None, :], pca_predy[i][None, :], pca_cov[None, :, :],
            covariance_type='full')
    return log_likelihood
def test_lmvnpdf_spherical():
    n_features, n_components, n_samples = 2, 3, 10
    mu = rng.randint(10) * rng.rand(n_components, n_features)
    spherecv = rng.rand(n_components, 1) ** 2 + 1
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    cv = np.tile(spherecv, (n_features, 1))
    reference = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = mixture.log_multivariate_normal_density(X, mu, spherecv, 'spherical')
    assert_array_almost_equal(lpr, reference)
def posterior_prob(self, obs, with_noise=False):
    """posterior probabilities for data under the model

    :type obs: ndarray
    :param obs: observations to be evaluated [n, tf, nc]
    :type with_noise: bool
    :param with_noise: if True, include the noise cluster as a component
        in the mixture. Default=False
    :rtype: ndarray
    :returns: matrix with per component posterior probabilities [n, c]
    """

    # check obs
    obs = sp.atleast_2d(obs)
    if len(obs) == 0:
        raise ValueError('no observations passed!')
    data = []
    if obs.ndim == 2:
        if obs.shape[1] != self._tf * self._nc:
            raise ValueError('data dimensions not compatible with model')
        for i in range(obs.shape[0]):
            data.append(obs[i])
    elif obs.ndim == 3:
        if obs.shape[1:] != (self._tf, self._nc):
            raise ValueError('data dimensions not compatible with model')
        for i in range(obs.shape[0]):
            data.append(mcvec_to_conc(obs[i]))
    data = sp.asarray(data, dtype=sp.float64)

    # build components
    comps = self.get_template_set(mc=False)
    if with_noise:
        comps = sp.vstack((comps, sp.zeros((self._tf * self._nc))))
    comps = comps.astype(sp.float64)
    if len(comps) == 0:
        return sp.zeros((len(obs), 1))

    # build priors
    prior = sp.array([self._lpr_s] * len(comps), dtype=sp.float64)
    if with_noise:
        prior[-1] = self._lpr_n

    # get sigma
    try:
        sigma = self._ce.get_cmx(tf=self._tf).astype(sp.float64)
    except Exception:
        return sp.zeros((len(obs), 1))

    # calc log probs
    lpr = log_multivariate_normal_density(data, comps, sigma, 'tied') + prior
    logprob = logsumexp(lpr, axis=1)
    return sp.exp(lpr - logprob[:, sp.newaxis])
def test_lmvnpdf_full():
    n_features, n_components, n_samples = 2, 3, 10
    mu = rng.randint(10) * rng.rand(n_components, n_features)
    cv = (rng.rand(n_components, n_features) + 1.0) ** 2
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    fullcv = np.array([np.diag(x) for x in cv])
    reference = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = mixture.log_multivariate_normal_density(X, mu, fullcv, 'full')
    assert_array_almost_equal(lpr, reference)
def test_lmvnpdf_diag():
    # test a slow and naive implementation of lmvnpdf and
    # compare it to the vectorized version (mixture.lmvnpdf) to test
    # for correctness
    n_features, n_components, n_samples = 2, 3, 10
    mu = rng.randint(10) * rng.rand(n_components, n_features)
    cv = (rng.rand(n_components, n_features) + 1.0) ** 2
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    ref = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = mixture.log_multivariate_normal_density(X, mu, cv, 'diag')
    assert_array_almost_equal(lpr, ref)
def getGMMProbs(GMM, argument, embeddings):
    # weighted variant:
    # logs = mixture.log_multivariate_normal_density(np.array([embeddings[argument]]),
    #     GMM.means_, GMM.covars_, GMM.covariance_type)[0] + np.log(GMM.weights_)
    X = np.array([embeddings[argument]])
    lpr = mixture.log_multivariate_normal_density(X, GMM.means_, GMM.covars_,
                                                  GMM.covariance_type)
    probs = np.exp(lpr)
    return probs[0]
def main():
    a = concatHMMs(phoneHMMs, namelist=prondict['4'])
    data = np.load('lab2_data.npz')['data'][10]

    # 4.1: observation log-likelihoods
    fakelog = log_multivariate_normal_density(data['lmfcc'], a['means'], a['covars'])
    # plt.pcolormesh(fakelog.transpose())
    # plt.colorbar()
    # plt.show()

    # 4.2: forward algorithm
    log_alpha = forward(fakelog, np.log(a['startprob']), np.log(a['transmat']))

    # 4.3: Viterbi decoding
    x = viterbi(fakelog, np.log(a['startprob']), np.log(a['transmat']))

    # 4.4: backward algorithm
    log_beta = backward(fakelog, np.log(a['startprob']), np.log(a['transmat']))

    # 5.1: state posteriors
    log_gamma = statePosteriors(log_alpha, log_beta)

    # 5.2: re-estimation (EM-style iterations)
    mu, covar = updateMeanAndVar(data['lmfcc'], log_gamma)
    new_fake_log = fakelog
    for _ in range(1, 15):
        log_alpha = forward(new_fake_log, np.log(a['startprob']), np.log(a['transmat']))
        log_beta = backward(new_fake_log, np.log(a['startprob']), np.log(a['transmat']))
        log_gamma = statePosteriors(log_alpha, log_beta)
        mu, covar = updateMeanAndVar(data['lmfcc'], log_gamma)
        new_fake_log = log_multivariate_normal_density_diag(
            data['lmfcc'], mu, covar)
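For reference, the statePosteriors step used above follows the standard identity gamma_t(j) = alpha_t(j) + beta_t(j) - log p(X) in the log domain, where log p(X) is the logsumexp over the final forward column. A minimal sketch; the function name and the scipy dependency are my assumptions:

# Sketch: state posteriors from forward/backward log probabilities.
import numpy as np
from scipy.special import logsumexp

def state_posteriors(log_alpha, log_beta):
    # gamma_t(j) = alpha_t(j) + beta_t(j) - log p(X)
    return log_alpha + log_beta - logsumexp(log_alpha[-1])

# usage on [n_frames, n_states] arrays like those computed above:
# log_gamma = state_posteriors(log_alpha, log_beta)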
def logPdf(self, x):
    x = np.array([x], ndmin=2)
    return log_multivariate_normal_density(x, self.mean, self.covar)
reload(proto2)
tidigits = np.load('lab2_tidigits.npz')['tidigits']
models = np.load('lab2_models.npz')['models']
example = np.load('lab2_example.npz')['example'].item()

plt.figure(1)

# Plot MFCC
plt.subplot(511)
plt.imshow(example['mfcc'].transpose(), origin='lower',
           interpolation='nearest', aspect='auto')

# Compute 4: multivariate Gaussian density
gmm_obsloglik = skm.log_multivariate_normal_density(
    example['mfcc'], models[0]['gmm']['means'], models[0]['gmm']['covars'], 'diag')
hmm_obsloglik = skm.log_multivariate_normal_density(
    example['mfcc'], models[0]['hmm']['means'], models[0]['hmm']['covars'], 'diag')

plt.subplot(512)
plt.imshow(gmm_obsloglik.transpose(), origin='lower',
           interpolation='nearest', aspect='auto')
plt.subplot(513)
plt.imshow(hmm_obsloglik.transpose(), origin='lower',
           interpolation='nearest', aspect='auto')

# Compute 5: GMM likelihood and recognition
# Retrieve example['gmm_loglik']
gmmloglik = proto2.gmmloglik(gmm_obsloglik, models[0]['gmm']['weights'])
from sklearn.mixture import log_multivariate_normal_density

plt.rcParams['image.cmap'] = 'jet'

tidigits = np.load('lab2_tidigits.npz', encoding='latin1')['tidigits']
models = np.load('lab2_models.npz', encoding='latin1')['models']
example = np.load('lab2_example.npz', encoding='latin1')['example'].item()

# (( 4 ))
# example using hmm: log probabilities of the Gaussians in the mixture given the samples
hmm_obsloglik = log_multivariate_normal_density(example['mfcc'],
                                                models[0]['hmm']['means'],
                                                models[0]['hmm']['covars'])
print(np.sum(hmm_obsloglik - example['hmm_obsloglik']))

# example using gmm: log probabilities of the Gaussians in the mixture given the samples
gmm_obsloglik = log_multivariate_normal_density(example['mfcc'],
                                                models[0]['gmm']['means'],
                                                models[0]['gmm']['covars'])
print(np.sum(gmm_obsloglik - example['gmm_obsloglik']))

# Utterance and model corresponding to the digit 'four':
# hmm_obsloglik4 = log_multivariate_normal_density(tidigits[10]['mfcc'],
#     models[5]['hmm']['means'], models[5]['hmm']['covars'])
# gmm_obsloglik4 = log_multivariate_normal_density(tidigits[10]['mfcc'],
#     models[5]['gmm']['means'], models[5]['gmm']['covars'])
wm_wcereb.set_index(df.index, inplace=True)
wm_wcereb.columns = ['wm_wcereb']
wm_vs_summary = wm_wcereb.merge(summary_wcereb, left_index=True, right_index=True)

# 1D GMM
input_df = summary_wcereb
g_1 = GMM(n_components=2, covariance_type='full', tol=1e-6, n_iter=700,
          params='wmc', init_params='wmc')
g_1.fit(input_df)

# calculate probabilities, disregarding the mixture weights
lpr = log_multivariate_normal_density(input_df, g_1.means_, g_1.covars_,
                                      g_1.covariance_type)
logprob = logsumexp(lpr, axis=1)
responsibilities = np.exp(lpr - logprob[:, np.newaxis])
probs = pd.DataFrame(responsibilities)
probs.set_index(input_df.index, inplace=True)
probs.columns = ['prob_0', 'prob_1']
probs.loc[:, 'color'] = 'k'
probs.loc[probs.prob_0 >= 0.90, 'color'] = 'r'
probs.loc[probs.prob_1 >= 0.90, 'color'] = 'b'

# plot 1D GMM
delta = 0.0001
x = np.arange(0.5, 1.2, delta)
mu_1, sigma_1 = g_1.means_[0][0], np.sqrt(g_1.covars_[0][0])
mu_2, sigma_2 = g_1.means_[1][0], np.sqrt(g_1.covars_[1][0])
intervals_1 = norm.interval(0.95, loc=mu_1, scale=sigma_1)
intervals_2 = norm.interval(0.95, loc=mu_2, scale=sigma_2)
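The responsibility computation above (per-component log densities, logsumexp normalization, exponentiation) can be reproduced on synthetic data without the deprecated GMM class. A minimal sketch; all the values and names here are illustrative:

# Sketch: unweighted responsibilities from diagonal-Gaussian log densities.
import numpy as np
from scipy.special import logsumexp

rng = np.random.RandomState(0)
x = np.concatenate([rng.normal(0.7, 0.05, 100),
                    rng.normal(1.0, 0.05, 100)])[:, None]

# per-component log densities, mixture weights deliberately ignored
means = np.array([[0.7], [1.0]])
variances = np.array([[0.05 ** 2], [0.05 ** 2]])
lpr = -0.5 * (np.log(2 * np.pi * variances.T)
              + (x - means.T) ** 2 / variances.T)
logprob = logsumexp(lpr, axis=1)
responsibilities = np.exp(lpr - logprob[:, None])
print(responsibilities[:3].round(3))  # rows sum to 1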
def _compute_log_likelihood(self, X):
    return log_multivariate_normal_density(X, self.means_, self._covars_,
                                           self.covariance_type)
    s_val = np.vstack((s_val, (classifier.weights_ * temp_val).sum(axis=1)))

    x_train = pd.DataFrame(s_train.T)
    x_test = pd.DataFrame(s_test.T)
    x_val = pd.DataFrame(s_val.T)
    x_train = x_train.drop(x_train.columns[[0]], axis=1)
    x_test = x_test.drop(x_test.columns[[0]], axis=1)
    x_val = x_val.drop(x_val.columns[[0]], axis=1)
    return x_train, x_test, x_val


s = classifier.score(X_train.todense())

from sklearn.utils.extmath import logsumexp
from sklearn import mixture

lpr = (mixture.log_multivariate_normal_density(X_train.todense(),
                                               classifier.means_,
                                               classifier.covars_,
                                               classifier.covariance_type)
       + np.log(classifier.weights_))  # per-component log probabilities
logprob = logsumexp(lpr, axis=1)  # logsumexp to get the GMM log probability
probs = np.exp(logprob)  # 0 < probs < 1

p = prior(y_train)
x = x_train + np.log(p)
x = x.astype(float)
d = x.as_matrix()
r = logsumexp(d, axis=1)
x['logsum'] = r
z = np.exp(x.subtract(x['logsum'], axis=0).drop('logsum', 1))
checkAccuracy(z, y_train)
tw = open(dir_dataset + "topic_sanders_twitter.txt", "r")
for l in tw:
    labels.append(l)

i = 0
num_topics = dict()
for tp in set(labels):
    num_topics[tp] = i
    i = i + 1
labels = [num_topics[x] for x in labels]

words = [w for w, v in model.vocab.items()]
word_vectors = model.syn0

gmm_model = mix.GMM(n_components=k, n_iter=1000, covariance_type='diag')
gmm_model.fit(word_vectors)

log_probs = mix.log_multivariate_normal_density(word_vectors, gmm_model.means_,
                                                gmm_model.covars_,
                                                gmm_model.covariance_type)
word_topic = list()
_, num_col = log_probs.shape
for col in range(num_col):
    top_n = 10
    log_component_probs = (log_probs[:, col]).T
    sorted_indexes = np.argsort(log_component_probs)[::-1][:top_n]
    ordered_word_probs = [(model.index2word[idx], log_component_probs[idx])
                          for idx in sorted_indexes]
    word_topic.append([model.index2word[idx] for idx in sorted_indexes])

    print('---')
    print("Topic {0}".format(col + 1))
    print("Total prob: " + str(sum(log_component_probs)))
    print(", ".join(["{w}: {p}".format(w=w, p=p)
                     for w, p in ordered_word_probs]))
def _compute_log_likelihood(self, obs):
    return log_multivariate_normal_density(obs, self._means_, self._covars_,
                                           self._covariance_type)
def adjust(x, mu):
    means = np.array([mu])
    m, n = x.shape
    covars = 0.01 * np.ones((1, n))
    p = log_multivariate_normal_density(x, means, covars)
    return np.sum((p > -0.5) * 1)
def select(x, mu):
    means = np.array([mu])
    m, n = x.shape
    covars = 0.01 * np.ones((1, n))
    p = log_multivariate_normal_density(x, means, covars)
    return p > -0.5
def normal(x, mu):
    means = np.array([mu])
    m, n = x.shape
    covars = 0.01 * np.ones((1, n))
    return log_multivariate_normal_density(x, means, covars)
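All of the snippets above share the same call shape. Since log_multivariate_normal_density was deprecated and later removed from scikit-learn, here is a hedged pure-NumPy equivalent for the diagonal case so the shapes can be checked anywhere; the function name is mine:

# Sketch: per-sample, per-component log N(x; mu_k, diag(sigma_k^2)).
import numpy as np

def log_mvn_density_diag(X, means, covars):
    """X: (n_samples, n_features); means, covars: (n_components, n_features)."""
    n_features = X.shape[1]
    return -0.5 * (n_features * np.log(2 * np.pi)
                   + np.log(covars).sum(axis=1)
                   + ((X[:, None, :] - means) ** 2 / covars).sum(axis=2))

X = np.random.randn(10, 2)       # n_samples x n_features
means = np.zeros((3, 2))         # n_components x n_features
covars = np.ones((3, 2))         # diagonal covariances
print(log_mvn_density_diag(X, means, covars).shape)  # (10, 3)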