def test_logsumexp():
    # Try to add some smallish numbers in logspace
    x = np.array([1e-40] * 1000000)
    logx = np.log(x)
    assert_almost_equal(np.exp(logsumexp(logx)), x.sum())

    X = np.vstack([x, x])
    logX = np.vstack([logx, logx])
    assert_array_almost_equal(np.exp(logsumexp(logX, axis=0)), X.sum(axis=0))
    assert_array_almost_equal(np.exp(logsumexp(logX, axis=1)), X.sum(axis=1))
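# For reference, a minimal numerically stable logsumexp of the kind this test
# exercises -- a sketch only; the snippets in this collection presumably
# import the real one from scipy.special or sklearn.utils.extmath.
import numpy as np

def logsumexp_ref(a, axis=None):
    """Compute log(sum(exp(a))) without overflow by shifting by the max."""
    a = np.asarray(a)
    a_max = np.max(a, axis=axis, keepdims=True)
    out = np.log(np.sum(np.exp(a - a_max), axis=axis, keepdims=True)) + a_max
    return np.squeeze(out, axis=axis) if axis is not None else out.item()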
def eval(self, X):
    """Evaluate the model on data

    Compute the log probability of X under the model and
    return the posterior distribution (responsibilities) of each
    mixture component for each element of X.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    logprob : array_like, shape (n_samples,)
        Log probabilities of each data point in X
    responsibilities : array_like, shape (n_samples, n_components)
        Posterior probabilities of each mixture component for each
        observation
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if X.size == 0:
        return np.array([]), np.empty((0, self.n_components))
    if X.shape[1] != self.means_.shape[1]:
        raise ValueError('the shape of X is not compatible with self')

    if self.blocksize > 0:
        logprob = np.zeros(X.shape[0], dtype=self.float_type)
        responsibilities = np.zeros((X.shape[0], self.n_components),
                                    dtype=self.float_type)
        if self.verbose:
            print("Running block multiplication")
        for block_id in range(0, X.shape[0], self.blocksize):
            blockend = min(X.shape[0], block_id + self.blocksize)
            lpr = (log_product_of_bernoullis_mixture_likelihood(
                X[block_id:blockend], self.log_odds_,
                self.log_inv_mean_sums_) + np.log(self.weights_))
            logprob[block_id:blockend] = logsumexp(lpr, axis=1)
            responsibilities[block_id:blockend] = np.exp(
                lpr - (logprob[block_id:blockend])[:, np.newaxis])
    else:
        lpr = (log_product_of_bernoullis_mixture_likelihood(
            X, self.log_odds_, self.log_inv_mean_sums_)
            + np.log(self.weights_))
        logprob = logsumexp(lpr, axis=1)
        responsibilities = np.exp(lpr - logprob[:, np.newaxis])
    return logprob, responsibilities
def _accumulate_sufficient_statistics(self, stats, seq, framelogprob,
                                      posteriors, fwdlattice, bwdlattice,
                                      params, seq_weight):
    stats['nobs'] += 1 * seq_weight
    if 's' in params:
        stats['start'] += posteriors[0] * seq_weight
    if 't' in params:
        n_observations, n_components = framelogprob.shape
        lneta = np.zeros((n_observations - 1, n_components, n_components))
        lnP = logsumexp(fwdlattice[-1])
        _hmmc._compute_lneta(n_observations, n_components, fwdlattice,
                             self._log_transmat, bwdlattice, framelogprob,
                             lnP, lneta)
        stats['trans'] += np.exp(logsumexp(lneta, 0)) * seq_weight
def normalize_logspace(a):
    """Normalize the array `a` in the log domain.

    Each row of `a` is a log discrete distribution. Returns the array
    normalized in the log domain while minimizing the possibility of
    numerical underflow.

    Parameters
    ----------
    a : ndarray
        The array to normalize in the log domain.

    Returns
    -------
    a : ndarray
        The array normalized in the log domain.
    lnorm : ndarray
        Log normalization constant of each row.

    .. note:: Adapted from Matlab:

        | Project: `Probabilistic Modeling Toolkit for Matlab/Octave
          <https://github.com/probml/pmtk3>`_.
        | Copyright (2010) Kevin Murphy and Matt Dunham
        | License: `MIT <https://github.com/probml/pmtk3/blob/5fefd068a2e84ae508684d3e4750bd72a4164ba0/license.txt>`_
    """
    l = logsumexp(a, 1)
    y = a.T - l
    return y.T, l
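# A quick usage sketch of the normalization above (assuming logsumexp from
# scipy.special); each row of the result exponentiates to a proper
# distribution.
import numpy as np
from scipy.special import logsumexp

a = np.log(np.array([[1.0, 3.0],
                     [2.0, 2.0]]))   # two unnormalized log-distributions
l = logsumexp(a, 1)                  # per-row log normalization constant
y = (a.T - l).T                      # same two steps as the function body
print(np.exp(y))                     # [[0.25 0.75], [0.5 0.5]]
print(np.exp(y).sum(axis=1))         # rows sum to 1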
def estimate_log_prob_resp(self, X):
    weighted_log_prob = self.estimate_weighted_log_prob(X)
    log_prob_norm = logsumexp(weighted_log_prob, axis=1)
    with np.errstate(under='ignore'):
        # ignore underflow
        log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]
    return log_prob_norm, log_resp
def fit(self, obs):
    # same implementation as in sklearn, but returns the learning curve
    if self.algorithm not in decoder_algorithms:
        self._algorithm = "viterbi"

    self._init(obs, self.init_params)

    logprob = []
    for i in range(self.n_iter):
        # Expectation step
        stats = self._initialize_sufficient_statistics()
        curr_logprob = 0
        for seq in obs:
            framelogprob = self._compute_log_likelihood(seq)
            lpr, fwdlattice = self._do_forward_pass(framelogprob)
            bwdlattice = self._do_backward_pass(framelogprob)
            gamma = fwdlattice + bwdlattice
            posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
            curr_logprob += lpr
            self._accumulate_sufficient_statistics(
                stats, seq, framelogprob, posteriors, fwdlattice,
                bwdlattice, self.params)
        logprob.append(curr_logprob)

        # Check for convergence.
        if i > 0 and abs(logprob[-1] - logprob[-2]) < self.thresh:
            break

        # Maximization step
        self._do_mstep(stats, self.params)

    return logprob
def _do_forward_pass(self, framelogprob):
    n_observations = framelogprob.shape[0]
    state_combinations = [tuple(x) for x in itertools.product(
        np.arange(self.n_states), repeat=self.n_chains)]
    fwdlattice = np.zeros((n_observations, self.n_states ** self.n_chains))
    fhmmc._forward(n_observations, self.n_chains, self.n_states,
                   state_combinations, self.log_startprob, self.log_transmat,
                   framelogprob, fwdlattice)
    return logsumexp(fwdlattice[-1]), fwdlattice
def eval(self, obs):
    """Evaluate the model on data

    Compute the log probability of `obs` under the model and
    return the posterior distribution (responsibilities) of each
    mixture component for each element of `obs`.

    Parameters
    ----------
    obs : array_like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    logprob : array_like, shape (n_samples,)
        Log probabilities of each data point in `obs`
    posteriors : array_like, shape (n_samples, n_components)
        Posterior probabilities of each mixture component for each
        observation
    """
    obs = np.asarray(obs)
    lpr = (lmvnpdf(obs, self._means, self._covars, self._cvtype)
           + self._log_weights)
    logprob = logsumexp(lpr, axis=1)
    posteriors = np.exp(lpr - logprob[:, np.newaxis])
    return logprob, posteriors
def score_samples(self, X):
    """Return the per-sample likelihood of the data under the model.

    Compute the log probability of X under the model and
    return the posterior distribution (responsibilities) of each
    mixture component for each element of X.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    logprob : array_like, shape (n_samples,)
        Log probabilities of each data point in X.
    responsibilities : array_like, shape (n_samples, n_components)
        Posterior probabilities of each mixture component for each
        observation
    """
    X = check_angular(X)

    # Don't use components whose weights fell to 0. Is this correct? Hack
    # for use in webapp.
    ## TODO: REMOVE BEFORE SUBMITTING TO SKLEARN!
    good_comps = (abs(self.weights_) > 1e-8)

    logprobs = (log_vmf_pdf(X, self.means_[good_comps], self.precs_)
                + np.log(self.weights_[good_comps][np.newaxis]))
    logprob = logsumexp(logprobs, axis=1)
    responsibilities = np.exp(logprobs - logprob[:, np.newaxis])
    return logprob, responsibilities
def _exact_loglikelihood(self, ob):
    log_transmat = np.zeros((self.n_chains, self.n_states, self.n_states))
    log_startprob = np.zeros((self.n_chains, self.n_states))
    for idx, chain in enumerate(self.chains_):
        log_transmat[idx] = chain._log_transmat
        log_startprob[idx] = chain._log_startprob

    n_state_combinations = self.n_states ** self.n_chains
    state_combinations = [tuple(x) for x in itertools.product(
        np.arange(self.n_states), repeat=self.n_chains)]
    n_observations = ob.shape[0]
    n_features = ob.shape[1]
    fwdlattice = np.zeros((n_observations, n_state_combinations))

    # Calculate means and covariances for all state combinations and
    # calculate emission probabilities
    weight = 1.0 / float(self.n_chains)
    weight_squared = weight * weight
    # TODO: add support for all covariance types
    covars = np.zeros((n_state_combinations, n_features))
    means = np.zeros((n_state_combinations, n_features))
    for idx, state_combination in enumerate(state_combinations):
        for chain_idx, state in enumerate(state_combination):
            chain = self.chains_[chain_idx]
            covars[idx] += chain._covars_[state]
            means[idx] += chain._means_[state]
        covars[idx] *= weight_squared
        means[idx] *= weight
    # TODO: add support for all covariance types
    framelogprob = log_multivariate_normal_density(ob, means, covars,
                                                   covariance_type='diag')

    # Run the forward algorithm
    fhmmc._forward(n_observations, self.n_chains, self.n_states,
                   state_combinations, log_startprob, log_transmat,
                   framelogprob, fwdlattice)

    last_column = fwdlattice[-1]
    assert np.size(last_column) == n_state_combinations
    score = logsumexp(last_column)
    return score
def log_likelihood(X, doc_topic_distr, topic_word_distr):
    log_doc_topic_distr = np.log(doc_topic_distr + 10 * EPS)
    log_topic_word_distr = np.log(topic_word_distr + 10 * EPS)

    is_sparse_x = sp.issparse(X)
    n_samples, n_features = X.shape
    n_topics = topic_word_distr.shape[0]

    if is_sparse_x:
        X_data = X.data
        X_indices = X.indices
        X_indptr = X.indptr

    log_lik = 0.0
    for dd in range(n_samples):
        if is_sparse_x:
            ids = X_indices[X_indptr[dd]:X_indptr[dd + 1]]
            cnts = X_data[X_indptr[dd]:X_indptr[dd + 1]]
        else:
            ids = np.nonzero(X[dd, :])[0]
            cnts = X[dd, ids]
        # log p(word | doc) = logsumexp over topics of
        # log p(topic | doc) + log p(word | topic)
        log_word_d = logsumexp(
            (log_doc_topic_distr[dd, :])[:, np.newaxis]
            + log_topic_word_distr[:, ids], axis=0)
        log_lik += np.sum(log_word_d * cnts)
    return log_lik
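# A tiny sanity check of the per-word mixture computation above (a sketch;
# EPS is assumed to be a small module-level constant such as
# np.finfo(float).eps, and the log_likelihood function above is in scope).
import numpy as np
import scipy.sparse as sp
from scipy.special import logsumexp

EPS = np.finfo(float).eps
doc_topic = np.array([[0.6, 0.4]])        # 1 document, 2 topics
topic_word = np.array([[0.7, 0.2, 0.1],   # 2 topics, 3 words
                       [0.1, 0.3, 0.6]])
X = np.array([[2, 0, 1]])                 # word counts for the document

ll = log_likelihood(X, doc_topic, topic_word)
# Each word contributes count * log(sum_k p(k|doc) p(word|k)):
expected = 2 * np.log(0.6 * 0.7 + 0.4 * 0.1) + np.log(0.6 * 0.1 + 0.4 * 0.6)
assert np.isclose(ll, expected)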
def _fit(self, obs):
    prev_loglikelihood = None
    for iteration in range(self.n_training_iterations):
        stats = self._initialize_sufficient_statistics()
        curr_loglikelihood = 0
        for seq in obs:
            # Forward-backward pass and accumulate stats
            framelogprob = self._compute_log_likelihood(seq)
            lpr, fwdlattice = self._do_forward_pass(framelogprob)
            bwdlattice = self._do_backward_pass(framelogprob)
            gamma = fwdlattice + bwdlattice
            posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
            # posteriors must sum to 1 for each t
            assert np.allclose(np.sum(posteriors, axis=1), 1.0)
            curr_loglikelihood += lpr
            self._accumulate_sufficient_statistics(stats, seq, framelogprob,
                                                   posteriors, fwdlattice,
                                                   bwdlattice)

        # Test for convergence
        if prev_loglikelihood is not None:
            delta = curr_loglikelihood - prev_loglikelihood
            print('%f (%f)' % (curr_loglikelihood, delta))
            # Likelihood when training with Baum-Welch should grow
            # monotonically
            assert delta >= -0.01
            if delta <= self.training_threshold:
                break

        self._do_mstep(stats)
        prev_loglikelihood = curr_loglikelihood
def E_step(self, X):
    N, D = X.shape
    lpr = np.zeros((N, self.gmm.K))
    logdet = np.zeros(self.gmm.K)
    dterms = np.arange(1, D + 1)  # 1, 2, 3, ... D
    self.invWchol = list()
    for k in range(self.gmm.K):
        dXm = X - self.qMixComp[k].m
        L = scipy.linalg.cholesky(self.qMixComp[k].invW, lower=True)
        self.invWchol.append(L)

        if np.any(np.isnan(L) | np.isinf(L)):
            print('NaN!', self.qMixComp[k])

        #invL = scipy.linalg.inv( L )
        # want: Q = invL * X.T
        # so we solve for matrix Q s.t. L*Q = X.T
        lpr[:, k] = -0.5 * self.qMixComp[k].dF \
            * np.sum(scipy.linalg.solve_triangular(L, dXm.T, lower=True)**2,
                     axis=0)
        lpr[:, k] -= 0.5 * D / self.qMixComp[k].beta

        # det( W ) = 1/det(invW) = 1/det( L )**2
        # det of triangle matrix = prod of diag entries
        logdet[k] = -2 * np.sum(np.log(np.diag(L))) + D * np.log(2.0)
        logdet[k] += digamma(0.5 * (dterms + 1 + self.qMixComp[k].dF)).sum()

    self.logwtilde = digamma(self.alpha) - digamma(self.alpha.sum())
    self.logLtilde = logdet
    lpr += self.logwtilde
    lpr += logdet
    lprSUM = logsumexp(lpr, axis=1)
    resp = np.exp(lpr - lprSUM[:, np.newaxis])
    resp /= resp.sum(axis=1)[:, np.newaxis]  # row normalize
    return resp
def test_multinomial_loss_ground_truth():
    # n_samples, n_features, n_classes = 4, 2, 3
    n_classes = 3
    X = np.array([[1.1, 2.2], [2.2, -4.4], [3.3, -2.2], [1.1, 1.1]])
    y = np.array([0, 1, 2, 0])
    lbin = LabelBinarizer()
    Y_bin = lbin.fit_transform(y)

    weights = np.array([[0.1, 0.2, 0.3], [1.1, 1.2, -1.3]])
    intercept = np.array([1., 0, -.2])
    sample_weights = np.array([0.8, 1, 1, 0.8])

    prediction = np.dot(X, weights) + intercept
    logsumexp_prediction = logsumexp(prediction, axis=1)
    p = prediction - logsumexp_prediction[:, np.newaxis]
    loss_1 = -(sample_weights[:, np.newaxis] * p * Y_bin).sum()
    diff = sample_weights[:, np.newaxis] * (np.exp(p) - Y_bin)
    grad_1 = np.dot(X.T, diff)

    weights_intercept = np.vstack((weights, intercept)).T.ravel()
    loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
                                               0.0, sample_weights)
    grad_2 = grad_2.reshape(n_classes, -1)
    grad_2 = grad_2[:, :-1].T

    assert_almost_equal(loss_1, loss_2)
    assert_array_almost_equal(grad_1, grad_2)

    # ground truth
    loss_gt = 11.680360354325961
    grad_gt = np.array([[-0.557487, -1.619151, +2.176638],
                        [-0.903942, +5.258745, -4.354803]])
    assert_almost_equal(loss_1, loss_gt)
    assert_array_almost_equal(grad_1, grad_gt)
def eval(self, X):
    """Evaluate the model on data

    Compute the log probability of X under the model and
    return the posterior distribution (responsibilities) of each
    mixture component for each element of X.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    logprob : array_like, shape (n_samples,)
        Log probabilities of each data point in X
    responsibilities : array_like, shape (n_samples, n_components)
        Posterior probabilities of each mixture component for each
        observation
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if X.size == 0:
        return np.array([]), np.empty((0, self.n_components))
    if X.shape[1] != self.means_.shape[1]:
        raise ValueError("the shape of X is not compatible with self")

    lpr = (log_multivariate_normal_density(X, self.means_, self.covars_,
                                           self.covariance_type)
           + np.log(self.weights_))
    logprob = logsumexp(lpr, axis=1)
    responsibilities = np.exp(lpr - logprob[:, np.newaxis])
    return logprob, responsibilities
def compute_pvalue(distr, N, x, current_p):
    """Compute log2 pvalue"""
    sum_num = []
    sum_denum = []
    for i in range(N + 1):
        p1 = get_log_value(i, distr)
        p2 = get_log_value(N - i, distr)
        p = p1 + p2
        # if current_p >= p:
        if i <= x:
            sum_num.append(p)
        sum_denum.append(p)
    return logsumexp(np.array(sum_num)) - logsumexp(np.array(sum_denum))
def _do_forward_pass(self, framelogprob):
    n_observations, n_components = framelogprob.shape
    fwdlattice = np.zeros((n_observations, n_components))
    _hmmc._forward(n_observations, n_components, self._log_startprob,
                   self._log_transmat, framelogprob, fwdlattice)
    fwdlattice[fwdlattice <= ZEROLOGPROB] = NEGINF
    return logsumexp(fwdlattice[-1]), fwdlattice
def _accumulate_sufficient_statistics(self, stats, seq, framelogprob,
                                      posteriors, fwdlattice, bwdlattice,
                                      params):
    stats['nobs'] += 1
    if 's' in params:
        stats['start'] += posteriors[0]
    if 't' in params:
        n_observations, n_components = framelogprob.shape
        # when the sample is of length 1, it contains no transitions
        # so there is no reason to update our trans. matrix estimate
        if n_observations > 1:
            lneta = np.zeros((n_observations - 1, n_components, n_components))
            lnP = logsumexp(fwdlattice[-1])
            _hmmc._compute_lneta(n_observations, n_components, fwdlattice,
                                 self._log_transmat, bwdlattice, framelogprob,
                                 lnP, lneta)
            stats['trans'] += np.exp(np.minimum(logsumexp(lneta, 0), 700))
def fitChunk(Xchunk):
    N, D = Xchunk.shape
    shLock.acquire()
    w = sh2np(shWeights).copy()
    m = sh2np(shMeans, (K, D)).copy()
    c = sh2np(shCovars, (K, D)).copy()
    shLock.release()

    # E step
    # resp : N x K matrix
    #   resp[n,k] = Pr( z[n]=k | X[n], Mu[k], Sigma[k] )
    #   properly normalized, so sum( resp, axis=1) = 1.0
    lpr = np.log(w) + log_multivariate_normal_density_diag(Xchunk, m, c)
    lprNORMCONST = logsumexp(lpr, axis=1)
    resp = np.exp(lpr - lprNORMCONST[:, np.newaxis])

    # M step
    Nresp = resp.sum(axis=0)
    wChunk = Nresp / (Nresp.sum() + EPS)
    wavg_X = np.dot(resp.T, Xchunk)
    mChunk = wavg_X / Nresp[:, np.newaxis]
    wavg_X2 = np.dot(resp.T, Xchunk**2)
    wavg_M2 = m**2 * Nresp[:, np.newaxis]
    wavg_XM = wavg_X * m
    cChunk = wavg_X2 - 2 * wavg_XM + wavg_M2
    cChunk /= Nresp[:, np.newaxis]

    #avg_X2 = np.dot(resp.T, Xchunk * Xchunk) * (N*wChunk[:,np.newaxis] )
    #avg_means2 = m ** 2
    #avg_X_means = m * weighted_X_sum * (N*wChunk[:,np.newaxis] )
    #cChunk = avg_X2 - 2 * avg_X_means + avg_means2 + MIN_VAR

    # Synchronize global
    shLock.acquire()
    tstart = time.time() - T_START
    ww = sh2np(shWeights)
    #info(" used to compute local updates %.3f %.3f" % ( w[0], w[1] ) )
    #info("now using possibly fresher value %.3f %.3f" % ( ww[0], ww[1] ) )
    mm = sh2np(shMeans, (K, D))
    cc = sh2np(shCovars, (K, D))
    t = sh2np(shIterCount, (1, 1))
    t += 1
    rho = (t + delay)**(-kappa)
    ww[:] = (1 - rho) * ww + rho * wChunk
    mm[:, :] = (1 - rho) * mm + rho * mChunk
    cc[:, :] = (1 - rho) * cc + rho * cChunk
    tstop = time.time() - T_START
    #info(" %.3f | %.4f-%.4f sec" % ( rho, tstart, tstop ) )
    shLock.release()
def log_pdf(self, x, nargout=1):
    lpr = np.empty((len(x), self.n_components))
    for i, c in enumerate(self.components):
        lpr[:, i] = c.log_pdf(x) + np.log(c.weight)
    logprob = logsumexp(lpr, axis=1)
    if nargout > 1:
        component_posterior = np.exp(lpr - logprob[:, np.newaxis])
        return logprob, component_posterior
    return logprob
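# A standalone sketch of the same mixture computation, assuming components
# that expose log_pdf and weight as above; the Component class here is a
# hypothetical stand-in, not the original project's API.
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

class Component:
    """Minimal stand-in for the component interface assumed above."""
    def __init__(self, mean, std, weight):
        self.dist, self.weight = norm(mean, std), weight
    def log_pdf(self, x):
        return self.dist.logpdf(x)

components = [Component(0.0, 1.0, 0.3), Component(5.0, 2.0, 0.7)]
x = np.array([0.0, 2.5, 5.0])
lpr = np.stack([c.log_pdf(x) + np.log(c.weight) for c in components], axis=1)
logprob = logsumexp(lpr, axis=1)             # mixture log-density per point
posterior = np.exp(lpr - logprob[:, None])   # component responsibilities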
def posterior(pXoverC, prior):
    #x = pXoverC*prior
    #x['sum'] = x.sum(axis=1)
    #z = x.div(x['sum'],axis = 0).drop('sum',1)
    x = pXoverC + np.log(prior)
    x = x.astype(float)
    # as_matrix() was removed from pandas; to_numpy() is the replacement
    x['logsum'] = logsumexp(x.to_numpy(), axis=1)
    z = np.exp(x.subtract(x['logsum'], axis=0).drop(columns='logsum'))
    return z
def score(self, x):
    """Get the log prob of the x variable being generated by the mixture."""
    x = x.reshape((1, len(x)))
    lpr = np.log(self.weights) + self.log_multivariate_normal_density_diag(
        x, self.means, self.covars)
    log_prob = logsumexp(lpr)
    return log_prob
def _gam(self, X):
    log_gs_nk = np.array([log_gaussian(X, self.mean[i], self.cov[i])
                          for i in range(self.K)]).T
    log_pi_gs_nk = np.log(self.pi)[na, :] + log_gs_nk
    log_gam_nk = log_pi_gs_nk - extmath.logsumexp(log_pi_gs_nk, axis=1)[:, na]
    return np.exp(log_gam_nk)
def _infer_markov_blanket(h, seq):
    framelogprob = h._compute_log_likelihood(seq)
    logprob, fwdlattice = h._do_forward_pass(framelogprob)
    bwdlattice = h._do_backward_pass(framelogprob)
    gamma = fwdlattice + bwdlattice - framelogprob
    posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
    posteriors += np.finfo(np.float32).eps
    posteriors /= np.sum(posteriors, axis=1).reshape((-1, 1))
    return posteriors
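# The gamma-to-posterior step used here (and in the fit/_fit loops above) is
# just a row-wise log-space normalization; the transpose trick is equivalent
# to subtracting logsumexp(gamma, axis=1)[:, np.newaxis]. A standalone sketch
# with made-up lattices:
import numpy as np
from scipy.special import logsumexp

fwdlattice = np.log(np.array([[0.4, 0.6], [0.5, 0.5], [0.3, 0.7]]))
bwdlattice = np.log(np.array([[0.5, 0.5], [0.6, 0.4], [1.0, 1.0]]))

gamma = fwdlattice + bwdlattice
posteriors = np.exp(gamma - logsumexp(gamma, axis=1)[:, np.newaxis])
assert np.allclose(posteriors.sum(axis=1), 1.0)  # each row is a distribution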
def EvaluateDatumInOneDimension(self, gmm, datum, iii):
    pVarInGaussianLogE = [
        np.log(w) + NormalDistributionLoge(gmm.means_[k][iii],
                                           gmm.covars_[k][iii][iii],
                                           datum.annotations[iii])
        for k, w in enumerate(gmm.weights_)]
    # np.log10(Sum(pi_k * p(v|n,k)))
    return logsumexp(np.array(pVarInGaussianLogE)) / np.log(10)
def predict_proba(self, X):
    assert self.means is not None, "Model not trained"
    weighted_log_probs = np.log(self.weights) \
        + np.array([multivariate_normal.logpdf(X, mean=self.means[i],
                                               cov=self.covariances[i])
                    for i in range(self.k)]).T
    log_prob_norm = logsumexp(weighted_log_probs, axis=1)
    return np.exp(weighted_log_probs - log_prob_norm[:, np.newaxis])
def score_samples(X, means, weights, covars, covariance_type='diag'):
    lpr = (log_multivariate_normal_density(X, means, covars, covariance_type)
           + np.log(weights))
    logprob = logsumexp(lpr.clip(min=-300), axis=1)
    #responsibilities = np.exp(lpr - logprob[:, np.newaxis])
    log_resp = lpr - logprob[:, np.newaxis]
    return logprob, log_resp
def _multinomial_loss(w, X, Y, alpha, sample_weight):
    """Computes multinomial loss and class probabilities.

    Parameters
    ----------
    w : ndarray, shape (n_classes * n_features,) or
        (n_classes * (n_features + 1),)
        Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    Y : ndarray, shape (n_samples, n_classes)
        Transformed labels according to the output of LabelBinarizer.

    alpha : ndarray, shape (n_classes * n_features,)
        Per-coefficient regularization parameters, each equal to 1 / C.

    sample_weight : array-like, shape (n_samples,), optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    loss : float
        Multinomial loss.

    p : ndarray, shape (n_samples, n_classes)
        Estimated class probabilities.

    w : ndarray, shape (n_classes, n_features)
        Reshaped param vector excluding intercept terms.

    Reference
    ---------
    Bishop, C. M. (2006). Pattern recognition and machine learning.
    Springer. (Chapter 4.3.4)
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    fit_intercept = w.size == (n_classes * (n_features + 1))
    w = w.reshape(n_classes, -1)
    alpha = alpha.reshape(n_classes, -1)
    sample_weight = sample_weight[:, np.newaxis]
    if fit_intercept:
        intercept = w[:, -1]
        w = w[:, :-1]
    else:
        intercept = 0
    p = safe_sparse_dot(X, w.T)
    p += intercept
    p -= logsumexp(p, axis=1)[:, np.newaxis]
    loss = -(sample_weight * Y * p).sum()
    loss += 0.5 * ((alpha + L2_REG) * w * w).sum()
    p = np.exp(p, p)
    return loss, p, w
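# A tiny worked example of the same softmax cross-entropy computation: log
# class probabilities via logsumexp, then a weighted negative log-likelihood
# plus an L2 penalty. All values here are made up for illustration.
import numpy as np
from scipy.special import logsumexp

X = np.array([[1.0, 2.0], [0.5, -1.0]])
Y = np.array([[1, 0], [0, 1]], dtype=float)  # one-hot labels, 2 classes
w = np.array([[0.1, -0.2], [0.3, 0.4]])      # (n_classes, n_features)
alpha = 0.5                                  # ridge strength
sw = np.ones(2)[:, np.newaxis]               # sample weights

p = X @ w.T
p -= logsumexp(p, axis=1)[:, np.newaxis]     # log class probabilities
loss = -(sw * Y * p).sum() + 0.5 * alpha * (w * w).sum()
proba = np.exp(p)                            # rows sum to 1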
def rpotts(X, model):
    features, A = X
    n_classes = len(model.classes_)
    n_sites = features.shape[0]
    n_neighbors = A.sum(axis=1).reshape(-1, 1)

    R = np.random.uniform(size=(n_sites, 1))
    lower = np.empty((n_sites, 1))
    upper = np.empty((n_sites, 1))

    betas = model.coef_[:, :-1]
    eta = model.coef_[:, -1:]
    p = safe_sparse_dot(features, betas.T, dense_output=True)
    p += model.intercept_
    p_nonspatial = np.hstack((p, np.zeros((features.shape[0], 1))))
    p_nonspatial -= logsumexp(p_nonspatial, axis=1)[:, np.newaxis]
    p_nonspatial = np.exp(p_nonspatial, p_nonspatial)

    _target = model.lbin.transform
    while not np.array_equal(upper, lower):
        R = np.hstack((np.random.uniform(size=R.shape), R))
        print(upper.sum(), lower.sum())
        lower[:] = 0
        upper[:] = n_classes - 1
        for r in R.T:
            r = r.reshape(-1, 1)

            upper_multi = _target(upper)
            upper_spatial = safe_sparse_dot(
                A, (upper_multi - p_nonspatial))[:, :-1] / n_neighbors
            upper_spatial[np.isnan(upper_spatial)] = 0
            upper_p = p + (eta.T * np.array(upper_spatial))
            upper_p = softmax(np.hstack((upper_p,
                                         np.zeros((features.shape[0], 1)))))
            upper_p = upper_p.cumsum(axis=1)

            lower_multi = _target(lower)
            lower_spatial = safe_sparse_dot(
                A, (lower_multi - p_nonspatial))[:, :-1] / n_neighbors
            lower_spatial[np.isnan(lower_spatial)] = 0
            lower_p = p + (eta.T * np.array(lower_spatial))
            lower_p = softmax(np.hstack((lower_p,
                                         np.zeros((features.shape[0], 1)))))
            lower_p = lower_p.cumsum(axis=1)

            upper = (upper_p > r).argmax(axis=1)
            lower = (lower_p > r).argmax(axis=1)
    return lower.reshape(-1, 1)
def fit(self, obs):
    """Estimate model parameters.

    An initialization step is performed before entering the EM
    algorithm. If you want to avoid this step, pass proper
    ``init_params`` keyword argument to estimator's constructor.

    Parameters
    ----------
    obs : list
        List of array-like observation sequences, each of which
        has shape (n_i, n_features), where n_i is the length of
        the i_th observation.

    Notes
    -----
    In general, `logprob` should be non-decreasing unless
    aggressive pruning is used. Decreasing `logprob` is generally
    a sign of overfitting (e.g. a covariance parameter getting too
    small). You can fix this by getting more training data, or
    strengthening the appropriate subclass-specific regularization
    parameter.
    """
    if self.algorithm not in decoder_algorithms:
        self._algorithm = "viterbi"

    self._init(obs, self.init_params)

    logprob = []
    for i in range(self.n_iter):
        # Expectation step
        stats = self._initialize_sufficient_statistics()
        curr_logprob = 0
        for seq in obs:
            framelogprob = self._compute_log_likelihood(seq)
            lpr, fwdlattice = self._do_forward_pass(framelogprob)
            bwdlattice = self._do_backward_pass(framelogprob)
            gamma = fwdlattice + bwdlattice
            posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
            curr_logprob += lpr
            self._accumulate_sufficient_statistics(
                stats, seq, framelogprob, posteriors, fwdlattice,
                bwdlattice, self.params)
        logprob.append(curr_logprob)

        # Check for convergence.
        if i > 0 and logprob[-1] - logprob[-2] < self.thresh:
            break

        # Maximization step
        self._do_mstep(stats, self.params)

    self.logprob_ = logprob
    return self
def expectation(self, x):
    """Evaluate one example"""
    lpr = np.log(self.weights) + self.log_multivariate_normal_density_diag(
        x, self.means, self.covars)
    log_prob = logsumexp(lpr)
    responsibilities = np.exp(lpr - log_prob)
    return log_prob, responsibilities
def calcObj(self, xtrain, obj='MAP'):
    jll = self._joint_log_likelihood(xtrain)
    # normalize by P(x) = P(f_1, ..., f_n)
    log_prob_x = logsumexp(jll, axis=1)
    log_prob = np.sum(log_prob_x, axis=0)
    if obj == 'ML':
        return log_prob
    elif obj == 'MAP':
        log_theta = np.sum(self.class_log_prior_) \
            + np.sum(self.feature_log_prob_)
        log_prob = log_prob + (self.alpha - 1) * log_theta
        return log_prob
def posterior_prob(self, obs, with_noise=False):
    """posterior probabilities for data under the model

    :type obs: ndarray
    :param obs: observations to be evaluated [n, tf, nc]
    :type with_noise: bool
    :param with_noise: if True, include the noise cluster as component
        in the mixture. Default=False
    :rtype: ndarray
    :returns: matrix with per component posterior probabilities [n, c]
    """

    # check obs
    obs = sp.atleast_2d(obs)
    if len(obs) == 0:
        raise ValueError('no observations passed!')
    data = []
    if obs.ndim == 2:
        if obs.shape[1] != self._tf * self._nc:
            raise ValueError('data dimensions not compatible with model')
        for i in range(obs.shape[0]):
            data.append(obs[i])
    elif obs.ndim == 3:
        if obs.shape[1:] != (self._tf, self._nc):
            raise ValueError('data dimensions not compatible with model')
        for i in range(obs.shape[0]):
            data.append(mcvec_to_conc(obs[i]))
    data = sp.asarray(data, dtype=sp.float64)

    # build comps
    comps = self.get_template_set(mc=False)
    if with_noise:
        comps = sp.vstack((comps, sp.zeros((self._tf * self._nc))))
    comps = comps.astype(sp.float64)
    if len(comps) == 0:
        return sp.zeros((len(obs), 1))

    # build priors
    prior = sp.array([self._lpr_s] * len(comps), dtype=sp.float64)
    if with_noise:
        prior[-1] = self._lpr_n

    # get sigma
    try:
        sigma = self._ce.get_cmx(tf=self._tf).astype(sp.float64)
    except Exception:
        return sp.zeros((len(obs), 1))

    # calc log probs
    lpr = (log_multivariate_normal_density(data, comps, sigma, 'tied')
           + prior)
    logprob = logsumexp(lpr, axis=1)
    return sp.exp(lpr - logprob[:, sp.newaxis])
def time_combine_likelihoods_with_logsumexp(log_likelihoods, bin_start_list):
    """Combines log-likelihoods of features (cols) over time (rows) to reach
    n temporal bins

    ARGS
        log_likelihoods: matrix of log-likelihoods, one row per time step
        bin_start_list: has the starting row index of each bin <int>
    RETURN
        time_binned_log_likelihoods: time binned log-likelihoods, combined
            column-wise with logsumexp <numpy array>
    """
    if len(bin_start_list) == 1:
        return logsumexp(log_likelihoods, axis=0)

    n_bins = len(bin_start_list)
    n_columns = log_likelihoods.shape[1]
    time_binned_log_likelihoods = np.empty((n_bins, n_columns))
    for idx in range(n_bins - 1):
        bin_start = bin_start_list[idx]
        bin_end = bin_start_list[idx + 1]
        time_binned_log_likelihoods[idx] = logsumexp(
            log_likelihoods[bin_start:bin_end], axis=0)
    # The last bin runs from its start index to the end of the data.
    time_binned_log_likelihoods[-1] = logsumexp(
        log_likelihoods[bin_start_list[-1]:], axis=0)
    return time_binned_log_likelihoods
def ref_forward(log_transmat_T, log_startprob, frame_logprob, n_states):
    fwdlattice = np.zeros_like(frame_logprob)
    work_buffer = np.zeros(n_states)
    for i in range(n_states):
        fwdlattice[0, i] = log_startprob[i] + frame_logprob[0, i]
    for t in range(1, frame_logprob.shape[0]):
        for j in range(n_states):
            for i in range(n_states):
                work_buffer[i] = fwdlattice[t - 1, i] + log_transmat_T[j, i]
            fwdlattice[t, j] = logsumexp(work_buffer) + frame_logprob[t, j]
    return fwdlattice
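# A small usage sketch for the reference forward pass above (toy values).
# Note ref_forward indexes the transition matrix as [to_state, from_state],
# so the log transition matrix is passed transposed.
import numpy as np
from scipy.special import logsumexp

transmat = np.array([[0.9, 0.1],
                     [0.2, 0.8]])
startprob = np.array([0.5, 0.5])
frame_logprob = np.log(np.array([[0.7, 0.3],
                                 [0.4, 0.6],
                                 [0.5, 0.5]]))  # per-frame emission probs

fwd = ref_forward(np.log(transmat).T, np.log(startprob), frame_logprob, 2)
total_loglik = logsumexp(fwd[-1])  # log p(observations) under the toy model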