def mixmixlogp(value, point):
    floatX = theano.config.floatX
    priorlogp = st.dirichlet.logpdf(x=point['g_w'],
                                    alpha=np.ones(nbr)*0.0000001,
                                    ).astype(floatX) + \
                st.expon.logpdf(x=point['mu_g']).sum(dtype=floatX) + \
                st.dirichlet.logpdf(x=point['l_w'],
                                    alpha=np.ones(nbr)*0.0000001,
                                    ).astype(floatX) + \
                st.expon.logpdf(x=point['mu_l']).sum(dtype=floatX) + \
                st.dirichlet.logpdf(x=point['mix_w'],
                                    alpha=np.ones(2),
                                    ).astype(floatX)
    complogp1 = st.norm.logpdf(x=value, loc=point['mu_g']).astype(floatX)
    mixlogp1 = logsumexp(np.log(point['g_w']).astype(floatX) + complogp1,
                         axis=-1, keepdims=True)
    complogp2 = st.lognorm.logpdf(value, 1., 0., np.exp(point['mu_l'])).astype(floatX)
    mixlogp2 = logsumexp(np.log(point['l_w']).astype(floatX) + complogp2,
                         axis=-1, keepdims=True)
    complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
    mixmixlogpg = logsumexp(np.log(point['mix_w']).astype(floatX) + complogp_mix,
                            axis=-1, keepdims=True)
    return priorlogp, mixmixlogpg
def logpdf(self, pts, pool=None):
    """Evaluate the logpdf of the KDE at `pts`."""
    logpdfs = [logweight + kde(pts, pool=pool)
               for logweight, kde in zip(self._logweights, self._kdes)]
    if len(pts.shape) == 1:
        return logsumexp(logpdfs)
    else:
        return logsumexp(logpdfs, axis=0)
def _margtimedist_loglr(self, mf_snr, opt_snr):
    """Returns the log likelihood ratio marginalized over time and distance.
    """
    logl = special.logsumexp(mf_snr, b=self._deltat)
    logl_marg = logl/self._dist_array
    opt_snr_marg = opt_snr/self._dist_array**2
    return special.logsumexp(logl_marg - 0.5*opt_snr_marg,
                             b=self._deltad*self.dist_prior)
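# A hedged, self-contained sketch (hypothetical grid, prior, and likelihood curve;
# none of these names come from the method above) of the quadrature pattern the
# marginalized log likelihoods here rely on: logsumexp with b = grid_spacing * prior
# evaluates log( integral of exp(logl(d)) * prior(d) dd ) on a uniform grid without
# leaving log space.
import numpy as np
from scipy.special import logsumexp

dist_array = np.linspace(10.0, 1000.0, 500)                       # assumed distance grid
deltad = dist_array[1] - dist_array[0]
dist_prior = dist_array**2 / np.trapz(dist_array**2, dist_array)  # assumed p(d) ~ d**2
logl_of_d = -0.5 * (100.0 / dist_array - 5.0)**2                  # toy log-likelihood curve
log_marginalized = logsumexp(logl_of_d, b=deltad * dist_prior)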
def test_logsumexp_shape():
    a = np.ones((1, 2, 3, 4))
    b = np.ones_like(a)

    r = logsumexp(a, axis=2, b=b)
    assert_equal(r.shape, (1, 2, 4))

    r = logsumexp(a, axis=(1, 3), b=b)
    assert_equal(r.shape, (1, 3))
def test_logsumexp_sign_shape():
    a = np.ones((1, 2, 3, 4))
    b = np.ones_like(a)

    r, s = logsumexp(a, axis=2, b=b, return_sign=True)
    assert_equal(r.shape, s.shape)
    assert_equal(r.shape, (1, 2, 4))

    r, s = logsumexp(a, axis=(1, 3), b=b, return_sign=True)
    assert_equal(r.shape, s.shape)
    assert_equal(r.shape, (1, 3))
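# A minimal standalone check (not part of the test suite above) of the convention
# these shape tests exercise: logsumexp(a, b=b) computes log(sum(b * exp(a))), and
# reducing over `axis` drops those axes from the result.
import numpy as np
from scipy.special import logsumexp

a = np.random.normal(size=(1, 2, 3, 4))
b = np.full_like(a, 0.5)
assert np.allclose(logsumexp(a, axis=2, b=b), np.log(np.sum(b * np.exp(a), axis=2)))
assert logsumexp(a, axis=(1, 3), b=b).shape == (1, 3)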
def WAIC(self):
    # WAIC
    # from https://github.com/pymc-devs/pymc3/blob/02f0b7f9a487cf18e9a48b754b54c2a99cf9fba8/pymc3/stats.py
    # We get three different measurements:
    # waic: widely applicable information criterion
    # waic_se: standard error of waic
    # p_waic: effective number of parameters
    log_py = np.atleast_2d(np.array([self.lnprob(theta) for theta in self.samples])).T
    lppd_i = logsumexp(log_py, axis=0, b=1.0 / len(log_py))
    vars_lpd = np.var(log_py, axis=0)
    warn_mg = 0
    if np.any(vars_lpd > 0.4):
        warnings.warn("""For one or more samples the posterior variance of the
        log predictive densities exceeds 0.4. This could be indication of
        WAIC starting to fail see http://arxiv.org/abs/1507.04544 for details
        """)
        warn_mg = 1

    waic_i = -2 * (lppd_i - vars_lpd)
    waic = np.sum(waic_i)
    waic_se = np.sqrt(len(waic_i) * np.var(waic_i))
    p_waic = np.sum(vars_lpd)

    self.waic = {'waic': waic, 'waic_se': waic_se, 'p_waic': p_waic}
    return waic, waic_se, p_waic
def mixing_posterior(mod, states):
    resid = mod.endog[:, 0] - states[0]

    # Construct the means (nobs x 7), variances (7,), prior probabilities (7,)
    means = ksc_params[None, :, 1] - 1.27036
    variances = ksc_params[:, 2]
    prior_probabilities = ksc_params[:, 0]

    # Make dimensions compatible for broadcasting
    resid = np.repeat(resid[:, None], len(variances), axis=-1)
    variances = np.repeat(variances[None, :], mod.nobs, axis=0)
    prior_probabilities = np.repeat(prior_probabilities[None, :], mod.nobs,
                                    axis=0)

    # Compute loglikelihood (nobs x 7)
    loglikelihoods = -0.5 * ((resid - means)**2 / variances
                             + np.log(2 * np.pi * variances))

    # Get (values proportional to) the (log of the) posterior (nobs x 7)
    posterior_kernel = loglikelihoods + np.log(prior_probabilities)

    # Normalize to get the actual posterior probabilities
    tmp = logsumexp(posterior_kernel, axis=1)
    posterior_probabilities = np.exp(posterior_kernel - tmp[:, None])

    return posterior_probabilities
def test_mixture_of_mvn(self):
    mu1 = np.asarray([0., 1.])
    cov1 = np.diag([1.5, 2.5])
    mu2 = np.asarray([1., 0.])
    cov2 = np.diag([2.5, 3.5])
    obs = np.asarray([[.5, .5], mu1, mu2])

    with Model() as model:
        w = Dirichlet('w', floatX(np.ones(2)), transform=None)
        mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
        mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
        y = Mixture('x_obs', w, [mvncomp1, mvncomp2], observed=obs)

    # check logp of each component
    complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1),
                             st.multivariate_normal.logpdf(obs, mu2, cov2))
                            ).T
    complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
    assert_allclose(complogp, complogp_st)

    # check logp of mixture
    testpoint = model.test_point
    mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st,
                           axis=-1, keepdims=True)
    assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)

    # check logp of model
    priorlogp = st.dirichlet.logpdf(x=testpoint['w'],
                                    alpha=np.ones(2),
                                    )
    assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
def _global_jump(self, replicas_log_P_k):
    """
    Global jump scheme.
    This method is described after Eq. 3 in [2]
    """
    n_replica, n_states = self.n_replicas, self.n_states
    for replica_index, current_state_index in enumerate(self._replica_thermodynamic_states):
        neighborhood = self._neighborhood(current_state_index)

        # Compute unnormalized log probabilities for all thermodynamic states.
        log_P_k = np.zeros([n_states], np.float64)
        for state_index in neighborhood:
            u_k = self._energy_thermodynamic_states[replica_index, :]
            log_P_k[state_index] = - u_k[state_index] + self.log_weights[state_index]
        log_P_k -= logsumexp(log_P_k)

        # Update sampler Context to current thermodynamic state.
        P_k = np.exp(log_P_k[neighborhood])
        new_state_index = np.random.choice(neighborhood, p=P_k)
        self._replica_thermodynamic_states[replica_index] = new_state_index

        # Accumulate statistics.
        replicas_log_P_k[replica_index, :] = log_P_k[:]
        self._n_proposed_matrix[current_state_index, neighborhood] += 1
        self._n_accepted_matrix[current_state_index, new_state_index] += 1
def _margdist_loglr(self, mf_snr, opt_snr):
    """Returns the log likelihood ratio marginalized over distance.
    """
    mf_snr_marg = mf_snr/self._dist_array
    opt_snr_marg = opt_snr/self._dist_array**2
    return special.logsumexp(mf_snr_marg - 0.5*opt_snr_marg,
                             b=self._deltad*self.dist_prior)
def log_forward(self, input):
    """Forward pass for sigmoid hidden layers and output softmax"""

    # Input
    tilde_z = input
    layer_inputs = []

    # Hidden layers
    num_hidden_layers = len(self.parameters) - 1
    for n in range(num_hidden_layers):

        # Store input to this layer (needed for backpropagation)
        layer_inputs.append(tilde_z)

        # Linear transformation
        weight, bias = self.parameters[n]
        z = np.dot(tilde_z, weight.T) + bias

        # Non-linear transformation (sigmoid)
        tilde_z = 1.0 / (1 + np.exp(-z))

    # Store input to this layer (needed for backpropagation)
    layer_inputs.append(tilde_z)

    # Output linear transformation
    weight, bias = self.parameters[num_hidden_layers]
    z = np.dot(tilde_z, weight.T) + bias

    # Softmax is computed in log-domain to prevent underflow/overflow
    log_tilde_z = z - logsumexp(z, axis=1)[:, None]

    return log_tilde_z, layer_inputs
def log_forward(self, input):
    # Get parameters and sizes
    W_e, W_x, W_h, W_y = self.parameters
    hidden_size = W_h.shape[0]
    nr_steps = input.shape[0]

    # Embedding layer
    z_e = W_e[input, :]

    # Recurrent layer
    h = np.zeros((nr_steps + 1, hidden_size))
    for t in range(nr_steps):

        # Linear
        z_t = W_x.dot(z_e[t, :]) + W_h.dot(h[t, :])

        # Non-linear
        h[t+1, :] = 1.0 / (1 + np.exp(-z_t))

    # Output layer
    y = h[1:, :].dot(W_y.T)

    # Softmax
    log_p_y = y - logsumexp(y, axis=1)[:, None]

    return log_p_y, y, h, z_e, input
def test_logsumexp_sign():
    a = [1, 1, 1]
    b = [1, -1, -1]

    r, s = logsumexp(a, b=b, return_sign=True)
    assert_almost_equal(r, 1)
    assert_equal(s, -1)
def logp_matches(self, mixture, latent_mix, z, npop, model):
    if theano.config.floatX == 'float32':
        rtol = 1e-4
    else:
        rtol = 1e-7
    test_point = model.test_point
    test_point['latent_m'] = test_point['m']
    mix_logp = mixture.logp(test_point)
    logps = []
    for component in range(npop):
        test_point['z'] = component * np.ones(z.distribution.shape)
        # Count the number of axes that should be broadcasted from z to
        # modify the logp
        sh1 = test_point['z'].shape
        sh2 = test_point['latent_m'].shape
        if len(sh1) > len(sh2):
            sh2 = (1,) * (len(sh1) - len(sh2)) + sh2
        elif len(sh2) > len(sh1):
            sh1 = (1,) * (len(sh2) - len(sh1)) + sh1
        reps = np.prod([s2 if s1 != s2 else 1
                        for s1, s2 in zip(sh1, sh2)])
        z_logp = z.logp(test_point) * reps
        logps.append(z_logp + latent_mix.logp(test_point))
    latent_mix_logp = logsumexp(np.array(logps), axis=0)
    assert_allclose(mix_logp, latent_mix_logp, rtol=rtol)
def test_logsumexp_sign_zero():
    a = [1, 1]
    b = [1, -1]

    r, s = logsumexp(a, b=b, return_sign=True)
    assert_(not np.isfinite(r))
    assert_(not np.isnan(r))
    assert_(r < 0)
    assert_equal(s, 0)
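# Illustrative sketch of the return_sign contract the two tests above rely on:
# with weights b, logsumexp returns log(|sum(b * exp(a))|) together with the sign
# of that sum, so a negative total gives sign -1 and exact cancellation gives
# sign 0 with an infinite log term.
import numpy as np
from scipy.special import logsumexp

r, s = logsumexp([1, 1, 1], b=[1, -1, -1], return_sign=True)
assert np.isclose(s * np.exp(r), np.exp(1) - np.exp(1) - np.exp(1))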
def _do_forward_pass(self, framelogprob):
    n_samples, n_components = framelogprob.shape
    fwdlattice = np.zeros((n_samples, n_components))
    _hmmc._forward(n_samples, n_components,
                   log_mask_zero(self.startprob_),
                   log_mask_zero(self.transmat_),
                   framelogprob, fwdlattice)
    with np.errstate(under="ignore"):
        return logsumexp(fwdlattice[-1]), fwdlattice
def __call__(self, y_true, raw_predictions, average=True):
    one_hot_true = np.zeros_like(raw_predictions)
    prediction_dim = raw_predictions.shape[0]
    for k in range(prediction_dim):
        one_hot_true[k, :] = (y_true == k)

    loss = (logsumexp(raw_predictions, axis=0) -
            (one_hot_true * raw_predictions).sum(axis=0))
    return loss.mean() if average else loss
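# Hedged sanity check with made-up data (not taken from the library's test suite):
# the logsumexp form of the loss above equals the naive negative log of the
# softmax probability of the true class, computed column by column.
import numpy as np
from scipy.special import logsumexp

rng = np.random.RandomState(0)
raw_predictions = rng.normal(size=(3, 5))      # (prediction_dim, n_samples)
y_true = rng.randint(0, 3, size=5)
true_logit = raw_predictions[y_true, np.arange(5)]
naive = -np.log(np.exp(true_logit) / np.exp(raw_predictions).sum(axis=0))
stable = logsumexp(raw_predictions, axis=0) - true_logit
assert np.allclose(naive, stable)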
def _margdistphase_loglr(self, mf_snr, opt_snr):
    """Returns the log likelihood ratio marginalized over distance and phase.
    """
    logl = numpy.log(special.i0(mf_snr))
    logl_marg = logl/self._dist_array
    opt_snr_marg = opt_snr/self._dist_array**2
    return special.logsumexp(logl_marg - 0.5*opt_snr_marg,
                             b=self._deltad*self.dist_prior)
def test_logsumexp():
    a = np.random.normal(size=(200, 500, 5))

    for axis in range(a.ndim):
        ans_ne = pymbar.utils.logsumexp(a, axis=axis)
        ans_no_ne = pymbar.utils.logsumexp(a, axis=axis, use_numexpr=False)
        ans_scipy = logsumexp(a, axis=axis)
        eq(ans_ne, ans_no_ne)
        eq(ans_ne, ans_scipy)
def log_forward(self, input=None):
    """Forward pass of the computation graph"""

    # Linear transformation
    z = np.dot(input, self.weight.T) + self.bias

    # Softmax implemented in log domain
    log_tilde_z = z - logsumexp(z, axis=1)[:, None]

    return log_tilde_z
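# Small illustration (hypothetical logits) of the log-domain softmax used above:
# subtracting the per-row logsumexp yields log-probabilities whose exponentials
# sum to one in each row, even when the raw scores are far too large for a naive
# exp(z) / exp(z).sum() to survive.
import numpy as np
from scipy.special import logsumexp

z = np.array([[1000.0, 1001.0], [-5.0, 3.0]])
log_p = z - logsumexp(z, axis=1)[:, None]
assert np.allclose(np.exp(log_p).sum(axis=1), 1.0)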
def _compute_log_likelihood(self, X):
    n_samples, _ = X.shape
    res = np.zeros((n_samples, self.n_components))

    for i in range(self.n_components):
        log_denses = self._compute_log_weighted_gaussian_densities(X, i)
        with np.errstate(under="ignore"):
            res[:, i] = logsumexp(log_denses, axis=1)

    return res
def _logsf(self, k, M, n, N):
    """
    More precise calculation than log(sf)
    """
    res = []
    for quant, tot, good, draw in zip(k, M, n, N):
        # Integration over probability mass function using logsumexp
        k2 = np.arange(quant + 1, draw + 1)
        res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
    return np.asarray(res)
def logpdf(self, pts, pool=None):
    """Evaluate the log-transdimensional-pdf at `pts` as estimated by the KDE."""
    logpdfs = []
    for logweight, space, kde in zip(self._logweights, self.spaces, self.kdes):
        # Calculate the probability for each parameter space individually
        if np.all(space == ~pts.mask) and np.isfinite(logweight):
            logpdfs.append(logweight + kde(pts[space], pool=pool))

    return logsumexp(logpdfs, axis=0)
def _logcdf(self, k, M, n, N):
    res = []
    for quant, tot, good, draw in zip(k, M, n, N):
        if (quant + 0.5) * (tot + 0.5) > (good - 0.5) * (draw - 0.5):
            # Less terms to sum if we calculate log(1-sf)
            res.append(log1p(-exp(self.logsf(quant, tot, good, draw))))
        else:
            # Integration over probability mass function using logsumexp
            k2 = np.arange(0, quant + 1)
            res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
    return np.asarray(res)
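# Illustrative check of the pattern shared by _logsf and _logcdf above, using
# scipy.stats.hypergeom directly: summing pmf terms in log space with logsumexp
# reproduces the distribution's log CDF without underflow.
import numpy as np
from scipy.special import logsumexp
from scipy.stats import hypergeom

M, n, N, k = 100, 30, 20, 7
manual = logsumexp(hypergeom.logpmf(np.arange(0, k + 1), M, n, N))
assert np.isclose(manual, hypergeom.logcdf(k, M, n, N))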
def compute_kl_with_logits(self, logits1, logits2):
    """Computes KL from logits samples from two distributions."""

    def exp_times_diff(a, b):
        return np.multiply(np.exp(a), a - b)

    logsumexp1 = logsumexp(logits1, axis=1)
    logsumexp2 = logsumexp(logits2, axis=1)
    logsumexp_diff = logsumexp2 - logsumexp1

    exp_diff = exp_times_diff(logits1, logits2)
    exp_diff = np.sum(exp_diff, axis=1)

    inv_exp_sum = np.sum(np.exp(logits1), axis=1)
    term1 = np.divide(exp_diff, inv_exp_sum)

    kl = term1 + logsumexp_diff
    kl = np.maximum(kl, 0.0)
    kl = np.nan_to_num(kl)
    return np.mean(kl)
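# Hedged numerical check with made-up logits that the identity used above holds:
# for p = softmax(l1) and q = softmax(l2), KL(p || q) equals
# sum(exp(l1) * (l1 - l2)) / sum(exp(l1)) + logsumexp(l2) - logsumexp(l1), row by row.
import numpy as np
from scipy.special import logsumexp, softmax

rng = np.random.RandomState(1)
l1, l2 = rng.normal(size=(4, 6)), rng.normal(size=(4, 6))
p, q = softmax(l1, axis=1), softmax(l2, axis=1)
direct = np.sum(p * (np.log(p) - np.log(q)), axis=1)
via_logits = (np.sum(np.exp(l1) * (l1 - l2), axis=1) / np.sum(np.exp(l1), axis=1)
              + logsumexp(l2, axis=1) - logsumexp(l1, axis=1))
assert np.allclose(direct, via_logits)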
def fit(self, obs, three_para): """Estimate model parameters. An initialization step is performed before entering the EM algorithm. If you want to avoid this step, pass proper ``init_params`` keyword argument to estimator's constructor. Parameters ---------- obs : list List of array-like observation sequences, each of which has shape (n_i, n_features), where n_i is the length of the i_th observation. Notes ----- In general, `logprob` should be non-decreasing unless aggressive pruning is used. Decreasing `logprob` is generally a sign of overfitting (e.g. a covariance parameter getting too small). You can fix this by getting more training data, or strengthening the appropriate subclass-specific regularization parameter. """ # what does this mean?? self._init(obs, self.init_params) logprob = [] for i in range(self.n_iter): # Expectation step stats = self._initialize_sufficient_statistics() curr_logprob = 0 for seq in obs: framelogprob = self._compute_log_likelihood(seq) lpr, fwdlattice = self._do_forward_pass(framelogprob) bwdlattice = self._do_backward_pass(framelogprob) gamma = fwdlattice + bwdlattice posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T curr_logprob += lpr self._accumulate_sufficient_statistics( stats, seq, framelogprob, posteriors, fwdlattice, bwdlattice) logprob.append(curr_logprob) # Check for convergence. if i > 0 and logprob[-1] - logprob[-2] < self.tol: break # Maximization step self._do_mstep(stats, three_para) #print("Logprob of all M-steps: %s" %logprob, file=sys.stderr) self.em_prob = logprob[-1] return self
def compute_pvalue(distr, N, side, current_p, x):
    """Compute log2 pvalue"""
    sum_num = []
    sum_denum = []
    # use integer division so the range bound is valid in Python 3
    it = range(N // 2 + 1) if side == 'r' else range(N + 1, -1, -1)

    for i in it:
        p1 = get_log_value(i, distr)
        p2 = get_log_value(N - i, distr)
        p = p1 + p2

        if _comp(i, x, side, current_p, p):  # if p > current_p:
            sum_num.append(p)
        sum_denum.append(p)

    if distr['distr_name'] == 'nb':
        # materialize as lists of floats (map() is lazy in Python 3)
        sum_num = [float(v) for v in sum_num]
        sum_denum = [float(v) for v in sum_denum]

    return logsumexp(np.array(sum_num)) - (log(2) + logsumexp(np.array(sum_denum)))
def griffiths_log_prob_coal_counts(a, b, t, N):
    t = t / (2 * N)
    n = a
    alpha = 1/2 * n * t
    beta = -1/2 * t
    h = eta(alpha, beta)
    mu = 2 * h * t**(-1)
    var = 2 * h * t**(-1) * (h + beta)**2
    var *= (1 + h/(h + beta) - h/alpha - h/(alpha + beta) - 2*h)
    var *= beta**-2
    std = np.sqrt(var)
    return stats.norm.logpdf(b, mu, std) - \
        logsumexp(stats.norm.logpdf(np.arange(1, a + 1), mu, std))
def lnprob(params, nt, allFluxes, allFluxesVar, fmod_atZ, pmin, pmax):
    if np.any(params > pmax) or np.any(params < pmin):
        return - np.inf

    alphas = params[0:nt]
    betas = params[nt:2*nt][None, :]

    lnlike_grid = scalefree_flux_lnlikelihood_multiobj(
        allFluxes[:, None, :], allFluxesVar[:, None, :], fmod_atZ)  # no, nt

    p_t = dirichlet(alphas)
    p_z = redshifts * np.exp(-0.5 * redshifts**2 / betas) / betas  # p(z|t)
    p_z_t = p_z * p_t  # no, nt

    lnlike_lt = logsumexp(lnlike_grid + np.log(p_z_t), axis=1)

    return - np.sum(lnlike_lt)
def logpdf(self, x):
    """
    Evaluate the log of the estimated pdf on a provided set of points.
    """
    points = atleast_2d(x)

    d, m = points.shape
    if d != self.d:
        if d == 1 and m == self.d:
            # points was passed in as a row vector
            points = reshape(points, (self.d, 1))
            m = 1
        else:
            msg = "points have dimension %s, dataset has dimension %s" % (d, self.d)
            raise ValueError(msg)

    result = zeros((m,), dtype=float)

    if m >= self.n:
        # there are more points than data, so loop over data
        energy = zeros((self.n, m), dtype=float)
        for i in range(self.n):
            diff = self.dataset[:, i, newaxis] - points
            tdiff = dot(self.inv_cov, diff)
            energy[i] = sum(diff*tdiff, axis=0) / 2.0
        # weight each data point by its own weight (broadcast over axis 0), not
        # by the leftover loop index i from the loop above
        result = logsumexp(-energy,
                           b=self.weights[:, newaxis]*self.n/self._norm_factor,
                           axis=0)
    else:
        # loop over points
        for i in range(m):
            diff = self.dataset - points[:, i, newaxis]
            tdiff = dot(self.inv_cov, diff)
            energy = sum(diff * tdiff, axis=0) / 2.0
            result[i] = logsumexp(-energy,
                                  b=self.weights*self.n/self._norm_factor)

    return result
def _convert_raw_seg_to_seg(raw_seg, run_lengths, alg_name, min_value, max_value): s, b, p = raw_seg['log_weights'].shape # sequence length, batch size, number of particles act_dim = raw_seg['ac'].shape[-1] state_dim = raw_seg['ob'].shape[-1] mask = np.transpose(np.reshape(raw_seg['mask'], (s, p, b)),(0,2,1)) ob = np.transpose(np.reshape(raw_seg['ob'], (s, p, b, state_dim)),(0,2,1,3)) ac = np.transpose(np.reshape(raw_seg['ac'], (s, p, b, act_dim)),(0,2,1,3)) log_p_div_q = raw_seg['log_p_z'] + raw_seg['log_p_x_given_z'] - raw_seg['log_q_z'] log_p_div_q = np.transpose(np.reshape(log_p_div_q, (s, p, b)),(0,2,1)) log_p_div_q = log_p_div_q * mask log_p_xz = raw_seg['log_p_z'] + raw_seg['log_p_x_given_z'] log_p_xz = np.transpose(np.reshape(log_p_xz, (s, p, b)),(0,2,1)) log_p_xz = log_p_xz * mask initial_pr = np.zeros((1, b, p)) cum_prs = np.cumsum(log_p_div_q, axis=0) cum_prs = np.concatenate((initial_pr, cum_prs[:-1,:,:]), axis=0) # (s,b,p) ob = np.reshape(ob, (s*b*p, state_dim)) if alg_name == "reinforce": current_fef = 0 future_fef = 0 fef_weights = 0 future_fef_weights = 0 # compute hiah_var_coeff high_var_coeff = log_p_div_q - np.log(p) elif alg_name == "vimco": current_fef = 0 future_fef = 0 fef_weights = 0 future_fef_weights = 0 # compute hiah_var_coeff vimco_numerators = np.zeros((b,1,p)) vimco_denominators = np.zeros((b, 1, p)) for i in range(p): total_sum = np.sum(log_p_div_q * mask, axis=0) vimco_numerators = np.concatenate((vimco_numerators, np.reshape(total_sum, (b,1,p))), axis=1) total_sum[:,i] = (np.sum(total_sum, axis=-1) - total_sum[:,i]) / (p-1) total_sum = np.reshape(total_sum, (b,1,p)) vimco_denominators = np.concatenate((vimco_denominators, total_sum), axis=1) vimco_numerators = vimco_numerators[:,1:,:] vimco_numerators = logsumexp(vimco_numerators, axis=-1) vimco_denominators = vimco_denominators[:, 1:, :] vimco_denominators = logsumexp(vimco_denominators, axis=-1) high_var_coeff = vimco_numerators - vimco_denominators elif alg_name == "vifle" or alg_name == "fr": current_fef = np.transpose(np.reshape(raw_seg['fef_vpred'], (s, p, b)), (0,2,1)) current_fef = current_fef * mask # (s,b,p) # future_value: log(V) future_fef = np.concatenate((current_fef[1:,:,:], np.zeros((1,b,p))), axis=0) fef_weights = np.zeros([s,b]) future_fef_weights = np.zeros([s,b]) for i in range(b): fef_weights[:run_lengths[i], i] = np.arange(run_lengths[i])[::-1] + 1 future_fef_weights[:run_lengths[i] - 1, i] = np.arange(run_lengths[i] - 1)[::-1] + 1 # compute hiah_var_coeff if alg_name == "vifle": total_sum = np.sum(log_p_div_q * mask, axis=0, keepdims=True) # (1,b,p) total_sum = np.broadcast_to(total_sum, (s,b,p)) vifle_numerators = np.tile(np.expand_dims(total_sum, axis=3),(1,1,1,p)) vifle_denominators = np.tile(np.expand_dims(total_sum, axis=3),(1,1,1,p)) for i in range(p): vifle_numerators[:,:,i,i] = (cum_prs + log_p_div_q)[:, :, i] + future_fef[:,:,i] * future_fef_weights vifle_denominators[:,:,i,i] = cum_prs[:, :, i] + current_fef[:,:,i] * fef_weights vifle_numerators = logsumexp(vifle_numerators, axis=-1) vifle_denominators = logsumexp(vifle_denominators, axis=-1) high_var_coeff = vifle_numerators - vifle_denominators else: fr_numerators = cum_prs + log_p_div_q + future_fef * future_fef_weights fr_numerators = logsumexp(fr_numerators, axis=-1) fr_denominators = cum_prs + current_fef * fef_weights fr_denominators = logsumexp(fr_denominators, axis=-1) high_var_coeff = fr_numerators - fr_denominators else: raise ValueError("Undefined alg_name %s" % alg_name) seg = { 'ob': ob, 'ac': ac, 'mask': 
mask, 'log_p_xz': log_p_xz, 'high_var_coeff': high_var_coeff, 'target_pr': log_p_div_q, 'future_fef': future_fef, 'current_fef': current_fef, 'fef_weights': fef_weights, 'future_fef_weights': future_fef_weights } return seg
def conditional_probability_of_n_purchases_up_to_time( self, n, t, frequency, recency, T): """ Return conditional probability of n purchases up to time t. Calculate the probability of n purchases up to time t for an individual with history frequency, recency and T (age). The main equation being implemented is (16) from: http://www.brucehardie.com/notes/028/pareto_nbd_conditional_pmf.pdf Parameters ---------- n: int number of purchases. t: a scalar time up to which probability should be calculated. frequency: float historical frequency of customer. recency: float historical recency of customer. T: float age of the customer. Returns ------- array_like """ if t <= 0: return 0 x, t_x = frequency, recency params = self._unload_params("r", "alpha", "s", "beta") r, alpha, s, beta = params if alpha < beta: min_of_alpha_beta, max_of_alpha_beta, p, _, _ = (alpha, beta, r + x + n, r + x, r + x + 1) else: min_of_alpha_beta, max_of_alpha_beta, p, _, _ = (beta, alpha, s + 1, s + 1, s) abs_alpha_beta = max_of_alpha_beta - min_of_alpha_beta log_l = self._conditional_log_likelihood(params, x, t_x, T) log_p_zero = (gammaln(r + x) + r * log(alpha) + s * log(beta) - (gammaln(r) + (r + x) * log(alpha + T) + s * log(beta + T) + log_l)) log_B_one = ( gammaln(r + x + n) + r * log(alpha) + s * log(beta) - (gammaln(r) + (r + x + n) * log(alpha + T + t) + s * log(beta + T + t))) log_B_two = ( r * log(alpha) + s * log(beta) + gammaln(r + s + x) + betaln(r + x + n, s + 1) + log( hyp2f1(r + s + x, p, r + s + x + n + 1, abs_alpha_beta / (max_of_alpha_beta + T))) - (gammaln(r) + gammaln(s) + (r + s + x) * log(max_of_alpha_beta + T))) def _log_B_three(i): return (r * log(alpha) + s * log(beta) + gammaln(r + s + x + i) + betaln(r + x + n, s + 1) + log( hyp2f1(r + s + x + i, p, r + s + x + n + 1, abs_alpha_beta / (max_of_alpha_beta + T + t))) - (gammaln(r) + gammaln(s) + (r + s + x + i) * log(max_of_alpha_beta + T + t))) zeroth_term = (n == 0) * (1 - exp(log_p_zero)) first_term = n * log(t) - gammaln(n + 1) + log_B_one - log_l second_term = log_B_two - log_l third_term = logsumexp([ i * log(t) - gammaln(i + 1) + _log_B_three(i) - log_l for i in range(n + 1) ], axis=0) try: size = len(x) sign = np.ones(size) except TypeError: sign = 1 # In some scenarios (e.g. large n) tiny numerical errors in the calculation of second_term and third_term # cause sumexp to be ever so slightly negative and logsumexp throws an error. Hence we ignore the sign here. return zeroth_term + exp( logsumexp([first_term, second_term, third_term], b=[sign, sign, -sign], axis=0, return_sign=True)[0])
def log_likelihood_ratio(self): waveform_polarizations =\ self.waveform_generator.frequency_domain_strain(self.parameters) if waveform_polarizations is None: return np.nan_to_num(-np.inf) matched_filter_snr_squared = 0 optimal_snr_squared = 0 matched_filter_snr_squared_tc_array = np.zeros( self.interferometers.frequency_array[0:-1].shape, dtype=np.complex128) for interferometer in self.interferometers: signal_ifo = interferometer.get_detector_response( waveform_polarizations, self.parameters) matched_filter_snr_squared += interferometer.matched_filter_snr_squared( signal=signal_ifo) optimal_snr_squared += interferometer.optimal_snr_squared( signal=signal_ifo) if self.time_marginalization: matched_filter_snr_squared_tc_array +=\ 4 / self.waveform_generator.duration * np.fft.fft( signal_ifo[0:-1] * interferometer.frequency_domain_strain.conjugate()[0:-1] / interferometer.power_spectral_density_array[0:-1]) if self.time_marginalization: if self.distance_marginalization: rho_mf_ref_tc_array, rho_opt_ref = self._setup_rho( matched_filter_snr_squared_tc_array, optimal_snr_squared) if self.phase_marginalization: dist_marged_log_l_tc_array = self._interp_dist_margd_loglikelihood( abs(rho_mf_ref_tc_array), rho_opt_ref) log_l = logsumexp(dist_marged_log_l_tc_array, b=self.time_prior_array) else: dist_marged_log_l_tc_array = self._interp_dist_margd_loglikelihood( rho_mf_ref_tc_array.real, rho_opt_ref) log_l = logsumexp(dist_marged_log_l_tc_array, b=self.time_prior_array) elif self.phase_marginalization: log_l = logsumexp( self._bessel_function_interped( abs(matched_filter_snr_squared_tc_array)), b=self.time_prior_array) - optimal_snr_squared / 2 else: log_l = logsumexp( matched_filter_snr_squared_tc_array.real, b=self.time_prior_array) - optimal_snr_squared / 2 elif self.distance_marginalization: rho_mf_ref, rho_opt_ref = self._setup_rho( matched_filter_snr_squared, optimal_snr_squared) if self.phase_marginalization: rho_mf_ref = abs(rho_mf_ref) log_l = self._interp_dist_margd_loglikelihood( rho_mf_ref.real, rho_opt_ref)[0] elif self.phase_marginalization: matched_filter_snr_squared = self._bessel_function_interped( abs(matched_filter_snr_squared)) log_l = matched_filter_snr_squared - optimal_snr_squared / 2 else: log_l = matched_filter_snr_squared.real - optimal_snr_squared / 2 return log_l.real
def _loss_grad_lbfgs(self, transformation, X, mask, sign=1.0): """Compute the loss and the loss gradient w.r.t. ``transformation``. Parameters ---------- transformation : array, shape (n_components, n_features) The linear transformation on which to compute loss and evaluate gradient X : array, shape (n_samples, n_features) The training samples. mask : array, shape (n_samples, n_samples) A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong to the same class, and ``0`` otherwise. Returns ------- loss : float The loss computed for the given transformation. gradient : array, shape (n_components * n_features,) The new (flattened) gradient of the loss. """ if self.n_iter_ == 0: self.n_iter_ += 1 if self.verbose: header_fields = ['Iteration', 'Objective Value', 'Time(s)'] header_fmt = '{:>10} {:>20} {:>10}' header = header_fmt.format(*header_fields) cls_name = self.__class__.__name__ print('[{}]'.format(cls_name)) print('[{}] {}\n[{}] {}'.format(cls_name, header, cls_name, '-' * len(header))) t_funcall = time.time() transformation = transformation.reshape(-1, X.shape[1]) X_embedded = np.dot(X, transformation.T) # (n_samples, n_components) # Compute softmax distances p_ij = pairwise_distances(X_embedded, squared=True) np.fill_diagonal(p_ij, np.inf) p_ij = np.exp(-p_ij - logsumexp(-p_ij, axis=1)[:, np.newaxis]) # (n_samples, n_samples) # Compute loss masked_p_ij = p_ij * mask p = np.sum(masked_p_ij, axis=1, keepdims=True) # (n_samples, 1) loss = np.sum(p) # Compute gradient of loss w.r.t. `transform` weighted_p_ij = masked_p_ij - p_ij * p gradient = 2 * (X_embedded.T.dot(weighted_p_ij + weighted_p_ij.T) - X_embedded.T * np.sum(weighted_p_ij, axis=0)).dot(X) # time complexity: O(n_components x n_samples x # min(n_samples, n_features)) if self.verbose: t_funcall = time.time() - t_funcall values_fmt = '[{}] {:>10} {:>20.6e} {:>10.2f}' print( values_fmt.format(self.__class__.__name__, self.n_iter_, loss, t_funcall)) sys.stdout.flush() return sign * loss, sign * gradient.ravel()
def normalize_log(l):
    return np.exp(l - logsumexp(l)).flatten()
def get_ess(logw_norm):
    return np.exp(-logsumexp(2 * logw_norm))
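# Minimal usage sketch, assuming the input is a vector of normalized log-weights
# (as the name logw_norm suggests): for weights w with sum(w) == 1 the expression
# exp(-logsumexp(2 * log w)) equals 1 / sum(w**2), so uniform weights over N
# particles give an effective sample size of exactly N.
import numpy as np
from scipy.special import logsumexp

N = 1000
logw_norm = np.full(N, -np.log(N))   # log of uniform, normalized weights
ess = np.exp(-logsumexp(2 * logw_norm))
assert np.isclose(ess, N)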
def test_logsumexp():
    # Test whether logsumexp() function correctly handles large inputs.
    a = np.arange(200)
    desired = np.log(np.sum(np.exp(a)))
    assert_almost_equal(logsumexp(a), desired)

    # Now test with large numbers
    b = [1000, 1000]
    desired = 1000.0 + np.log(2.0)
    assert_almost_equal(logsumexp(b), desired)

    n = 1000
    b = np.full(n, 10000, dtype='float64')
    desired = 10000.0 + np.log(n)
    assert_almost_equal(logsumexp(b), desired)

    x = np.array([1e-40] * 1000000)
    logx = np.log(x)

    X = np.vstack([x, x])
    logX = np.vstack([logx, logx])
    assert_array_almost_equal(np.exp(logsumexp(logX)), X.sum())
    assert_array_almost_equal(np.exp(logsumexp(logX, axis=0)), X.sum(axis=0))
    assert_array_almost_equal(np.exp(logsumexp(logX, axis=1)), X.sum(axis=1))

    # Handling special values properly
    assert_equal(logsumexp(np.inf), np.inf)
    assert_equal(logsumexp(-np.inf), -np.inf)
    assert_equal(logsumexp(np.nan), np.nan)
    assert_equal(logsumexp([-np.inf, -np.inf]), -np.inf)

    # Handling an array with different magnitudes on the axes
    assert_array_almost_equal(
        logsumexp([[1e10, 1e-10], [-1e10, -np.inf]], axis=-1),
        [1e10, -1e10])

    # Test keeping dimensions
    assert_array_almost_equal(
        logsumexp([[1e10, 1e-10], [-1e10, -np.inf]], axis=-1, keepdims=True),
        [[1e10], [-1e10]])

    # Test multiple axes
    assert_array_almost_equal(
        logsumexp([[1e10, 1e-10], [-1e10, -np.inf]], axis=(-1, -2)),
        1e10)
def test_logsumexp_b_shape():
    a = np.zeros((4, 1, 2, 1))
    b = np.ones((3, 1, 5))

    logsumexp(a, b=b)
def test_logsumexp_b_zero():
    a = [1, 10000]
    b = [1, 0]

    assert_almost_equal(logsumexp(a, b=b), 1)
def log_prob(self, x):
    return logsumexp(self.log_prob_components(x) + np.log(self.priors)[:, None],
                     axis=0)
def prob_by_vocab_overlap_sent(): base_lans = ["aze"] #ts = [0.01, 0.05, 0.1, 0.1] #argmaxs = [False, False, False, True] ts = [0.1] argmaxs = [True] for base_lan in base_lans: for t, argmax in zip(ts, argmaxs): trg2srcs = {} lan_lists = [l.strip() for l in open("langs.txt", 'r').readlines()] lans = [] for l in lan_lists: if l != base_lan: lans.append(l) lan_lists = lans out_probs = [] for i, lan in enumerate(lan_lists): lm_file = "lmll/ted-train.mtok.{}.{}-vocab".format(lan, base_lan) lm_score = [float(l) for l in open(lm_file, 'r').readlines()] trg_file = "data/{}_eng/ted-train.mtok.spm8000.eng".format(lan) trg_sents = open(trg_file, 'r').readlines() out_probs.append([0 for _ in range(len(trg_sents))]) line = 0 for j, trg in enumerate(trg_sents): if trg not in trg2srcs: trg2srcs[trg] = [] trg2srcs[trg].append([i, line, lm_score[j]]) line += 1 print("eng size: {}".format(len(trg2srcs))) for trg, src_list in trg2srcs.items(): if argmax: max_score = 0 for s in src_list: max_score = max(s[2], max_score) for s in src_list: if s[2] == max_score: out_probs[s[0]][s[1]] = 1 else: out_probs[s[0]][s[1]] = 0 else: sum_score = 0 log_score = [] for s in src_list: #s[2] = np.exp(-s[2] / t) #sum_score += s[2] s[2] = s[2] / t log_score.append(s[2]) sum_score = logsumexp(log_score) for s in src_list: #s[2] = s[2] / sum_score s[2] = np.exp(s[2] - sum_score) out_probs[s[0]][s[1]] = s[2] for i, lan in enumerate(lan_lists): if argmax: out = open("data/{}_eng/ted-train.mtok.{}.prob-vocab-sent-{}-am".format(lan, lan, base_lan), "w") else: out = open("data/{}_eng/ted-train.mtok.{}.prob-vocab-sent-{}-t{}".format(lan, lan, base_lan, t), "w") #out = open(data_dir + "{}_en/ted-train.mtok.{}.prob-rank-{}-t{}-k{}-el".format(lan, lan, base_lan, t, k), "w") for p in out_probs[i]: out.write("{}\n".format(p)) out.close() if argmax: out = open("data/{}_eng/ted-train.mtok.{}.prob-vocab-sent-{}-am".format(base_lan, base_lan, base_lan), "w") else: out = open("data/{}_eng/ted-train.mtok.{}.prob-vocab-sent-{}-t{}".format(base_lan, base_lan, base_lan, t), "w") #out = open(data_dir + "{}_en/ted-train.mtok.{}.prob-rank-{}-t{}-k{}".format(base_lan, base_lan, base_lan, t, k), "w") base_lines = len(open("data/{}_eng/ted-train.mtok.spm8000.eng".format(base_lan)).readlines()) #base_lines = len(open(data_dir + "{}_en/ted-train.mtok.spm8000.en".format(base_lan)).readlines()) for i in range(base_lines): out.write("{}\n".format(1)) out.close()
def plot_node(network, models, models_err, pos=None, idx=None, models_x=None, Nrsamp=1, Nmc=5, node_kwargs=None, violin_kwargs=None, rstate=None, discrete=False, *args, **kwargs): """ Plot a 2-D projection of the network colored by the chosen variable. Parameters ---------- network : `~frankenz.networks._Network`-derived object The trained and populated network object. models : `~numpy.ndarray` with shape (Nobj, Ndim) The models mapped onto the network. models_err : `~numpy.ndarray` with shape (Nobj, Ndim) Errors on the models. pos : tuple of shape (Nproj), optional The `Nproj`-dimensional position of the node. Mutually exclusive with `idx`. idx : int, optional Index of the node. Mutually exclusive with `pos`. models_x : `~numpy.ndarray` with shape (Ndim), optional The `x` values corresponding to the `Ndim` model values. Nrsamp : int, optional Number of times to resample the weighted collection of models associated with the given node. Default is `1`. Nmc : int, optional The number of Monte Carlo realizations of the model values if the errors are provided. Default is `5`. node_kwargs : kwargs, optional Keyword arguments to be passed to `~matplotlib.pyplot.plot` when plotting the node model. violin_kwargs : kwargs, optional Keyword arguments to be passed to `~matplotlib.pyplot.violinplot` when plotting the distribution of model values. rstate : `~numpy.random.RandomState` instance, optional Random state instance. If not passed, the default `~numpy.random` instance will be used. discrete : bool, optional Whether to assign weights based **only** on the best-fitting node rather than all nodes an object might be associated with. Default is `False`. """ # Initialize values. if node_kwargs is None: node_kwargs = dict() if violin_kwargs is None: violin_kwargs = dict() if rstate is None: rstate = np.random if idx is None and pos is None: raise ValueError("Either `idx` or `pos` must be specified.") elif idx is not None and pos is not None: raise ValueError("Both `idx` and `pos` cannot be specified.") if models_x is None: models_x = np.arange(models.shape[-1]) + 1 node_kwargs['color'] = node_kwargs.get('color', 'black') node_kwargs['marker'] = node_kwargs.get('marker', '*') node_kwargs['markersize'] = node_kwargs.get('markersize', '10') node_kwargs['alpha'] = node_kwargs.get('alpha', 0.6) violin_kwargs['widths'] = violin_kwargs.get('widths', 600) violin_kwargs['showextrema'] = violin_kwargs.get('showextrema', False) # Get node. (idx, node_model, pos, idxs, logwts, scales, scales_err) = network.get_node(pos=pos, idx=idx, discrete=discrete) tmodels, tmodels_err = models[idxs], models_err[idxs] # grab models wts = np.exp(logwts - logsumexp(logwts)) # compute weights # Resample models. Nmatch = len(idxs) idx_rsamp = rstate.choice(Nmatch, p=wts, size=Nmatch * Nrsamp) # Perturb model values. tmodels_mc = rstate.normal(tmodels[idx_rsamp], tmodels_err[idx_rsamp]) # Rescale results. snorm = np.mean(np.array(scales)[idx_rsamp]) tmodels_mc /= (np.array(scales)[idx_rsamp, None] / snorm) # Rescale baseline model (correction should be small in most cases). mean_model = np.mean(tmodels_mc, axis=0) std_model = np.std(tmodels_mc, axis=0) num = np.dot(mean_model / std_model, node_model / std_model) den = np.dot(node_model / std_model, node_model / std_model) node_scale = num / den if abs(node_scale - 1.) < 0.05: node_scale = 1. # Plot results. 
plt.plot(models_x, node_model * node_scale, **node_kwargs) for i in range(models.shape[-1]): vals = tmodels_mc[:, i] plt.violinplot(vals, [models_x[i]], **violin_kwargs) plt.ylim( [min(mean_model - 3 * std_model), max(mean_model + 3 * std_model)])
def _transform(self, p_h, ionic_strength, temperature):
    return -R * temperature * logsumexp(
        self._dG0_prime_vector(p_h, ionic_strength, temperature) /
        (-R * temperature))
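# Hedged worked example of the transform above with invented numbers (R in
# kJ/(mol*K), energies in kJ/mol; the real _dG0_prime_vector is not reproduced
# here): the transformed energy of a group of species is
# -R*T * log(sum_i exp(-dG0_i / (R*T))), which logsumexp evaluates stably and
# which always lies at or below the most stable individual species.
import numpy as np
from scipy.special import logsumexp

R, temperature = 8.314e-3, 298.15
dG0_prime_vector = np.array([-10.0, -12.5, -9.0])   # assumed per-species energies
dG0_prime_group = -R * temperature * logsumexp(dG0_prime_vector / (-R * temperature))
assert dG0_prime_group <= dG0_prime_vector.min()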
def _approx_bound(self, X, doc_topic_distr, sub_sampling): """Estimate the variational bound. Estimate the variational bound over "all documents" using only the documents passed in as X. Since log-likelihood of each word cannot be computed directly, we use this bound to estimate it. Parameters ---------- X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. doc_topic_distr : array, shape=(n_samples, n_components) Document topic distribution. In the literature, this is called gamma. sub_sampling : boolean, optional, (default=False) Compensate for subsampling of documents. It is used in calculate bound in online learning. Returns ------- score : float """ def _loglikelihood(prior, distr, dirichlet_distr, size): # calculate log-likelihood score = np.sum((prior - distr) * dirichlet_distr) score += np.sum(gammaln(distr) - gammaln(prior)) score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1))) return score is_sparse_x = sp.issparse(X) n_samples, n_components = doc_topic_distr.shape n_features = self.components_.shape[1] score = 0 dirichlet_doc_topic = _dirichlet_expectation_2d(doc_topic_distr) dirichlet_component_ = _dirichlet_expectation_2d(self.components_) doc_topic_prior = self.doc_topic_prior_ topic_word_prior = self.topic_word_prior_ if is_sparse_x: X_data = X.data X_indices = X.indices X_indptr = X.indptr # E[log p(docs | theta, beta)] for idx_d in range(0, n_samples): if is_sparse_x: ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]] cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]] else: ids = np.nonzero(X[idx_d, :])[0] cnts = X[idx_d, ids] temp = (dirichlet_doc_topic[idx_d, :, np.newaxis] + dirichlet_component_[:, ids]) norm_phi = logsumexp(temp, axis=0) score += np.dot(cnts, norm_phi) # compute E[log p(theta | alpha) - log q(theta | gamma)] score += _loglikelihood(doc_topic_prior, doc_topic_distr, dirichlet_doc_topic, self.n_components) # Compensate for the subsampling of the population of documents if sub_sampling: doc_ratio = float(self.total_samples) / n_samples score *= doc_ratio # E[log p(beta | eta) - log q (beta | lambda)] score += _loglikelihood(topic_word_prior, self.components_, dirichlet_component_, n_features) return score
def Parallel_estimate_mixture_params(EmissionParameters, curr_counts_orig, curr_nr_of_counts_orig, curr_state, rand_sample_size, max_nr_iter, nr_of_iter=20, stop_crit=1.0, nr_of_init=10, verbosity=1): ''' This function estimates thedirichlet multinomial mixture parameters ''' #1) Copy old parameters and use it as initialisation for the first iteration alphas_list = [] mixtures_list = [] lls_list = [] curr_counts = deepcopy(curr_counts_orig) curr_nr_of_counts = deepcopy(curr_nr_of_counts_orig) if len(curr_counts.shape) == 1: curr_counts = np.expand_dims(curr_counts, axis=1) if np.sum(np.sum(curr_counts, axis=0) > 0) > 0: curr_nr_of_counts = curr_nr_of_counts[:, np.sum(curr_counts, axis=0) >0] curr_counts = curr_counts[:, np.sum(curr_counts, axis=0) >0] #Test for fitting distributions only on diag events if np.sum( np.sum(curr_counts, axis=0) > 10) > 10: curr_nr_of_counts = curr_nr_of_counts[:, np.sum(curr_counts, axis=0) > 10] curr_counts = curr_counts[:, np.sum(curr_counts, axis=0) > 10] tracks_per_rep = EmissionParameters['Diag_event_params']['alpha'][curr_state].shape[0] NrOfReplicates = curr_counts.shape[0] / tracks_per_rep if len(curr_counts.shape) == 1: curr_counts = np.expand_dims(curr_counts, axis=1) #Save old lls mixtures and alphas mixtures = deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state]) scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1])) ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts))) alphas_list.append(deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state])) mixtures_list.append(deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state])) lls_list.append(ll) np_proc = EmissionParameters['NbProc'] data = zip(itertools.repeat(stop_crit), itertools.repeat(rand_sample_size), itertools.repeat(max_nr_iter), list(range(nr_of_init)), itertools.repeat(EmissionParameters), itertools.repeat(curr_state), itertools.repeat(curr_counts), itertools.repeat(curr_nr_of_counts) ) if np_proc == 1: results = [Parallel_estimate_single_mixture_params(args) for args in data] else: print("Spawning processes") pool = multiprocessing.Pool(np_proc, maxtasksperchild=5) results = pool.imap(Parallel_estimate_single_mixture_params, data, chunksize=1) pool.close() pool.join() print("Collecting results") results = [res for res in results] alphas_list += [res[0] for res in results] mixtures_list += [res[1] for res in results] lls_list += [res[2] for res in results] #select which alpha had the highest ll max_ll_pos = np.argmax(np.array(lls_list)) #pdb.set_trace() alpha = alphas_list[max_ll_pos] mixtures = mixtures_list[max_ll_pos] return alpha, mixtures
def posteriorPredictive(self, yi, y, return_params=False, orient=False): y = np.array(y) if not y.size: return self.nullPosteriorPredictive(yi) if not y.size or len(y.shape) == 1 or len(y.shape) == 0: y = y.reshape(1, -1) N = len(y) # number of observations K = len(y[0]) # number of dimensions of each observation for i in range(N): assert len(y[i]) == K yi = np.array(yi) if len(yi.shape) > 1 and yi.shape[0] == 1: # yi i yi = yi[0] try: assert (len(yi) == K) except TypeError: # if yi is zero-dimensional yi = np.array([ yi, ]) # p(yi|D) = int_params p(yi|params) p(params|D) # p(params|D) = p(D|params) p(params|params_0) / p(D) # p(yi|params) is predictive likelihood (need full matrix) # construct p(params|D) matrix from p(D|params) matrix (marginal likelihood likelihood matrix) # and p(params|params_0) (marginal likelihood prior matrix) # p(D) is marginal likelihood _, pYk, pParams, pYkGivenParams, mv, lv = self.marginalLikelihood( y, return_dists=True) pParamsGivenYk = np.array(pYkGivenParams) for k in range(K): pParamsGivenYk[k] = pParams + pYkGivenParams[k] - pYk[k] mus, lambdas = self.mus, self.lambdas logdmudlambda = np.log(1. / len(mus) * 1. / len(lambdas)) pYikGivenParams = np.empty((K, len(mus), len(lambdas))) # pdf(x, lambda, mu) = pdf(x-mu, lambda), so instead of looping over mu, # we calculate pdf(x-mus, lambda) for l in range(len(lambdas)): for k in range(K): # multiply by dmu dlambda when converting from pdf to discrete probability pYikGivenParams[k, :, l] = stats.vonmises.logpdf( yi[k] - mus, lambdas[l]) + logdmudlambda if orient: pYikGivenYkWithOffset = np.empty((K, len(mus))) for k in range(K): p = pParamsGivenYk[k] for offset in range(len(mus)): l = np.roll(pYikGivenParams[k], offset, axis=0) pYikGivenYkWithOffset[k, offset] = logsumexp(p + l) # sum over k pYiGivenYWithOffset = np.sum(pYikGivenYkWithOffset, axis=0) bestOffset = np.argmax(pYiGivenYWithOffset) dmu = mus[1] - mus[0] orientation = np.mod(-bestOffset * dmu, 2 * np.pi) return pYiGivenYWithOffset[bestOffset], orientation else: # p(yi|D) = prod_k p(yi[k]|D[k]), equivalently sum of logs pYikGivenYk = np.array(yi) for k in range(K): pYikGivenYk[k] = logsumexp(pParamsGivenYk[k] + pYikGivenParams[k]) return np.sum(pYikGivenYk)
def log_normalize_log(unnormalized):
    return unnormalized - logsumexp(unnormalized)
def marginalLikelihood(self, y, return_dists=False): y = np.array(y) if not y.size: return 0.0 # log likelihood of 0 if y is empty if not y.size or len(y.shape) == 1 or len(y.shape) == 0: y = y.reshape(1, -1) N = len(y) # number of observations K = len(y[0]) # number of dimensions of each observation for i in range(N): assert len(y[i]) == K mus, lambdas = self.mus, self.lambdas logdmudlambda = np.log(1. / len(mus) * 1. / len(lambdas)) pParams = self.logprior try: pYkGivenParams = self.marginalLikelihoodCache[y.tobytes()] except KeyError: pYkGivenParams = np.empty((K, len(mus), len(lambdas))) # pdf(x, lambda, mu) = pdf(x-mu, lambda), so instead of looping over mu, # we calculate pdf(x-mus, lambda) for l in range(len(lambdas)): for k in range(K): yikxmu = np.tile(y[:, k, np.newaxis], len(mus)) # multiply by dmu dlambda when converting from pdf to discrete probability tmp = stats.vonmises.logpdf(yikxmu - mus, lambdas[l]) + logdmudlambda # sum over yi in y pYkGivenParams[k, :, l] = np.sum(tmp, axis=0) self.marginalLikelihoodCache[y.tobytes()] = pYkGivenParams pYk = np.empty((K, )) for k in range(K): pYk[k] = logsumexp(pParams + pYkGivenParams[k]) # sum over k pY = np.sum(pYk) if return_dists: plot_dists = False if plot_dists: plt.figure() plt.subplot(2, 2, 1) plt.imshow(pParams, aspect='auto', vmin=-50, vmax=0, origin='lower', extent=[ np.min(lambdas), np.max(lambdas), np.min(mus), np.max(mus) ]) plt.ylabel('mu') plt.title('prior over parameters') plt.colorbar() plt.subplot(2, 2, 2) plt.imshow(pYkGivenParams[0], aspect='auto', vmin=-50, vmax=0, origin='lower', extent=[ np.min(lambdas), np.max(lambdas), np.min(mus), np.max(mus) ]) plt.colorbar() plt.xlabel('lambda') plt.title('likelihood of data given parameters') plt.subplot(2, 2, 3) plt.imshow(pParams + pYkGivenParams[0], aspect='auto', vmin=-50, vmax=0, origin='lower', extent=[ np.min(lambdas), np.max(lambdas), np.min(mus), np.max(mus) ]) plt.ylabel('mu') plt.xlabel('lambda') plt.title('posterior of data') plt.colorbar() if return_dists: return pY, pYk, pParams, pYkGivenParams, self.mv, self.lv return pY
def softmax(x, axis=None) -> np.ndarray:
    """
    Computes softmax of input vector
    """
    return np.exp(x - logsumexp(x, axis=axis, keepdims=True))
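# Quick hedged check that the log-domain softmax above matches scipy's reference
# implementation on an input with large entries, where a naive
# exp(x) / exp(x).sum() would overflow.
import numpy as np
from scipy.special import logsumexp
from scipy.special import softmax as scipy_softmax

x = np.array([[1000.0, 1002.0, 999.0]])
ours = np.exp(x - logsumexp(x, axis=-1, keepdims=True))
assert np.allclose(ours, scipy_softmax(x, axis=-1))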
def prefix_search_log_cy(y_, alphabet=DNA_alphabet, return_forward=False): y = y_.astype(np.float64) # initialize prefix search variables stop_search = False search_level = 0 top_label = '' curr_label = '' curr_label_alphas = [] gap_prob = np.sum(y[:, -1]) label_prob = {'': gap_prob} # initalize variables for 1d forward probabilities alpha_prev = decoding_cy.forward_vec_log(-1, search_level, y) #print(alpha_prev) top_forward = np.array([]) prefix_forward = np.zeros(shape=(len(alphabet), len(y), len(y))) + LOG_0 while not stop_search: prefix_prob = {} prefix_alphas = [] search_level += 1 for c, c_i in alphabet.items(): prefix = curr_label + c prefix_int = [alphabet[i] for i in prefix] if c_i == 0: best_prefix = prefix alpha_ast = forward_vec_no_gap_log(prefix_int, y, alpha_prev) prefix_prob[prefix] = logsumexp(alpha_ast) # calculate label probability alpha = decoding_cy.forward_vec_log(c_i, search_level, y, previous=alpha_prev) prefix_forward[c_i, search_level - 1] = alpha label_prob[prefix] = alpha[-1] if label_prob[prefix] > label_prob[top_label]: top_label = prefix top_forward = prefix_forward[c_i, :len(prefix)] #print(len(top_label),len(top_forward)) if prefix_prob[prefix] > prefix_prob[best_prefix]: best_prefix = prefix prefix_alphas.append(alpha) #print(search_level, 'extending by prefix:',c, 'Prefix Probability:',prefix_prob[prefix], 'Label probability:',label_prob[prefix], file=sys.stderr) #best_prefix = max(prefix_prob.items(), key=operator.itemgetter(1))[0] #print('best prefix is:',best_prefix, file=sys.stderr) if prefix_prob[best_prefix] < label_prob[top_label]: stop_search = True else: # get highest probability label #top_label = max(label_prob.items(), key=operator.itemgetter(1))[0] # then move to prefix with highest prefix probability curr_label = best_prefix alpha_prev = prefix_alphas[alphabet[curr_label[-1]]] if return_forward: return (top_label, top_forward.T) else: return (top_label, label_prob[top_label])
def em_pmf(q, eps=1.0, sensitivity=1.0, monotonic=False):
    coef = 1.0 if monotonic else 0.5
    q = q - q.max()
    logits = coef*eps/sensitivity*q
    return np.exp(logits - logsumexp(logits))
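# Hedged usage sketch of em_pmf above (invented utility scores q; assumes numpy as
# np and scipy.special.logsumexp are in scope for em_pmf itself): the exponential
# mechanism samples an index with probability proportional to
# exp(eps * q / (2 * sensitivity)).
import numpy as np
from scipy.special import logsumexp

q = np.array([3.0, 1.0, 0.0, 2.5])
p = em_pmf(q, eps=0.5, sensitivity=1.0)
assert np.isclose(p.sum(), 1.0) and p.argmax() == q.argmax()
choice = np.random.default_rng(0).choice(len(q), p=p)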
def scipy_fun(array_to_reduce):
    return osp_special.logsumexp(array_to_reduce, axis,
                                 keepdims=keepdims, return_sign=return_sign)
def train(speaker, X, M=8, epsilon=0.0, maxIter=20): ''' Train a model for the given speaker. Returns the theta (omega, mu, sigma)''' myTheta = theta(speaker, M, X.shape[1]) #print ('TODO') # define variables T, d = X.shape # initialize ind = random.sample(range(T), M) myTheta.mu = X[np.array(ind)] myTheta.Sigma = np.ones( (M, d)) # this Mxd matrix consists of M diagonals of dxd matrix myTheta.omega[..., 0] = float(1) / M i = 0 prev_L = float('-inf') improvement = float('inf') # log_Bs = np.zeros((M, T)) # log_Ps = np.zeros((M, T)) while i <= maxIter and improvement >= epsilon: preComputedForM = np.array(preCompute(myTheta)).reshape( (M, 1)) # M x 1 # # compute log_Bs # # nested loop --- really slow for training # for m in tqdm(range(0, M)): # for t in tqdm(range(0, T)): # # log_Bs[m, t] = log_b_m_x( m, X[t], myTheta ) # log_Ps[m, t] = log_p_m_x( m, X[t], myTheta ) # print("for loop: {}".format(log_Ps)) # for efficiency, use matrix operation to compute log_Bs sigmaSquare = np.reciprocal(myTheta.Sigma, where=(myTheta.Sigma != 0)) # M x d xSquare = (0.5 * (X**2)).T # d x T term1 = (-1) * np.dot(sigmaSquare, xSquare) # M x T term2 = np.multiply(myTheta.mu, sigmaSquare) # M x d term3 = np.dot(term2, X.T) # M x T log_Bs = term1 + term3 - preComputedForM # print(log_Bs) # compute likelihood and update loop constraints L = logLik(log_Bs, myTheta) improvement = L - prev_L prev_L = L i += 1 # compute Ps for the purpose of updating parameters # term4 = myTheta.omega * np.exp(log_Bs) # M x T # term5 = np.sum(term4, axis=0) # 1 x T # Ps = np.divide(term4, term5, out=np.zeros_like(term4), where=(term5 > 0)) # M x T # use logsumexp to compute in a more stable way term4 = np.log(myTheta.omega) + log_Bs - logsumexp( log_Bs, b=myTheta.omega, axis=0) Ps = np.exp(term4) # make sure Ps >= 0 # print(term4) # print(Ps) # update parameters term6 = np.sum(Ps, axis=1).reshape((M, 1)) myTheta.omega = term6 / float(T) # M times 1 term7 = np.dot(Ps, X) myTheta.mu = np.divide( term7, term6, out=np.zeros_like(term7), where=(term6 != 0)) # M times d and M times 1 --> M x d term8 = np.dot(Ps, X**2) myTheta.Sigma = np.divide( term8, term6, out=np.zeros_like(term8), where=(term6 != 0)) - ( myTheta.mu**2) # M x d # print(myTheta.Sigma) return myTheta
def numeric(self, values):
    """Evaluates e^x elementwise, sums, and takes the log.
    """
    return logsumexp(values[0], axis=self.axis, keepdims=self.keepdims)
def importance_sampler(raw_data, analysis_settings): """ Recovers a curve that best explains the relationship between the predictor and dependent variables **Arguments**: - raw_data: The data matrix (total number of trials x 6 columns). Refer to RUN_IMPORTANCE_SAMPLER() - analysis_settings: A struct that holds algorithm relevant settings. Refer to RUN_IMPORTANCE_SAMPLER() Saves a .mat file in `current_path/analysis_id/analysis_id_importance_sampler.mat` """ time = datetime.datetime.now() print('Start time {}/{} {}:{}'.format(time.month, time.day, time.hour, time.minute)) # Resetting the random number seed random.seed() seed = random.getstate() # Preprocessing the data matrix and updating the analysis_settings struct with additional/missing information preprocessed_data, ana_opt = preprocessing_setup(raw_data, analysis_settings) del raw_data del analysis_settings # Housekeeping importance_sampler = {} # Creating the output struct hold_betas_per_iter = np.full( (ana_opt['em_iterations'] + 1, 2), np.nan) # Matrix to hold betas over em iterations exp_max_f_values = np.full( (ana_opt['em_iterations'], 1), np.nan) # Matrix to hold the f_values over em iterations normalized_w = np.full( (ana_opt['em_iterations'] + 1, ana_opt['particles']), np.nan) # to hold the normalized weights global tau global bounds global w global net_effects global dependent_var # fetch parameters tau = ana_opt['tau'] # Store the tau for convenience bounds = family_of_curves( ana_opt['curve_type'], 'get_bounds') # Get the curve parameter absolute bounds nParam = family_of_curves( ana_opt['curve_type'], 'get_nParams') # Get the number of curve parameters hold_betas = [ana_opt['beta_0'], ana_opt['beta_1']] # Store the betas into a vector for em in range(ana_opt['em_iterations']): # for every em iteration hold_betas_per_iter[ em, :] = hold_betas # Store the logreg betas over em iterations print('Betas: {}, {}'.format(hold_betas[0], hold_betas[1])) print('EM Iteration: {}'.format(em)) # Initialize the previous iteration curve parameters, weight vector, net_effects and dependent_var matrices # Matrix to hold the previous iteration curve parameters prev_iter_curve_param = np.full( (ana_opt['particles'], family_of_curves(ana_opt['curve_type'], 'get_nParams')), np.nan) w = np.full((ana_opt['particles']), np.nan) # Vector to hold normalized weights # Matrix to hold the predictor variables (taking net effects if relevant) over all particles net_effects = np.full( (len(ana_opt['net_effect_clusters']), ana_opt['particles']), np.nan) dependent_var = np.array( [] ) # can't be initialized in advance as we don't know its length (dropping outliers) # Sampling curve parameters if em == 0: # only for the first em iteration param = common_to_all_curves( ana_opt['curve_type'], 'initial_sampling', ana_opt['particles'], ana_opt['resolution']) # Good old uniform sampling else: # for em iterations 2, 3, etc # Sample curve parameters from previous iteration's curve parameters based on normalized weights prev_iter_curve_param = param # we need previous iteration's curve parameters to compute likelihood # Here we sample curves (with repetitions) based on the weights param = prev_iter_curve_param[ random.choices(np.arange(ana_opt['particles']), k=ana_opt['particles'], weights=normalized_w[em - 1, :]), :] # Add Gaussian noise since some curves are going to be identical due to the repetitions # NOISE: Sample from truncated normal distribution using individual curve parameter bounds, # mean = sampled curve parameters and sigma = tau for npm in 
range(nParam): param[:, npm] = truncated_normal(bounds[npm, 0], bounds[npm, 1], param[:, npm], tau, ana_opt['particles']) # Check whether curve parameters lie within the upper and lower bounds param = common_to_all_curves(ana_opt['curve_type'], 'check_if_exceed_bounds', param) if ana_opt['curve_type'] == 'horz_indpnt': # Check if the horizontal curve parameters are following the right trend i.e. x1 < x2 param = common_to_all_curves(ana_opt['curve_type'], 'sort_horizontal_params', param) # Compute the likelihood over all subjects (i.e. log probability mass function if logistic regression) # This is where we use the chunking trick II for ptl_idx in range(np.shape(ana_opt['ptl_chunk_idx'])[0]): output_struct = family_of_curves( ana_opt['curve_type'], 'compute_likelihood', ana_opt['net_effect_clusters'], ana_opt['ptl_chunk_idx'][ptl_idx, 2], param[int(ana_opt['ptl_chunk_idx'][ ptl_idx, 0]):int(ana_opt['ptl_chunk_idx'][ptl_idx, 1]), :], hold_betas, preprocessed_data, ana_opt['distribution'], ana_opt['dist_specific_params'], ana_opt['data_matrix_columns']) # Gather weights w[int(ana_opt['ptl_chunk_idx'][ptl_idx, 0]):int(ana_opt['ptl_chunk_idx'][ ptl_idx, 1])] = output_struct['w'] # Gather predictor variable net_effects[:, int(ana_opt['ptl_chunk_idx'][ptl_idx, 0]):int(ana_opt['ptl_chunk_idx'][ptl_idx, 1])] = \ output_struct['net_effects'] if ptl_idx == 0: # Gather dependent variable only once, since it is the same across all ptl_idx dependent_var = output_struct['dependent_var'] del output_struct if np.any(np.isnan(w)): raise ValueError('NaNs in normalized weight vector w!') # Compute the p(theta) and q(theta) weights if em > 0: p_theta_minus_q_theta = compute_weights( ana_opt['curve_type'], ana_opt['particles'], normalized_w[em - 1, :], prev_iter_curve_param, param, ana_opt['wgt_chunks'], ana_opt['resolution']) w += p_theta_minus_q_theta w = np.exp( w - special.logsumexp(w) ) # Normalize the weights using logsumexp to avoid numerical underflow normalized_w[em, :] = w # Store the normalized weights # Optimize betas using fminunc optimizing_function = family_of_distributions( ana_opt['distribution'], 'fminunc_both_betas', w, net_effects, dependent_var, ana_opt['dist_specific_params']) result = optimize.minimize(optimizing_function, np.array(hold_betas), jac=True, options={ 'disp': True, 'return_all': True }) hold_betas = result.x f_value = result.fun exp_max_f_values[ em] = f_value # gather the f_values over em iterations hold_betas_per_iter[ em + 1, :] = hold_betas # Store away the last em iteration betas print('>>>>>>>>> Final Betas: {}, {} <<<<<<<<<'.format( hold_betas[0], hold_betas[1])) # Flipping the vertical curve parameters if beta_1 is negative importance_sampler['flip'] = False neg_beta_idx = hold_betas[1] < 0 if neg_beta_idx: print('!!!!!!!!!!!!!!!!!!!! Beta 1 is flipped !!!!!!!!!!!!!!!!!!!!') hold_betas[1] = hold_betas[1] * -1 param = common_to_all_curves(ana_opt['curve_type'], 'flip_vertical_params', param) importance_sampler['flip'] = True w = np.full((ana_opt['particles']), np.nan) # Clearing the weight vector # Used for a likelihoods ratio test to see if our beta1 value is degenerate w_null_hypothesis = np.full((ana_opt['particles']), np.nan) # The null hypothesis for the likelihoods ratio test states that our model y_hat = beta_0 + beta_1 * predictor # variable is no different than the simpler model y_hat = beta_0 + beta_1 * predictor variable WHERE BETA_1 = # ZERO i.e. 
our model is y_hat = beta_0 null_hypothesis_beta = [hold_betas[0], 0] for ptl_idx in range(np.shape(ana_opt.ptl_chunk_idx)[0]): output_struct = family_of_curves( ana_opt['curve_type'], 'compute_likelihood', ana_opt['net_effect_clusters'], ana_opt['ptl_chunk_idx'][ptl_idx, 3], param[ana_opt['ptl_chunk_idx'][ptl_idx, 1]:ana_opt['ptl_chunk_idx'][ptl_idx, 2], :], hold_betas, preprocessed_data, ana_opt['distribution'], ana_opt['dist_specific_params'], ana_opt['data_matrix_columns']) w[ana_opt['ptl_chunk_idx'][ptl_idx, 1]:ana_opt['ptl_chunk_idx'][ ptl_idx, 2]] = output_struct['w'] # this code computes the log likelihood of the data under the null hypothesis i.e. using null_hypothesis_beta # instead of hold_betas -- it's "lazy" because, unlike the alternative hypothesis, we don't have to compute the # data likelihood for each particle because it's exactly the same for each particle (b/c compute_likelihood uses # z = beta_1 * x + beta_0, but (recall that our particles control the value of x in this equation) beta_1 is zero # for the null hypothesis) that's why we pass in the zero vector representing a single particle with irrelevant # weights so we don't have to do it for each particle unnecessarily output_struct_null_hypothesis_lazy = family_of_curves( ana_opt['curve_type'], 'compute_likelihood', ana_opt['net_effect_clusters'], 1, [0, 0, 0, 0, 0, 0], null_hypothesis_beta, preprocessed_data, ana_opt['distribution'], ana_opt['dist_specific_params'], ana_opt['data_matrix_columns']) data_likelihood_null_hypothesis = output_struct_null_hypothesis_lazy['w'] data_likelihood_alternative_hypothesis = w w = w + p_theta_minus_q_theta if np.any(np.isnan(w)): raise ValueError('NaNs in normalized weight vector w!') w = np.exp( w - special.logsumexp(w) ) # Normalize the weights using logsumexp to avoid numerical underflow normalized_w[em + 1, :] = w # Store the normalized weights # Added for debugging chi-sq, might remove eventually importance_sampler[ 'data_likelihood_alternative_hypothesis'] = data_likelihood_alternative_hypothesis importance_sampler[ 'data_likelihood_null_hypothesis'] = data_likelihood_null_hypothesis # we calculate the data_likelihood over ALL particles by multiplying the data_likelihood for each particle by # that particle's importance weight dummy_var, importance_sampler['likratiotest'] = likratiotest( w * np.transpose(data_likelihood_alternative_hypothesis), data_likelihood_null_hypothesis, 2, 1) if np.any(np.isnan(normalized_w)): raise ValueError('NaNs in normalized weights vector!') if np.any(np.isnan(exp_max_f_values)): raise ValueError('NaNs in Expectation maximilzation fval matrix!') if np.any(np.isnan(hold_betas_per_iter)): raise ValueError('NaNs in hold betas matrix!') importance_sampler['normalized_weights'] = normalized_w importance_sampler['exp_max_fval'] = exp_max_f_values importance_sampler['hold_betas_per_iter'] = hold_betas_per_iter importance_sampler['curve_params'] = param importance_sampler['analysis_settings'] = ana_opt if ana_opt['bootstrap']: sio.savemat( '{}/{}_b{}_importance_sampler.mat'.format( ana_opt['target_dir'], ana_opt['analysis_id'], ana_opt['bootstrap_run']), {'importance_sampler': importance_sampler}) elif ana_opt['scramble']: sio.savemat( '{}/{}_s{}_importance_sampler.mat'.format(ana_opt['target_dir'], ana_opt['analysis_id'], ana_opt['scramble_run']), {'importance_sampler': importance_sampler}) else: sio.savemat( '{}/{}_importance_sampler.mat'.format(ana_opt['target_dir'], ana_opt['analysis_id']), {'importance_sampler': importance_sampler}) print('Results 
are stored in {}'.format(ana_opt['target_dir'])) time = datetime.datetime.now() print('Finish time {}/{} {}:{}'.format(time.month, time.day, time.hour, time.minute))
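The weight update above relies on the standard log-sum-exp trick: subtracting special.logsumexp(w) before exponentiating keeps the normalized weights finite even when every log weight is hugely negative. A minimal, self-contained sketch with made-up log weights (toy variables, not the sampler's):

import numpy as np
from scipy import special

# Minimal sketch: normalizing log importance weights with logsumexp so that
# exponentiation cannot underflow to an all-zero weight vector.
rng = np.random.default_rng(0)
log_w = rng.normal(loc=-1000.0, scale=5.0, size=100)   # extreme (toy) log weights

naive = np.exp(log_w).sum()                    # underflows to 0.0; naive normalization would divide by zero
w = np.exp(log_w - special.logsumexp(log_w))   # stable: shift by the log normalizer first
assert naive == 0.0 and np.isclose(w.sum(), 1.0)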
def estimate_mixture_params(EmissionParameters, curr_counts_orig, curr_nr_of_counts_orig, curr_state, rand_sample_size, max_nr_iter, nr_of_iter=20, stop_crit=1.0, nr_of_init=10, verbosity=1): ''' This function estimates thedirichlet multinomial mixture parameters ''' #1) Copy old parameters and use it as initialisation for the first iteration alphas_list = [] mixtures_list = [] lls_list = [] curr_counts = deepcopy(curr_counts_orig) curr_nr_of_counts = deepcopy(curr_nr_of_counts_orig) if len(curr_counts.shape) == 1: curr_counts = np.expand_dims(curr_counts, axis=1) if np.sum(np.sum(curr_counts, axis=0) > 0) > 0: curr_nr_of_counts = curr_nr_of_counts[:, np.sum(curr_counts, axis=0) >0] curr_counts = curr_counts[:, np.sum(curr_counts, axis=0) >0] #Test for fitting distributions only on diag events curr_nr_of_counts = curr_nr_of_counts[:, np.sum(curr_counts, axis=0) > 10] curr_counts = curr_counts[:, np.sum(curr_counts, axis=0) > 10] tracks_per_rep = EmissionParameters['Diag_event_params']['alpha'][curr_state].shape[0] NrOfReplicates = curr_counts.shape[0] / tracks_per_rep noncon = np.sum(curr_counts[[tracks_per_rep - 1 + (tracks_per_rep * i) for i in range(NrOfReplicates)],:], axis=0) conv = np.sum(curr_counts, axis=0) - noncon ratio = conv / np.float64(conv + noncon) rat_ix = ((ratio > 0.05) * (ratio < 0.95)) > 0 rat_ix = ((ratio < 0.95)) > 0 curr_nr_of_counts = curr_nr_of_counts[:, rat_ix] curr_counts = curr_counts[:, rat_ix] #end test if len(curr_counts.shape) == 1: curr_counts = np.expand_dims(curr_counts, axis=1) #Save old lls mixtures and alphas mixtures = deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state]) OldAlpha = deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state]) #pdb.set_trace() scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1])) ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts))) alphas_list.append(deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state])) mixtures_list.append(deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state])) lls_list.append(ll) for curr_init in range(nr_of_init): #compute the curr mixture, ll and alpha #initialiste the parameters old_ll = 0 if curr_init == 0: OldAlpha = deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state]) mixtures = deepcopy(mixtures) else: OldAlpha = np.random.uniform(low=0.0001, high=0.1, size=OldAlpha.shape) for i in range(OldAlpha.shape[1]): OldAlpha[np.random.randint(OldAlpha.shape[0]-1), i] = np.random.random() * 10.0 OldAlpha[-2, i] = np.random.random() * 1.0 OldAlpha[-1, i] = np.random.random() * 10.0 mixtures = np.random.uniform(low=0.0001, high=1.0, size=mixtures.shape) mixtures /= np.sum(mixtures) if EmissionParameters['Diag_event_params']['nr_mix_comp'] == 1: #Case that only one mixture component is given EmissionParameters['Diag_event_params']['alpha'][curr_state][:, 0] = diag_event_model.estimate_multinomial_parameters(curr_counts, curr_nr_of_counts, EmissionParameters, OldAlpha[:]) #compute ll scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1])) ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts))) alphas_list.append(deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state])) 
mixtures_list.append(deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state])) lls_list.append(ll) else: zero_ix = [] for iter_nr in range(max_nr_iter): print('em-iteration ' + str(iter_nr)) scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1])) # 2) Compute the mixture components #compute the normalisation factor normalised_likelihood = logsumexp(scored_counts, axis=0) old_ll = ll ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts))) if np.abs(old_ll - ll) < stop_crit: #Check if convergence has been reached if len(zero_ix) == 0: break normalised_scores = scored_counts - np.tile(normalised_likelihood, (scored_counts.shape[0], 1)) un_norm_mixtures = logsumexp(normalised_scores, b=np.tile(curr_nr_of_counts, (scored_counts.shape[0], 1)), axis = 1) mixtures = np.exp(un_norm_mixtures - logsumexp(un_norm_mixtures)) # 3) Compute for eachcount the most likely mixture component curr_weights = np.exp(normalised_scores) curr_weights = (curr_weights == np.tile(np.max(curr_weights, axis=0), (curr_weights.shape[0], 1))) *1.0 zero_mix = np.sum(curr_weights,axis=1) == 0 zero_ix = np.where(zero_mix)[0].tolist() EmissionParameters['Diag_event_params']['mix_comp'][curr_state] = mixtures #Get number of positions that are used. (In case there are fewer entries that rand_sample_size in counts) rand_size = min(rand_sample_size, curr_counts.shape[1]) for i in zero_ix: random_ix = np.random.choice(curr_counts.shape[1], rand_size, p=(curr_nr_of_counts[0, :] / np.float(np.sum(curr_nr_of_counts[0, :])))) curr_counts = np.hstack([curr_counts, curr_counts[:, random_ix]]) curr_nr_of_counts = np.hstack([curr_nr_of_counts, np.ones((1, rand_size))]) temp_array = np.zeros((normalised_scores.shape[0], rand_size)) temp_array[i, :] = i normalised_scores = np.hstack([normalised_scores, temp_array]) temp_array = np.zeros((curr_weights.shape[0], rand_size)) temp_array[i, :] = 1 curr_weights = np.hstack([curr_weights, temp_array]) # 4) Compute the dirichlet-multinomial parameters for curr_mix_comp in range(EmissionParameters['Diag_event_params']['nr_mix_comp']): local_counts = curr_counts local_nr_counts = curr_nr_of_counts * curr_weights[curr_mix_comp, :] local_counts = local_counts[:, local_nr_counts[0, :] > 0] local_nr_counts = local_nr_counts[0, local_nr_counts[0, :] > 0] if len(local_counts.shape) == 1: local_counts = np.expand_dims(local_counts, axis=1) curr_alpha = diag_event_model.estimate_multinomial_parameters(local_counts, local_nr_counts, EmissionParameters, OldAlpha[:, curr_mix_comp]) if curr_mix_comp in zero_ix: OldAlpha[:, curr_mix_comp] = np.random.uniform(low=0.0001, high=0.1, size=OldAlpha[:, curr_mix_comp].shape) OldAlpha[np.random.randint(OldAlpha.shape[0]), curr_mix_comp] = np.random.random() * 10.0 OldAlpha[-2, curr_mix_comp] = np.random.random() * 1.0 OldAlpha[-1, curr_mix_comp] = np.random.random() * 10.0 else: OldAlpha[:, curr_mix_comp] = curr_alpha if (len(zero_ix) > 0) and (iter_nr + 2 < max_nr_iter): #Treat the case where some mixtures have prob zero mixtures[zero_ix] = np.mean(mixtures) mixtures /= np.sum(mixtures) EmissionParameters['Diag_event_params']['mix_comp'][curr_state] = deepcopy(mixtures) EmissionParameters['Diag_event_params']['alpha'][curr_state] = deepcopy(OldAlpha) # Check if convergence has been achieved. 
alphas_list.append(deepcopy(OldAlpha)) mixtures[zero_ix] = np.min(mixtures[mixtures > 0]) mixtures /= np.sum(mixtures) mixtures_list.append(deepcopy(mixtures)) lls_list.append(ll) #select which alpha had the highest ll max_ll_pos = np.argmax(np.array(lls_list)) alpha = alphas_list[max_ll_pos] mixtures = mixtures_list[max_ll_pos] return alpha, mixtures
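The EM loop above performs both the responsibility computation and the mixture-weight update entirely in log space. A compact sketch with toy arrays standing in for scored_counts and curr_nr_of_counts (the real shapes come from score_counts and the collapsed count matrix):

import numpy as np
from scipy.special import logsumexp

# Toy stand-ins: K mixture components scoring N distinct count columns,
# where column n occurs nr_of_counts[0, n] times in the data.
rng = np.random.default_rng(1)
K, N = 3, 8
scored_counts = rng.normal(size=(K, N)) + np.log(np.full((K, 1), 1.0 / K))  # log p(col | comp) + log mix weight
nr_of_counts = rng.integers(1, 5, size=(1, N)).astype(float)

# E-step: responsibilities p(component | column), normalized over components.
normalised_likelihood = logsumexp(scored_counts, axis=0)
normalised_scores = scored_counts - normalised_likelihood[None, :]

# M-step for the mixture weights: weight each column by its multiplicity via b=.
un_norm_mixtures = logsumexp(normalised_scores, b=np.tile(nr_of_counts, (K, 1)), axis=1)
mixtures = np.exp(un_norm_mixtures - logsumexp(un_norm_mixtures))
assert np.isclose(mixtures.sum(), 1.0)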
def normalizelogspace(x):
    """Normalize each row of a matrix of log values so that it sums to one in linear space."""
    L = logsumexp(x, axis=1).reshape(-1, 1)   # row-wise log normalizer, shape (n, 1)
    Lnew = np.repeat(L, x.shape[1], axis=1)   # replicate the normalizer across every column
    y = x - Lnew                              # subtracting in log space divides in linear space
    return y, Lnew
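A quick usage check (assuming the definition above is in scope): each row of exp(y) sums to one.

import numpy as np
from scipy.special import logsumexp

x = np.log(np.array([[0.2, 0.3, 0.5],
                     [1.0, 1.0, 2.0]]))
y, Lnew = normalizelogspace(x)
print(np.exp(y).sum(axis=1))  # -> [1. 1.]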
def Parallel_estimate_single_mixture_params(args): ''' This function estimates thedirichlet multinomial mixture parameters ''' stop_crit, rand_sample_size, max_nr_iter, curr_init, EmissionParameters, curr_state, curr_counts, curr_nr_of_counts = args #compute the curr mixture, ll and alpha #initialiste the parameters old_ll = 0 ll = -10 OldAlpha = deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state]) mixtures = deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state]) if curr_init > 0: OldAlpha = np.random.uniform(low=0.0001, high=0.1, size=OldAlpha.shape) for i in range(OldAlpha.shape[1]): OldAlpha[np.random.randint(OldAlpha.shape[0]-1), i] = np.random.random() * 10.0 OldAlpha[-2, i] = np.random.random() * 1.0 OldAlpha[-1, i] = np.random.random() * 10.0 mixtures = np.random.uniform(low=0.0001, high=1.0, size=mixtures.shape) mixtures /= np.sum(mixtures) if EmissionParameters['Diag_event_params']['nr_mix_comp'] == 1: #Case that only one mixture component is given EmissionParameters['Diag_event_params']['alpha'][curr_state][:, 0] = diag_event_model.estimate_multinomial_parameters(curr_counts, curr_nr_of_counts, EmissionParameters, OldAlpha[:]) #compute ll scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1])) ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts))) OldAlpha = deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state]) mixtures = deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state]) else: zero_ix = [] for iter_nr in range(max_nr_iter): print('em-iteration ' + str(iter_nr)) scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1])) # 2) Compute the mixture components #compute the normalisation factor normalised_likelihood = logsumexp(scored_counts, axis=0) old_ll = ll ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts))) if np.abs(old_ll - ll) < stop_crit: if len(zero_ix) == 0: break normalised_scores = scored_counts - np.tile(normalised_likelihood, (scored_counts.shape[0], 1)) un_norm_mixtures = logsumexp(normalised_scores, b=np.tile(curr_nr_of_counts, (scored_counts.shape[0], 1)), axis = 1) mixtures = np.exp(un_norm_mixtures - logsumexp(un_norm_mixtures)) # 3) Compute for eachcount the most likely mixture component curr_weights = np.exp(normalised_scores) curr_weights = (curr_weights == np.tile(np.max(curr_weights, axis=0), (curr_weights.shape[0], 1))) *1.0 zero_mix = np.sum(curr_weights,axis=1) == 0 zero_ix = np.where(zero_mix)[0].tolist() EmissionParameters['Diag_event_params']['mix_comp'][curr_state] = mixtures #Get number of positions that are used. 
(In case there are fewer entries that rand_sample_size in counts) rand_size = min(rand_sample_size, curr_counts.shape[1]) for i in zero_ix: random_ix = np.random.choice(curr_counts.shape[1], rand_size, p=(curr_nr_of_counts[0, :] / np.float(np.sum(curr_nr_of_counts[0, :])))) curr_counts = np.hstack([curr_counts, curr_counts[:, random_ix]]) curr_nr_of_counts = np.hstack([curr_nr_of_counts, np.ones((1, rand_size))]) temp_array = np.zeros((normalised_scores.shape[0], rand_size)) temp_array[i, :] = i normalised_scores = np.hstack([normalised_scores, temp_array]) temp_array = np.zeros((curr_weights.shape[0], rand_size)) temp_array[i, :] = 1 curr_weights = np.hstack([curr_weights, temp_array]) # 4) Compute the dirichlet-multinomial parameters for curr_mix_comp in range(EmissionParameters['Diag_event_params']['nr_mix_comp']): local_counts = curr_counts local_nr_counts = curr_nr_of_counts * curr_weights[curr_mix_comp, :] local_counts = local_counts[:, local_nr_counts[0, :] > 0] local_nr_counts = local_nr_counts[0, local_nr_counts[0, :] > 0] if len(local_counts.shape) == 1: local_counts = np.expand_dims(local_counts, axis=1) curr_alpha = diag_event_model.estimate_multinomial_parameters(local_counts, local_nr_counts, EmissionParameters, OldAlpha[:, curr_mix_comp]) if curr_mix_comp in zero_ix: OldAlpha[:, curr_mix_comp] = np.random.uniform(low=0.0001, high=0.1, size=OldAlpha[:, curr_mix_comp].shape) OldAlpha[np.random.randint(OldAlpha.shape[0]), curr_mix_comp] = np.random.random() * 10.0 OldAlpha[-2, curr_mix_comp] = np.random.random() * 1.0 OldAlpha[-1, curr_mix_comp] = np.random.random() * 10.0 else: OldAlpha[:, curr_mix_comp] = curr_alpha if (len(zero_ix) > 0) and (iter_nr + 2 < max_nr_iter): #Treat the case where some mixtures have prob zero mixtures[zero_ix] = np.mean(mixtures) mixtures /= np.sum(mixtures) EmissionParameters['Diag_event_params']['mix_comp'][curr_state] = deepcopy(mixtures) EmissionParameters['Diag_event_params']['alpha'][curr_state] = deepcopy(OldAlpha) # Check if convergence has been achieved. mixtures[zero_ix] = np.min(mixtures[mixtures > 0]) mixtures /= np.sum(mixtures) return [deepcopy(OldAlpha), mixtures, ll]
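Parallel_estimate_single_mixture_params packages one random initialisation per call and returns [alpha, mixtures, ll], so the restarts can be fanned out over a process pool and the best run selected by log-likelihood. A hedged sketch of such a driver (the pool setup and the helper name fit_mixture_multi_init are assumptions; only the argument order and the returned triple come from the function above):

from multiprocessing import Pool

def fit_mixture_multi_init(EmissionParameters, curr_state, curr_counts,
                           curr_nr_of_counts, nr_of_init=10, stop_crit=1.0,
                           rand_sample_size=10, max_nr_iter=15, processes=4):
    # One argument tuple per random initialisation, matching the unpacking
    # at the top of Parallel_estimate_single_mixture_params.
    args = [(stop_crit, rand_sample_size, max_nr_iter, curr_init,
             EmissionParameters, curr_state, curr_counts, curr_nr_of_counts)
            for curr_init in range(nr_of_init)]
    with Pool(processes=processes) as pool:
        results = pool.map(Parallel_estimate_single_mixture_params, args)
    # Each result is [alpha, mixtures, ll]; keep the initialisation with the best ll.
    best = max(results, key=lambda res: res[2])
    return best[0], best[1]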
def plot2d_network(network, counts='weighted', label_name=None, labels=None, labels_err=None, vals=None, dims=(0, 1), cmap='viridis', Nmc=5, point_est='median', plot_kwargs=None, rstate=None, discrete=False, verbose=True, *args, **kwargs): """ Plot a 2-D projection of the network colored by the chosen variable. Parameters ---------- network : `~frankenz.networks._Network`-derived object The trained and populated network object. counts : {'absolute', 'weighted'}, optional The number density of objects mapped onto the network. If `'absolute'`, the raw number of objects associated with each node will be plotted. If `'weighted'`, the weighted number of objects will be shown. Default is `'weighted'`. labels : `~numpy.ndarray` with shape (Nobj), optional The labels we want to project over the network. Will override `counts` if provided. label_name : str, optional The name of the label. labels_err : `~numpy.ndarray` with shape (Nobj), optional Errors on the labels. vals : `~numpy.ndarray` with shape (Nnodes), optional The values to be plotted directly on the network. Overrides `labels`. dims : 2-tuple, optional The `(x, y)` dimensions the network should be plotted over. Default is `(0, 1)`. cmap : colormap, optional The colormap used when plotting results. Default is `'viridis'`. Nmc : int, optional The number of Monte Carlo realizations of the label value(s) if the error(s) are provided. Default is `5`. point_est : str or func, optional The point estimator to be plotted. Pre-defined options include `'mean'`, `'median'`, `'std'`, and `'mad'`. If a function is passed, it will be used to compute the weighted point estimate using input of the form `(labels, wts)`. Default is `'median'`. plot_kwargs : kwargs, optional Keyword arguments to be passed to `~matplotlib.pyplot.scatter`. rstate : `~numpy.random.RandomState` instance, optional Random state instance. If not passed, the default `~numpy.random` instance will be used. discrete : bool, optional Whether to assign weights based **only** on the best-fitting node rather than all nodes an object might be associated with. Default is `False`. verbose : bool, optional Whether to print progress. Default is `True`. Returns ------- vals : `~numpy.ndarray` with shape (Nnodes) Corresponding point estimates for the input labels. """ # Initialize values. if plot_kwargs is None: plot_kwargs = dict() if rstate is None: rstate = np.random if label_name is None and (labels is not None or vals is not None): label_name = 'Node Value' Nnodes = network.NNODE xpos = network.nodes_pos[:, dims[0]] ypos = network.nodes_pos[:, dims[1]] # Compute counts. if counts == 'absolute' and labels is None and vals is None: vals = network.nodes_Nmatch if label_name is None: label_name = 'Counts' elif counts == 'weighted' and labels is None and vals is None: vals = np.array( [np.exp(logsumexp(logwts)) for logwts in network.nodes_logwts]) if label_name is None: label_name = 'Weighted Counts' # Compute point estimates. if vals is None and labels is not None: vals = np.zeros(Nnodes) for i in range(Nnodes): # Print progress. if verbose: sys.stderr.write('\rComputing {0} estimate {1}/{2}'.format( label_name, i + 1, Nnodes)) sys.stderr.flush() # Grab relevant objects. idxs = network.nodes_idxs[i] if discrete: logwts = np.log(network.nodes_bmus[i] + 1e-100) else: logwts = network.nodes_logwts[i] wts = np.exp(logwts - logsumexp(logwts)) # normalized weights ys = labels[idxs] # labels Ny = len(ys) # Account for label errors (if provided) using Monte Carlo methods. 
if labels_err is not None: yes = labels_err[idxs] # errors ys = rstate.normal(ys, yes, size=(Nmc, Ny)).flatten() wts = np.tile(wts, Nmc) / Nmc if point_est == 'mean': # Compute weighted mean. val = np.dot(wts, ys) elif point_est == 'median': # Compute weighted median. sort_idx = np.argsort(ys) sort_cdf = wts[sort_idx].cumsum() val = np.interp(0.5, sort_cdf, ys[sort_idx]) elif point_est == 'std': # Compute weighted std (square root of the weighted variance). ymean = np.dot(wts, ys) # mean val = np.sqrt(np.dot(wts, np.square(ys - ymean))) elif point_est == 'mad': # Compute weighted MAD. sort_idx = np.argsort(ys) sort_cdf = wts[sort_idx].cumsum() ymed = np.interp(0.5, sort_cdf, ys[sort_idx]) # median dev = np.abs(ys - ymed) # absolute deviation sort_idx = np.argsort(dev) sort_cdf = wts[sort_idx].cumsum() val = np.interp(0.5, sort_cdf, dev[sort_idx]) else: try: val = point_est(ys, wts) except Exception: raise RuntimeError("`point_est` function failed!") vals[i] = val if verbose: sys.stderr.write('\n') sys.stderr.flush() # Plot results. plt.scatter(xpos, ypos, c=vals, cmap=cmap, **plot_kwargs) plt.xlabel(r'$x_{0}$'.format(str(dims[0]))) plt.ylabel(r'$x_{0}$'.format(str(dims[1]))) plt.colorbar(label=label_name) return vals
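Two logsumexp idioms carry most of the work above: the per-node weighted count np.exp(logsumexp(logwts)) and the normalized weights np.exp(logwts - logsumexp(logwts)). A toy sketch (random numbers, not a frankenz network) showing both, plus the weighted-median branch:

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(2)
node_logwts = rng.normal(loc=-50.0, scale=3.0, size=20)    # log-weights of objects mapped to one node

weighted_count = np.exp(logsumexp(node_logwts))            # sum of the weights, computed stably
wts = np.exp(node_logwts - logsumexp(node_logwts))         # weights normalized to sum to 1

# Weighted median of the associated labels, as in the 'median' branch above.
ys = rng.normal(size=20)
sort_idx = np.argsort(ys)
val = np.interp(0.5, wts[sort_idx].cumsum(), ys[sort_idx])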
def test_get_ll_contrib(self): # batch_size = 3, trimmed_input_len = 3 # # In the first instance, the contribution to the likelihood should # come from both the generation scores and the copy scores, since the # token is in the source sentence and the target vocabulary. # In the second instance, the contribution should come only from the # generation scores, since the token is not in the source sentence. # In the third instance, the contribution should come only from the copy scores, # since the token is in the source sequence but is not in the target vocabulary. vocab = self.model.vocab generation_scores = torch.tensor([ [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], # these numbers are arbitrary. [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], [0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], ]) # shape: (batch_size, target_vocab_size) copy_scores = torch.tensor([[1.0, 2.0, 1.0], [1.0, 2.0, 3.0], [2.0, 2.0, 3.0]] # these numbers are arbitrary. ) # shape: (batch_size, trimmed_input_len) target_tokens = torch.tensor([ vocab.get_token_index("tokens", self.model._target_namespace), vocab.get_token_index("the", self.model._target_namespace), self.model._oov_index, ]) # shape: (batch_size,) target_to_source = torch.tensor([[0, 1, 0], [0, 0, 0], [1, 0, 1]]) # shape: (batch_size, trimmed_input_len) copy_mask = torch.tensor([[True, True, False], [True, False, False], [True, True, True]]) # shape: (batch_size, trimmed_input_len) # This is what the log likelihood result should look like. ll_check = np.array([ # First instance. logsumexp( np.array([ generation_scores[0, target_tokens[0].item()].item(), 2.0 ])) - logsumexp( np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0, 2.0])), # Second instance. generation_scores[1, target_tokens[1].item()].item() - logsumexp(np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])), # Third instance. logsumexp(np.array([2.0, 3.0])) - logsumexp( np.array( [0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 2.0, 2.0, 3.0])), ]) # This is what the selective_weights result should look like. selective_weights_check = np.stack([ np.array([0.0, 1.0, 0.0]), np.array([0.0, 0.0, 0.0]), np.exp([2.0, float("-inf"), 3.0]) / (np.exp(2.0) + np.exp(3.0)), ]) generation_scores_mask = generation_scores.new_full( generation_scores.size(), True, dtype=torch.bool) ll_actual, selective_weights_actual = self.model._get_ll_contrib( generation_scores, generation_scores_mask, copy_scores, target_tokens, target_to_source, copy_mask, ) np.testing.assert_almost_equal(ll_actual.data.numpy(), ll_check, decimal=6) np.testing.assert_almost_equal(selective_weights_actual.data.numpy(), selective_weights_check, decimal=6)
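The expected values in ll_check all follow one pattern: the numerator pools the target token's generation score with the copy scores of matching, unmasked source positions, and the denominator pools every generation score with every unmasked copy score. A hedged re-derivation of the first instance in plain NumPy (tgt is a stand-in for vocab.get_token_index("tokens", ...), whose actual value depends on the test fixture):

import numpy as np
from scipy.special import logsumexp

gen = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])   # generation scores, instance 0
copy = np.array([1.0, 2.0, 1.0])                            # copy scores, instance 0
copy_mask = np.array([True, True, False])                   # valid (unmasked) source positions
target_to_source = np.array([0, 1, 0])                      # source positions equal to the target token
tgt = 3                                                      # hypothetical target-token index in the vocab

# log p(target) = log(exp(gen[tgt]) + sum of matching copy scores)
#                 - log(sum over all generation scores and unmasked copy scores)
numerator = logsumexp(np.append(copy[(target_to_source == 1) & copy_mask], gen[tgt]))
denominator = logsumexp(np.concatenate([gen, copy[copy_mask]]))
ll_first_instance = numerator - denominator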