def prior_calculations(lbda, maxlen, eta, maxlhs):
    # First, normalization constants for beta
    beta_Z = poisson.cdf(maxlhs, eta) - poisson.pmf(0, eta)
    # Then the actual un-normalized pmfs
    logalpha_pmf = {}
    for i in range(maxlen + 1):
        try:
            logalpha_pmf[i] = poisson.logpmf(i, lbda)
        except RuntimeWarning:
            # only reachable if warnings are promoted to errors
            # (e.g. via warnings.simplefilter('error'))
            logalpha_pmf[i] = float('-inf')
    logbeta_pmf = {}
    for i in range(1, maxlhs + 1):
        logbeta_pmf[i] = poisson.logpmf(i, eta)
    return beta_Z, logalpha_pmf, logbeta_pmf
def log_poisson_pmf(X, rates):
    n_samples = len(X)
    nmix = len(rates)
    log_prob = np.empty((n_samples, nmix))
    for c, rate in enumerate(rates):
        log_prob[:, c] = poisson.logpmf(X, rate)
    return log_prob
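# Usage sketch (illustrative, not from the original module) for log_poisson_pmf
# above: one column of log-probabilities per candidate rate. Assumes numpy and
# scipy.stats.poisson are in scope, as in the function itself.
import numpy as np
from scipy.stats import poisson

X_demo = np.array([0, 2, 5, 9])           # observed counts
rates_demo = [1.0, 3.0, 7.0]              # candidate Poisson rates
lp = log_poisson_pmf(X_demo, rates_demo)  # shape (4, 3)
best_rate = np.argmax(lp, axis=1)         # most likely rate index per count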
def objective_poisson_evaluate(p, X, y, w, max_link=512):
    """Negative mean log probability for a Poisson outcome."""
    if w is not None:
        raise ValueError('Sample weights not supported for this model')
    from scipy.stats import poisson
    # clip the linear predictor so exp() cannot overflow
    yp = np.exp(np.clip(np.dot(X, p), -max_link, max_link))
    return -np.mean(poisson.logpmf(y, yp))
def poisson_pdf(x, u, log=False):
    """The probability of observing x events given that the expected
    number of events is u.
    """
    # Equivalent closed forms:
    #   np.exp(-u) * (u**x) / factorial(x)
    #   np.exp(-u) * (u**x) / gamma(x+1)
    if log:
        return poisson.logpmf(x, u)
    return poisson.pmf(x, u)
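# Sanity check (illustrative): poisson.pmf agrees with the closed form
# exp(-u) * u**x / x! noted in the comments above, computed here in log
# space via gammaln for numerical stability.
import numpy as np
from scipy.special import gammaln

x_demo, u_demo = 4, 2.5
closed_form = np.exp(-u_demo + x_demo * np.log(u_demo) - gammaln(x_demo + 1))
assert np.isclose(poisson_pdf(x_demo, u_demo), closed_form)
assert np.isclose(poisson_pdf(x_demo, u_demo, log=True), np.log(closed_form))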
def logl(rate, cts, exp):
    """log-likelihood of a shadowgram relative to a background shadowgram

    rate: expected rate shadowgram
    cts: cube counts shadowgram
    exp: cube exposure shadowgram
    """
    idx = np.logical_not(np.logical_or(np.logical_or(cts.mask, exp.mask), rate.mask))
    return (poisson.logpmf(cts[idx], rate[idx] * exp[idx]).sum(),
            np.count_nonzero(idx))
def bPFearth(self, data, sys, par):
    a = np.zeros((self.nPart, sys.T))
    s = np.zeros((self.nPart, sys.T))
    p = np.zeros((self.nPart, sys.T))
    w = np.zeros((self.nPart, sys.T))
    xh = np.zeros((sys.T, 1))
    sh = np.zeros((sys.T, 1))
    llp = 0.0

    p[:, 0] = self.xo
    s[:, 0] = self.so

    for tt in range(0, sys.T):
        if tt != 0:
            # Resample (if needed by the ESS criterion)
            if (np.sum(w[:, tt-1]**2))**(-1) < (self.nPart * self.resampFactor):
                if self.resamplingType == "systematic":
                    nIdx = self.resampleSystematic(w[:, tt-1], par)
                elif self.resamplingType == "multinomial":
                    nIdx = self.resampleMultinomial(w[:, tt-1], par)
                else:
                    nIdx = self.resample(w[:, tt-1], par)
                nIdx = np.transpose(nIdx.astype(int))
            else:
                nIdx = np.arange(0, self.nPart)

            # Propagate
            s[:, tt] = sys.h(p[nIdx, tt-1], data.u[tt-1], s[nIdx, tt-1], tt-1)
            p[:, tt] = sys.f(p[nIdx, tt-1], data.u[tt-1], s[:, tt], data.y[tt-1], tt-1) \
                + sys.fn(p[nIdx, tt-1], s[:, tt], data.y[tt-1], tt-1) * np.random.randn(1, self.nPart)
            a[:, tt] = nIdx

        # Calculate weights
        w[:, tt] = poisson.logpmf(data.y[tt], sys.g(p[:, tt], data.u[tt], s[:, tt], tt))
        wmax = np.max(w[:, tt])
        w[:, tt] = np.exp(w[:, tt] - wmax)

        # Estimate log-likelihood
        llp += wmax + np.log(np.sum(w[:, tt])) - np.log(self.nPart)

        # Estimate state
        w[:, tt] /= np.sum(w[:, tt])
        xh[tt] = np.sum(w[:, tt] * p[:, tt])
        sh[tt] = np.sum(w[:, tt] * s[:, tt])

    self.xhatf = xh
    self.shatf = sh
    self.ll = llp
    self.w = w
    self.a = a
    self.p = p
    self.s = s
def _compute_log_likelihood(self, obs):
    log_prob = np.zeros((obs.shape[1], self.n_components))
    for i in range(self.n_components):
        for d in range(self.n_samples):
            for c in range(self.n_rates):
                if self.D_[d, i] == c:
                    work_buffer = poisson.logpmf(obs[d], self.rates_[c])
                    # a nan from logpmf (e.g. a zero rate on older scipy)
                    # means the observation has probability 1 if it is 0,
                    # and probability 0 otherwise
                    log_prob[:, i] += np.where(np.isnan(work_buffer),
                                               np.log(obs[d] == 0),
                                               work_buffer)
    return log_prob
def _compute_log_likelihood(self, obs):
    all_obs = np.concatenate(obs)
    logp = np.empty([1, len(all_obs)])
    for lam in self._means_:
        p = poisson.logpmf(all_obs, lam)
        logp = np.vstack((logp, p))
    logp = logp[1:, :]
    return logp.T
def _infolik(spikes, predictions, fps=25):
    """Computes log likelihood of the data and the information gain.

    Adapted from Lucas Theis' c2s package, https://github.com/lucastheis/c2s
    """
    factor = 1

    # find optimal point-wise monotonic function
    f = optimize_predictions(predictions, spikes, num_support=10,
                             regularize=5e-8, verbosity=2)

    # for conversion into bit/s
    factor = 1. / factor / log(2.)

    # average firing rate (Hz) over all cells
    firing_rate = mean(spikes) * fps

    # estimate log-likelihood and marginal entropies
    loglik = mean(poisson.logpmf(spikes, f(predictions))) * fps * factor
    entropy = -mean(poisson.logpmf(spikes, firing_rate / fps)) * fps * factor

    return loglik, loglik + entropy
def _error(value):
    '''Construct log likelihood errors using Poisson distribution'''
    # likelihood = P(value|lambda) using underlying Poisson assumption
    n_samples = 1000
    lambdas, loglikelihoods = zip(*((x, -2 * poisson.logpmf(value, x))
                                    for x in np.linspace(0, 2 * value + 1, n_samples)))
    interpolated_ll = interpolate.interp1d(lambdas, loglikelihoods)
    # up error: smallest lambda > value for which interpolated_ll(lambda) = interpolated_ll(value) + 1
    # down error: largest lambda < value for which interpolated_ll(lambda) = interpolated_ll(value) + 1
    ll_at_value, lambda_up, lambda_down, step_size = \
        interpolated_ll(value), 1.1 * value, 0.9 * value, float(value) / 10
    for i in range(5):
        lambda_up -= step_size
        lambda_down += step_size
        step_size /= 10
        while interpolated_ll(lambda_down) - ll_at_value < 1:
            lambda_down -= step_size
        while interpolated_ll(lambda_up) - ll_at_value < 1:
            lambda_up += step_size
    return (value - lambda_down, lambda_up - value)
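# Usage sketch (illustrative, assuming the numpy/scipy imports used by the
# routine above): asymmetric errors for an observed count of 25. Since the
# routine brackets Delta(-2 log L) = 1, both errors should land near the
# Gaussian approximation sqrt(n) = 5 for a count this large.
import numpy as np

err_down, err_up = _error(25)
assert abs(err_down - 5.0) < 1.0 and abs(err_up - 5.0) < 1.0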
def log_length_prior(self, runs=None, length_list=None, c_l=None):
    """Calculate the prior probability of a run, based on its length
    and its category.
    """
    if c_l is None:
        c_l = self.l
    if runs is None and length_list is None:
        raise ValueError('No length or run specified')
    if runs is not None:
        length_list = [len(_) for _ in runs]

    # length-based prior
    if self.prior_type == 'Poisson':
        return poisson.logpmf(length_list, c_l)
    elif self.prior_type == 'Geometric':
        return np.array([(run_length - 1) * np.log(1 - c_l) + np.log(c_l)
                         for run_length in length_list])
def _logprob(self, sample):
    """Calculate the joint log probability of data and model given a sample."""
    cur_y, cur_z = sample

    log_prior = 0
    log_lik = 0
    if cur_z.shape[1] == 0:
        return -99999999.9

    if self.cl_mode:
        a_time = time()
        d_cur_z = cl.Buffer(self.ctx, self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                            hostbuf=cur_z.astype(np.int32))
        d_cur_y = cl.Buffer(self.ctx, self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                            hostbuf=cur_y.astype(np.int32))
        d_logprob = cl.array.empty(self.queue, (cur_z.shape[0],), np.float32,
                                   allocator=self.mem_pool)

        if cur_z.shape[0] % self.p_mul_logprob_z_data == 0:
            wg = (self.p_mul_logprob_z_data,)
        else:
            wg = None

        self.prg.logprob_z_data(self.queue, (cur_z.shape[0],), wg,
                                d_cur_z, d_cur_y, self.d_obs, d_logprob.data,
                                np.int32(self.N), np.int32(cur_y.shape[1]),
                                np.int32(cur_z.shape[1]),
                                np.float32(self.alpha), np.float32(self.lam),
                                np.float32(self.epislon))
        log_lik = d_logprob.get().sum()
        self.gpu_time += time() - a_time

        # calculate the prior probability of Y
        num_on = (cur_y == 1).sum()
        num_off = (cur_y == 0).sum()
        log_prior = num_on * np.log(self.theta) + num_off * np.log(1 - self.theta)

    else:
        # calculate the prior probability of Z
        for n in xrange(cur_z.shape[0]):
            num_novel = 0
            for k in xrange(cur_z.shape[1]):
                m = cur_z[:n, k].sum()
                if m > 0:
                    if cur_z[n, k] == 1:
                        log_prior += np.log(m / (n + 1.0))
                    else:
                        log_prior += np.log(1 - m / (n + 1.0))
                else:
                    if cur_z[n, k] == 1:
                        num_novel += 1
            if num_novel > 0:
                log_prior += poisson.logpmf(num_novel, self.alpha / (n + 1.0))

        # calculate the prior probability of Y
        num_on = (cur_y == 1).sum()
        num_off = (cur_y == 0).sum()
        log_prior += num_on * np.log(self.theta) + num_off * np.log(1 - self.theta)

        # calculate the loglikelihood
        log_lik = self._loglik(cur_y=cur_y, cur_z=cur_z)

    return log_prior + log_lik
def get_nlog_likelihood(self, shower_pars, data):
    self.iteration += 1
    print("get_nlog_likelihood call #{}".format(self.iteration))

    self.seed.energy = shower_pars[0] * u.TeV
    self.seed.alt = shower_pars[1] * u.rad
    self.seed.az = shower_pars[2] * u.rad
    self.seed.core_x = shower_pars[3] * u.m
    self.seed.core_y = shower_pars[4] * u.m
    self.seed.h_shower_max = shower_pars[5] * u.m

    log_likelihood = 0.
    for (coordinates, measured, pixel_area) in self.get_parametrisation(self.seed, data):
        expected = self.evaluate_pdf(coordinates) * pixel_area
        # floor the per-pixel term to keep the minimiser away from -inf
        log_likelihood += max(-200., poisson.logpmf(measured, expected))
    return -log_likelihood
def log_P_joint(Y, theta, eta):
    mu_attack = eta[0]
    mu_def = eta[1]
    tao_attack = eta[2]
    tao_def = eta[3]
    h_attacks = theta[:20]
    h_defs = theta[20:40]
    home = theta[40]

    # log P(eta)
    log_p = np.log(norm.pdf(mu_attack, 0, 1 / TAU_1))
    log_p += np.log(norm.pdf(mu_def, 0, 1 / TAU_1))
    log_p += np.log(gamma.pdf(tao_attack, a=ALPHA, scale=1 / BETA))
    log_p += np.log(gamma.pdf(tao_def, a=ALPHA, scale=1 / BETA))

    # log P(home)
    log_p += np.log(norm.pdf(home, 0, 1 / TAU_0))

    # log P(h_attacks | eta)
    for h_attack in h_attacks:
        log_p += np.log(norm.pdf(h_attack, mu_attack, 1 / tao_attack))

    # log P(h_defs | eta)
    for h_def in h_defs:
        log_p += np.log(norm.pdf(h_def, mu_def, 1 / tao_def))

    # log P(Y | theta, eta)
    for g1, g2, t1, t2 in Y:  # score (g1 : g2) in team t1 vs t2
        t1, t2 = int(t1), int(t2)
        poisson_param_1 = np.exp(home + h_attacks[t1] - h_defs[t2])
        poisson_param_2 = np.exp(h_attacks[t2] - h_defs[t1])
        log_p += poisson.logpmf(g1, mu=poisson_param_1)
        log_p += poisson.logpmf(g2, mu=poisson_param_2)
    return log_p
def new_prob(events, f_DM, N_sources, error=0.05, baryons=True):
    N_D = events[1]
    N_B = events[0]
    # if baryons:
    #     N_B = np.loadtxt('baryon_events.txt')
    # else:
    #     N_B = np.zeros_like(N_D)

    step = error / 5
    llim = np.maximum(1 - 4 * error, 0)
    Ab = np.arange(llim, 1 + 4 * error, step)

    def p_alpha(a):
        return norm.logpdf(a, loc=1, scale=error)

    sources_per_LoS = 17e9 / N_sources
    i = 0
    sum_log = np.zeros_like(Ab)
    k = np.expand_dims(np.expand_dims(np.arange(0, 100), 0), 0)
    N_D = np.expand_dims(N_D, 2)
    N_B = np.expand_dims(N_B, 2)
    for ab in Ab:
        karray = poisson.pmf(k, N_B) * np.nan_to_num(
            poisson.logpmf(k, (f_DM * N_D + ab * N_B)))
        Lpois = sources_per_LoS * np.sum(karray, 2)
        sum_log[i] = np.sum(Lpois) + p_alpha(ab) + np.log(step)
        i = i + 1
    logP = logsumexp(sum_log)
    return logP
def _log_likelihood_poisson(self, n_observed, theta, nu, luminosity=300000.0,
                            weights_benchmarks=None, total_weights=None):
    if total_weights is not None and nu is None:
        # `histo` mode: efficient morphing of the whole cross section,
        # without nuisance parameters
        theta_matrix = self._get_theta_benchmark_matrix(theta)
        xsec = mdot(theta_matrix, total_weights)
    elif total_weights is not None and self.nuisance_morpher is not None:
        # `histo` mode: efficient morphing of the whole cross section,
        # with nuisance parameters
        logger.debug("Using nuisance interpolation")
        theta_matrix = self._get_theta_benchmark_matrix(theta)
        xsec = mdot(theta_matrix, total_weights)
        nuisance_effects = self.nuisance_morpher.calculate_nuisance_factors(
            nu, total_weights.reshape((1, -1))).flatten()
        xsec *= nuisance_effects
    elif weights_benchmarks is not None:
        # `weighted` mode: reweights existing events to (theta, nu) -- cheaper
        # than an entirely new xsec calculation
        weights = self._weights([theta], [nu], weights_benchmarks)[0]
        xsec = sum(weights)
    else:
        # `sampled` mode: calculates the total cross section entirely anew --
        # least efficient
        xsec = self.xsecs(thetas=[theta], nus=[nu], partition="train",
                          generated_close_to=theta)[0][0]

    n_predicted = xsec * luminosity
    if xsec < 0:
        logger.warning("Total cross section is negative (%s pb) at theta=%s", xsec, theta)
        n_predicted = 10 ** -5

    n_observed_rounded = int(np.round(n_observed, 0))
    log_likelihood = poisson.logpmf(k=n_observed_rounded, mu=n_predicted)

    logger.debug(
        "Poisson log likelihood: %s (%s expected, %s observed at theta=%s)",
        log_likelihood, n_predicted, n_observed_rounded, theta,
    )
    return log_likelihood
def logLike(self, knot_vals, x, y, log, data):
    """Get logLikelihood assuming spline model values in counts

    @param [float] knot_vals Spline values on knot mesh for logL
    @param [float] x x values to create evaluation mesh
    @param [float] y y values to create evaluation mesh
    @param bool log Is the spline working in log space?
    @param [float] data Data to compare to spline model for logL on x vs y mesh
    """
    if log:
        self.build(np.power(10, knot_vals))
    else:
        self.build(knot_vals)
    model = self.eval(x, y)
    # negative log-likelihood (to be minimised)
    logL = -poisson.logpmf(data.flatten(), model.flatten()).sum()
    return logL
def logp_e(self, i: int) -> float:
    """Basically use the error probability computed in the wall detection.
    In case of false-negative detection of E-intvls, compute another
    probability `logp_po` based only on the wall counts and E-cov.
    `E_PO_BASE` is for low-coverage non-E-intvls.
    """
    I = self.intvls[i]
    logp_er = _log(I.pe)  # TODO: store log pe
    logp_po = sum([poisson.logpmf(c, self.cp.depths['E'])
                   for c in (I.ccb, I.cce)]) + E_PO_BASE
    logp = max(logp_er, logp_po)
    if self.verbose_prob:
        print(f"ER={logp_er:5.0f}{'*' if logp_er >= logp_po else ' '} "
              f"PO={logp_po:5.0f}{'*' if logp_po >= logp_er else ' '}")
    return logp
def lnlike(p, parameters):
    observed_counts = parameters['observed_counts']
    Nobs = len(observed_counts)
    detector_count_rate = detector_model_specialised(p, parameters)
    if len(detector_count_rate) != Nobs - 1:
        print(len(detector_count_rate), Nobs)
        raise AssertionError("detector model must return Nobs - 1 count rates")
    # scale counts so that the total number of counts is preserved (?)
    lp = poisson.logpmf(observed_counts[1:], detector_count_rate)
    lp = lp.sum()
    return lp
def tversky_sample_l(self):
    run_lengths = np.diff(self.clusters + [self.total_trial])
    l_grid = range(1, 22)
    l_log_p_grid = np.zeros(len(l_grid))
    if self.prior_type == 'Poisson':
        for i in xrange(len(l_grid)):
            l_log_p_grid[i] = poisson.logpmf(run_lengths, l_grid[i]).sum()
        self.l = np.random.choice(a=l_grid, p=lognormalize(l_log_p_grid))
    elif self.prior_type == 'Geometric':
        # Beta(alpha + number_of_runs, beta + sum(run_lengths) - number_of_runs)
        # the two totals below were referenced but never defined in the original
        total_number_of_runs = len(run_lengths)
        total_run_length = run_lengths.sum()
        self.l = np.random.beta(a=self.geom_prior_alpha + total_number_of_runs,
                                b=self.geom_prior_beta + total_run_length - total_number_of_runs)
def predict(self, Y, X, parameter_sample):
    r"""Given a sample of (X, Y) as well as a sample of network parameters,
    compute p_{\theta}(Y|X) and compare against the actual values of Y."""
    # Get the lambda(X, \theta_i) for all parameters \theta_i in the sample.
    # The i-th column corresponds to lambda(X, \theta_i).
    lambdas = np.matmul(X, np.transpose(parameter_sample))
    # Get the predictive/test log likelihoods
    predictive_log_likelihoods = poisson.logpmf(Y[:, None], lambdas)
    # Get the MSE and MAE
    SE = (Y[:, None] - lambdas) ** 2
    AE = np.abs(Y[:, None] - lambdas)
    return (predictive_log_likelihoods, SE, AE)
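# Standalone sketch (illustrative) of the core computation in predict above:
# broadcast the observed counts against one column of rates per posterior
# draw. All shapes and values here are made up for the demonstration.
import numpy as np
from scipy.stats import poisson

X_demo = np.abs(np.random.randn(50, 3))        # 50 inputs, 3 features
draws_demo = np.abs(np.random.randn(8, 3))     # 8 parameter draws
Y_demo = np.random.poisson(2.0, size=50)       # observed counts
lambdas_demo = np.matmul(X_demo, draws_demo.T)           # (50, 8) rate matrix
pred_ll = poisson.logpmf(Y_demo[:, None], lambdas_demo)  # (50, 8) log-likelihoods
mean_ll = pred_ll.mean(axis=1)                 # average over the draws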
def prior_k(self, k):
    '''Log-prior for k, the number of atomic terms in the marker. Poisson.'''
    if k <= 0:
        return -np.inf
    else:
        if self.pgeomk is None:
            if self.theta is not None:
                # Poisson for k - 1, since P(k = 0) = 0
                return poisson.logpmf(k - 1, self.theta)
            else:
                return 0
        else:
            return geom.logpmf(k - 1, self.pgeomk)
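# Sanity check (illustrative): the shifted Poisson prior above puts zero mass
# on k <= 0, and exponentiating the log-prior over k >= 1 sums to one.
import numpy as np
from scipy.stats import poisson

theta_demo = 2.0
ks = np.arange(1, 200)
assert np.isclose(np.exp(poisson.logpmf(ks - 1, theta_demo)).sum(), 1.0)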
def get_optimal_observer_prediction(datum, meanData, signal_noise_even=False):
    llVals = []
    num_signals = len(meanData) - 1
    for i, meanDatum in enumerate(meanData):
        log_probability = poisson.logpmf(datum, meanDatum).sum()
        # All is fine if categories are evenly distributed, but if not we
        # have to add the log of their probability.
        if signal_noise_even:
            if i == 0:
                # i == 0 is the no-signal case; use p = 0.5
                log_probability += np.log(0.5)
            else:
                # i > 0 is a signal case
                log_probability += np.log(0.5 / num_signals)
        llVals.append(log_probability)
    prediction = np.argmax(llVals)
    return prediction
def test_poisson():
    # Test that we match the Poisson distribution from scipy
    mu = 2
    dist = lk.Poisson()
    x = np.random.randint(low=0, high=5, size=(10,))

    p1 = poisson.logpmf(x, mu)
    p2 = dist.loglike(x, mu)
    assert np.allclose(p1, p2)

    p1 = poisson.cdf(x, mu)
    p2 = dist.cdf(x, mu)
    assert np.allclose(p1, p2)
def intra_log_likelihood(self, data):
    # if p is not provided, estimate it from the observed data
    if self.p is None:
        self.p = estimate_p(data, mask=self.mask)
        print "Assignable probabilities are calculated using observed data."
    p1, p2, p3, p4 = multinomial_p(self.p, mask=self.mask)

    # masked observed data
    oaa, oab, oba, obb = data['aa'][self.mask], data['ab'][self.mask], \
        data['ba'][self.mask], data['bb'][self.mask]
    oax, oxa, obx, oxb, oxx = data['ax'][self.mask], data['xa'][self.mask], \
        data['bx'][self.mask], data['xb'][self.mask], data['xx'][self.mask]

    # masked poisson lambda
    # !!! add multiple alpha
    lambda_mat = poisson_lambda_multialpha(self.x, self.beta, self.alpha_mat,
                                           self.alpha_pat, self.alpha_inter,
                                           self.bias)
    laa, lab, lba, lbb = disjoin_matrix(lambda_mat, self.n, mask=self.mask)

    # masked gamma
    log_gs, log_ngs = gamma_matrix(self.gamma, self.n, self.mask)

    # sum out the log-likelihood
    ll = float('-inf')  # start with log(0)
    for ztuple in itertools.product((0, 1), repeat=4):
        # ztuple = zaa(0), zab(1), zba(2), zbb(3)
        f = 0  # log f(obs|z) * f(z) for a given z assignment
        # log f(z) = z * log(g) + (1-z) * log(1-g)
        for z, log_g, log_ng in itertools.izip(ztuple, log_gs, log_ngs):
            f += log_g if z == 1 else log_ng
        # log f(obs) = z * log_poisson(p1 * lambda) + (1-z) * log_1(obs==0)
        # poisson.logpmf(k, 0) = 0 if k == 0 else -inf
        for z, lmd, obs in itertools.izip((ztuple[0], ztuple[3]), (laa, lbb), (oaa, obb)):
            f += poisson.logpmf(obs, z * p1 * lmd)
        # ax = aa + ab
        lax = ztuple[0] * laa + ztuple[1] * lab
        f += poisson.logpmf(oax, p2 * lax)
        # xa = aa + ba
        lxa = ztuple[0] * laa + ztuple[2] * lba
        f += poisson.logpmf(oxa, p3 * lxa)
        # bx = ba + bb
        lbx = ztuple[2] * lba + ztuple[3] * lbb
        f += poisson.logpmf(obx, p2 * lbx)
        # xb = ab + bb
        lxb = ztuple[1] * lab + ztuple[3] * lbb
        f += poisson.logpmf(oxb, p3 * lxb)
        # xx = aa + ab + ba + bb
        lxx = ztuple[0] * laa + ztuple[1] * lab + ztuple[2] * lba + ztuple[3] * lbb
        f += poisson.logpmf(oxx, p4 * lxx)
        # log(exp(ll) + exp(f))
        ll = np.logaddexp(ll, f)
    return ll.sum()
def ML_Pois_Naiv(data, model_pred):
    """Calculates the Poisson log-likelihood of the illness measurements in
    Israel.

    It assumes the number of tests is n_{j,k,t}, the probability of a positive
    result is p_{j,k,t} (the model prediction), and the data point is
    q_{j,k,t}; in total the likelihood per data point is P(X=q) ~ Bin(n, p),
    approximated here by a Poisson. For cells (specific t, j, k triplets) with
    an insufficient number of tests (n_{j,k,t} below a threshold) the
    likelihood is ignored.

    :param data: np.array of 4 dimensions:
        axis 0: n, q - representing different values, starting from total
                tests, then the positives rate.
        axis 1: t - time of sample, starting from the first day in question,
                calibrated to the model.
        axis 2: k - area index
        axis 3: j - age index
        data should be smoothed (filled with zeros where no test occurred).
    :param model_pred: np.ndarray of 3 dimensions representing the probability:
        axis 0: t - time of sample, starting from the first day in question,
                calibrated to the model.
        axis 1: k - area index
        axis 2: j - age index
    :return: the negative log-likelihood of the data given the model prediction.
    """
    factor = 1
    q = data
    p = factor * model_pred

    # Poisson approximation
    ll = -poisson.logpmf(k=q, mu=p)

    # cut below-threshold values
    ll = np.nan_to_num(ll, nan=0, posinf=0, neginf=0)
    return ll.sum()
def _negloglikehood(self, coefs):
    '''Estimates the negative log-likelihood, supposing that the new cases
    follow (a) a Poisson process if len(coefs) == 2*nbetas + 1, or
    (b) a negative binomial if len(coefs) == 2*nbetas + 2,
    with the model's new cases as the parameter.
    '''
    ts, mY = self._call_ODE(self.t, self._conversor(coefs))
    if self.use_tot:
        mus = self.N * mY[:, -1]
        ks = self.Y
    else:
        mus = self.N * np.diff(mY[:, -1])
        ks = np.diff(self.Y)
    if len(coefs) == 2 * self.nbetas + 1:
        return -(poisson.logpmf(ks, mus)).sum()
    else:
        return -(nbinom.logpmf(ks, coefs[-1], coefs[-1] / (mus + coefs[-1]))).sum()
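# Sanity check (illustrative) for the negative binomial parameterisation used
# above: nbinom(r, r / (mu + r)) has mean mu and tends to Poisson(mu) as the
# dispersion parameter r grows, which makes the Poisson branch the limiting
# case of the negative binomial one.
import numpy as np
from scipy.stats import nbinom, poisson

mu, ks = 5.0, np.arange(0, 11)
assert np.isclose(nbinom.mean(100.0, 100.0 / (mu + 100.0)), mu)
gaps = [np.abs(nbinom.logpmf(ks, r, r / (mu + r)) - poisson.logpmf(ks, mu)).max()
        for r in (1e2, 1e4)]
assert gaps[1] < gaps[0] < 0.2  # agreement tightens as r grows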
def calculate_posterior_cluster_joint_copy_number(pi, alpha, theta, data):
    K = pi.shape[1]
    J = alpha.shape[1]
    N, M = data.shape[0], data.shape[1]
    result, p = np.zeros((K, J, N, M)), np.zeros((K, J, N, M))
    for n in range(N):
        for m in range(M):
            for j in range(J):
                for k in range(K):
                    p[k, j, n, m] = pi[n, k] + alpha[k, j] + poisson.logpmf(
                        data[n, m], theta[n] * (j + 1))
    for n in range(N):
        for m in range(M):
            # normalize in log space over (k, j)
            result[:, :, n, m] = p[:, :, n, m] - logsumexp(p[:, :, n, m], axis=(0, 1))
    return np.exp(result)
def nloglikeobs_wzp(self, params):
    beta = params[:-1]
    endog = self.endog
    gamma = 1 / (1 + np.exp(params[-1]))  # check this
    XB = np.dot(self.exog, beta)
    nY, nX = self.endog, self.exog
    nloglik = -np.log((1 - gamma) * self.zp) - poisson.logpmf(nY, np.exp(XB))
    nloglik[endog == 0] = -np.log(gamma * self.zp[endog == 0]
                                  + np.exp(-nloglik[endog == 0]))
    return nloglik
def metropolis_fsmp(y, A, sig2w, sig2s, mus, p1, p_cha, mu_t, TRIALS=2000):
    '''
    p1: prior probability for each bin.
    sig2w: variance of white noise.
    sig2s: variance of signal x_i.
    mus: mean of signal x_i.
    '''
    # Only for a multi-gaussian with an arithmetic sequence of mu and sigma
    # N: number of t bins
    # M: length of the waveform clip
    M, N = A.shape

    # nu_root: nu for all s_n = 0.
    nu_root = -0.5 * np.linalg.norm(y) ** 2 / sig2w - 0.5 * M * np.log(2 * np.pi)
    nu_root -= 0.5 * M * np.log(sig2w)
    nu_root += poisson.logpmf(0, p1).sum()

    # Eq. (29)
    cx_root = A / sig2w

    # mu = 0 => (y - A * mu -> z)
    z = y.copy()

    # Metropolis flow
    flip, Δν_history, es_history, c_star_list, T_list, NPE0, number_sample_zero = flow(
        cx_root, p1, z, N, sig2s, sig2w, mus, A, p_cha, mu_t, TRIALS=TRIALS)

    num = len(T_list)
    c_star = np.vstack(c_star_list)
    nu_star = np.cumsum(Δν_history[0]) + nu_root + Δν_history[1]

    # discard the first 20% as burn-in
    burn = num // 5
    nu_star = nu_star[burn:]
    T_list = T_list[burn:]
    c_star = c_star[burn:, :]

    flip[np.abs(flip) == 2] = 0  # a shift does not change the PE count
    NPE_evo = np.cumsum(np.insert(flip, 0, NPE0))[burn:]
    es_history = es_history[es_history['step'] >= burn]

    return nu_star, T_list, c_star, es_history, NPE_evo, number_sample_zero
def get_optimal_observer_hit_false_alarm(testData, testLabels, meanData):
    hits = []
    falseAlarms = []
    allAccuracies = []
    predictions = []
    if len(meanData) > 2:
        return 0
    for datum, label in zip(testData, testLabels):
        llVals = []
        for meanDatum in meanData:
            llVals.append(poisson.logpmf(datum, meanDatum).sum())
        prediction = np.argmax(llVals)
        predictions.append(prediction)
        if label == 1:  # signal
            hits.append(label == prediction)
        else:
            falseAlarms.append(label != prediction)
        allAccuracies.append(prediction == label)
    d = norm.ppf(np.mean(hits)) - norm.ppf(np.mean(falseAlarms))
    return d
def _compute_log_likelihood(self, X):
    matrix = []
    lookup = {}
    for x in X:
        row = []
        for state in range(self.n_components):
            res = 0
            for dim in range(self.n_features):
                for comp in range(self.distr_magnitude):
                    index = (x[dim], self.p[dim][comp][state])
                    if lookup.has_key(index):
                        res += lookup[index] * self.c[dim][comp][state]
                    else:
                        y = poisson.logpmf(x[dim], self.p[dim][comp][state])
                        # lookup[index] = y  # caching disabled in the original
                        res += y * self.c[dim][comp][state]
            row.append(res)
        matrix.append(row)
    return np.asarray(matrix)
def _log_likelihood_poisson(self, n_observed, theta, nu, luminosity=300000.0,
                            weights_benchmarks=None, total_weights=None):
    # three modes: morph the total cross section, reweight benchmark events,
    # or recompute it from scratch (the most expensive fallback)
    if total_weights is not None:
        theta_matrix = self._get_theta_benchmark_matrix(theta)
        xsec = mdot(theta_matrix, total_weights)
    elif weights_benchmarks is not None:
        weights = self._weights([theta], [nu], weights_benchmarks)[0]
        xsec = sum(weights)
    else:
        xsec = self.xsecs(thetas=[theta], nus=[nu], partition="train",
                          generated_close_to=theta)[0][0]

    n_predicted = xsec * luminosity
    if xsec < 0:
        logger.warning("Total cross section is negative (%s pb) at theta=%s", xsec, theta)
        n_predicted = 10 ** -5

    n_observed_rounded = int(np.round(n_observed, 0))
    log_likelihood = poisson.logpmf(k=n_observed_rounded, mu=n_predicted)
    logger.debug(
        "Poisson log likelihood: %s (%s expected, %s observed at theta=%s)",
        log_likelihood, n_predicted, n_observed_rounded, theta,
    )
    return log_likelihood
def gpd_lik_internal(params):
    LAMBDA, alpha, k = params
    # part from the gpd only
    y = (data - chi) / alpha
    if abs(k) > kthresh:
        # gpd
        y = (1 - k * y)
        if np.any(y <= 0) or alpha <= 0:
            ll_gpd = np.inf
        else:
            ll_gpd = length * np.log(alpha) + (1 - 1 / k) * np.sum(np.log(y))
    else:
        # exponential
        ll_gpd = length * np.log(alpha) + y.sum()

    # Add the part from the peak frequency: expand the likelihood function to
    # account for the sample-size distribution; see the theoretical details in
    # Coles p. 82. Here we deviate from the approach in Coles in that lambda
    # (the number of exceedances per time unit) is treated as the third
    # parameter. Moreover, the variance of lambda is modelled assuming a
    # Poisson model for the exceedances rather than a binomial model. This
    # avoids needing the full sample size (total number of observations),
    # which is not really known anyway if the exceedances were determined
    # after declustering. The Poisson assumption is just that exceedances are
    # very rare, which is probably a good assumption for high thresholds
    # anyway. The formulae are in my notes.
    if LAMBDA <= 0:
        ll_poisson = np.inf
    else:
        ll_poisson = -poisson.logpmf(length, LAMBDA * tim)
    return ll_gpd + ll_poisson
def nll_poisson(data, model, parameter_values, parameter_constraints):
    r"""A negative log-likelihood function assuming Poisson statistics for
    each measurement.

    The cost function is given by:

    .. math::
        C = -2 \ln \mathcal{L}({\bf d}, {\bf m})
          = -2 \ln \prod_j \mathcal{L}_{\rm Poisson} (k=d_j, \lambda=m_j) + C({\bf p})

    .. math::
        \rightarrow C = -2 \ln \prod_j \frac{{m_j}^{d_j} \exp(-m_j)}{d_j!} + C({\bf p})

    In the above, :math:`{\bf d}` are the measurements, :math:`{\bf m}` are
    the model predictions, and :math:`C({\bf p})` is the additional cost
    resulting from any constrained parameters.

    :param data: measurement data :math:`{\bf d}`
    :param model: model predictions :math:`{\bf m}`
    :param parameter_values: vector of parameters :math:`{\bf p}`
    :param parameter_constraints: list of fit parameter constraints
    :return: cost function value
    """
    _par_cost = 0.0
    if parameter_constraints is not None:
        for _par_constraint in parameter_constraints:
            _par_cost += _par_constraint.cost(parameter_values)

    _total_log_likelihood = np.sum(poisson.logpmf(data, mu=model, loc=0.0))
    # guard against returning NaN
    if np.isnan(_total_log_likelihood):
        return np.inf
    return -2.0 * _total_log_likelihood + _par_cost
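# Usage sketch (illustrative) for nll_poisson above, with toy data and no
# parameter constraints: the cost at the generating model should typically be
# lower than at a badly scaled one.
import numpy as np

rng = np.random.default_rng(0)
model_demo = np.array([4.0, 9.0, 2.5, 7.0])
data_demo = rng.poisson(model_demo)
c_true = nll_poisson(data_demo, model_demo, None, None)
c_off = nll_poisson(data_demo, 3.0 * model_demo, None, None)
assert c_true < c_off  # holds for typical draws such as this seed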
def test_logpmf_p2(self):
    poisson_pmf = poisson.logpmf(6, 1)
    genpoisson_pmf = sm.distributions.genpoisson_p.logpmf(6, 1, 0, 2)
    assert_allclose(poisson_pmf, genpoisson_pmf, rtol=1e-15)
#fitBase = 'holdout_fitList_190513cA';
### RVCFITS
#rvcBase = 'rvcFits_191023';  # direc flag & '.npy' are added
rvcBase = 'rvcFits_200507'  # direc flag & '.npy' are added

# first the fit type
fitSuf_fl = '_flat'
fitSuf_wg = '_wght'
# then the loss type
if lossType == 1:
    lossSuf = '_sqrt.npy'
    loss = lambda resp, pred: np.sum(np.square(np.sqrt(resp) - np.sqrt(pred)))
elif lossType == 2:
    lossSuf = '_poiss.npy'
    loss = lambda resp, pred: poisson.logpmf(resp, pred)
elif lossType == 3:
    lossSuf = '_modPoiss.npy'
    loss = lambda resp, r, p: np.log(nbinom.pmf(resp, r, p))
elif lossType == 4:
    lossSuf = '_chiSq.npy'
    # LOSS HERE IS TEMPORARY
    loss = lambda resp, pred: np.sum(np.square(np.sqrt(resp) - np.sqrt(pred)))

fitName_fl = str(fitBase + fitSuf_fl + lossSuf)
fitName_wg = str(fitBase + fitSuf_wg + lossSuf)

# set the save directory to save_loc, then create the save directory if needed
if diffPlot == 1:
    compDir = str(fitBase + '_comp' + lossSuf + '/diff')
else:
def logprob(self, data, G):
    Y = turn_into_iterable(data["Y"])
    Z = turn_into_iterable(data["Z"])
    parameter = [G._node[z]["theta"][0] for z in Z]
    return poisson.logpmf(Y, mu=parameter)
def test_logpmf(self):
    poisson_logpmf = poisson.logpmf(7, 3)
    zipoisson_logpmf = sm.distributions.zipoisson.logpmf(7, 3, 0.1)
    assert_allclose(poisson_logpmf, zipoisson_logpmf, rtol=5e-2, atol=5e-2)
def _compute_log_likelihood(self, X):
    ret = np.sum(poisson.logpmf(X, self.means_[0]), axis=1)
    for i in self.means_[1:]:
        ret = np.vstack((ret, np.sum(poisson.logpmf(X, i), axis=1)))
    return ret.T
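# Usage sketch (illustrative) of the emission computation above for a
# two-state Poisson HMM with one rate per feature dimension and state.
# All shapes and values are made up for the demonstration.
import numpy as np
from scipy.stats import poisson

X_demo = np.random.poisson(3.0, size=(100, 2))   # 100 frames, 2 features
means_demo = np.array([[1.0, 2.0], [5.0, 6.0]])  # one row of rates per state
framelogprob = np.column_stack(
    [poisson.logpmf(X_demo, m).sum(axis=1) for m in means_demo])
assert framelogprob.shape == (100, 2)            # one column per hidden state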
# Create dataset
dataset = bmcc.GaussianMixture(
    n=1000, k=4, d=3, r=0.7, alpha=40, df=3, symmetric=False, shuffle=False)

# Create mixture models
model_mfm = bmcc.GibbsMixtureModel(
    data=dataset.data,
    component_model=bmcc.NormalWishart(df=3),
    mixture_model=bmcc.MFM(gamma=1, prior=lambda k: poisson.logpmf(k, 4)),
    assignments=np.zeros(1000).astype(np.uint16),
    thinning=5)
model_dpm = bmcc.GibbsMixtureModel(
    data=dataset.data,
    component_model=bmcc.NormalWishart(df=3),
    mixture_model=bmcc.DPM(alpha=1, use_eb=True),
    assignments=np.zeros(1000).astype(np.uint16),
    thinning=5)

# Run Iterations
print("MFM:")
for i in tqdm(range(5000)):
    model_mfm.iter()
print("DPM:")
for i in tqdm(range(5000)):
    model_dpm.iter()
def lnlike(p, observed_counts, one_sided_prf, detector_background):
    detector_count_rate = detector_model(p, one_sided_prf, detector_background)
    lp = poisson.logpmf(observed_counts, detector_count_rate)
    lp = lp.sum()
    return lp
def test_logpmf_zero(self):
    poisson_logpmf = poisson.logpmf(5, 1)
    zipoisson_logpmf = sm.distributions.zipoisson.logpmf(5, 1, 0)
    assert_allclose(poisson_logpmf, zipoisson_logpmf, rtol=1e-12)
def safe_poisson_logpmf2(n, alphas):
    res = np.zeros((len(alphas),))
    res[alphas > 0] = poisson.logpmf(n, alphas[alphas > 0])
    if n > 0:
        res[alphas == 0] = MIN_LOG_PROB
    return res
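# Usage sketch (illustrative): zero rates receive the floor log-probability
# instead of the -inf that poisson.logpmf would produce for n > 0.
# MIN_LOG_PROB is assumed to be a large negative module-level constant.
import numpy as np

MIN_LOG_PROB = -1e8  # assumed value; the original constant may differ
alphas_demo = np.array([0.0, 0.5, 2.0])
out = safe_poisson_logpmf2(3, alphas_demo)
assert out[0] == MIN_LOG_PROB and np.isfinite(out[1:]).all()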
    d=3, r=0.7, alpha=40, df=3, symmetric=False, shuffle=False)


def hybrid(*args, **kwargs):
    for _ in range(5):
        bmcc.gibbs(*args, **kwargs)
    bmcc.split_merge(*args, **kwargs)


mm = bmcc.MFM(gamma=1, prior=lambda k: poisson.logpmf(k, 3))
# mm = bmcc.DPM(alpha=1, use_eb=False)
cm = bmcc.NormalWishart(df=3)

# Create mixture model
model = bmcc.BayesianMixture(
    data=dataset.data,
    sampler=hybrid,
    component_model=cm,
    mixture_model=mm,
    assignments=np.zeros(POINTS).astype(np.uint16),
    thinning=THINNING)

# Run Iterations
start = time.time()
for i in tqdm(range(ITERATIONS)):
    model.iter()
def _logprob(self, sample):
    """Calculate the joint log probability of data and model given a sample."""
    cur_y, cur_z, cur_r = sample

    log_prior = 0
    log_lik = 0
    if cur_z.shape[1] == 0:
        return -999999999.9

    if self.cl_mode:
        a_time = time()
        d_cur_z = cl.Buffer(self.ctx, self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                            hostbuf=cur_z.astype(np.int32))
        d_cur_y = cl.Buffer(self.ctx, self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                            hostbuf=cur_y.astype(np.int32))
        d_cur_r = cl.Buffer(self.ctx, self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                            hostbuf=cur_r.astype(np.int32))
        d_z_by_ry = cl.Buffer(self.ctx, self.mf.READ_WRITE | self.mf.COPY_HOST_PTR,
                              hostbuf=np.empty(shape=self.obs.shape, dtype=np.int32))
        transformed_y = np.empty(shape=(self.obs.shape[0], cur_z.shape[1], self.obs.shape[1]),
                                 dtype=np.int32)
        d_transformed_y = cl.Buffer(self.ctx, self.mf.READ_WRITE | self.mf.COPY_HOST_PTR,
                                    hostbuf=transformed_y)
        d_temp_y = cl.Buffer(self.ctx, self.mf.READ_WRITE | self.mf.COPY_HOST_PTR,
                             hostbuf=transformed_y)

        # calculate the log prior of Z
        d_logprior_z = cl.array.empty(self.queue, cur_z.shape, np.float32)
        self.prg.logprior_z(self.queue, cur_z.shape, (1, cur_z.shape[1]),
                            d_cur_z, d_logprior_z.data,
                            cl.LocalMemory(cur_z[0].nbytes),
                            np.int32(self.N), np.int32(cur_y.shape[1]),
                            np.int32(cur_z.shape[1]), np.float32(self.alpha))

        # calculate the loglikelihood of data:
        # first transform the feature images and calculate z_by_ry
        self.prg.compute_z_by_ry(self.queue, cur_z.shape, (1, cur_z.shape[1]),
                                 d_cur_y, d_cur_z, d_cur_r,
                                 d_transformed_y, d_temp_y, d_z_by_ry,
                                 np.int32(self.obs.shape[0]), np.int32(self.obs.shape[1]),
                                 np.int32(cur_y.shape[0]), np.int32(self.img_w))

        loglik = np.empty(shape=self.obs.shape, dtype=np.float32)
        d_loglik = cl.Buffer(self.ctx, self.mf.READ_WRITE | self.mf.COPY_HOST_PTR,
                             hostbuf=loglik)
        self.prg.loglik(self.queue, self.obs.shape, None,
                        d_z_by_ry, self.d_obs, d_loglik,
                        np.int32(self.N), np.int32(cur_y.shape[1]),
                        np.int32(cur_z.shape[1]),
                        np.float32(self.lam), np.float32(self.epislon))
        cl.enqueue_copy(self.queue, loglik, d_loglik)
        log_lik = loglik.sum()
        self.gpu_time += time() - a_time

        # calculate the prior probability of Y
        num_on, num_off = (cur_y == 1).sum(), (cur_y == 0).sum()
        log_prior = num_on * np.log(self.theta) + num_off * np.log(1 - self.theta) \
            + d_logprior_z.get().sum()

        # calculate the prior probability of R;
        # we implement a slight bias towards no transformation
        log_prior += (cur_r > 0).sum() * np.log(1 - self.phi) \
            + (cur_r == 0).sum() * np.log(self.phi)

    else:
        # calculate the prior probability of Z
        feat_count = cur_z.cumsum(axis=0)
        for n in xrange(cur_z.shape[0]):
            num_novel = 0
            for k in xrange(cur_z.shape[1]):
                m = feat_count[n, k] - cur_z[n, k]  # == cur_z[:n, k].sum()
                if m > 0:
                    if cur_z[n, k] == 1:
                        log_prior += np.log(m / (n + 1.0))
                    else:
                        log_prior += np.log(1 - m / (n + 1.0))
                else:
                    if cur_z[n, k] == 1:
                        num_novel += 1
            if num_novel > 0:
                log_prior += poisson.logpmf(num_novel, self.alpha / (n + 1.0))

        # calculate the prior probability of Y
        num_on = (cur_y == 1).sum()
        num_off = (cur_y == 0).sum()
        log_prior += num_on * np.log(self.theta) + num_off * np.log(1 - self.theta)

        # calculate the prior probability of R;
        # we implement a slight bias towards no transformation
        log_prior += (cur_r > 0).sum() * np.log(1 - self.phi) \
            + (cur_r == 0).sum() * np.log(self.phi)

        # calculate the loglikelihood
        log_lik = self._loglik(cur_y=cur_y, cur_z=cur_z, cur_r=cur_r)

    return log_prior + log_lik
def calc_likelihood(self, p0, s0, b):
    """Calculate the logLikelihood of a given sequence configuration.

    The model is adapted from GRAAL. Counts are Poisson, with the lambda
    parameter dependent on the expected observed contact rate as a function
    of inter-fragment separation. This has been shown experimentally to be
    modelled effectively by a power-law (used here).

    :param p0: minimum probability of a contact
    :param s0: characteristic distance at which probability = p0
    :param b: exponential parameter
    :return:
    """
    # prepare a mask of sequences to skip in the calculation
    seq_masked = np.empty(len(self.order.names), dtype=np.bool)
    seq_masked.fill(False)
    # parenthesised: '|' binds tighter than '==' in Python
    seq_masked[self.order.no_sites | (self.groupings.bins == Grouping.MASK)] = True
    print "{0} sequences will be masked in likelihood calculation".format(np.sum(seq_masked))

    Nd = np.sum(self.raw_map)
    sumL = 0.0
    num_included_seq = len(self.order.names)
    for i in xrange(num_included_seq):
        if seq_masked[i]:
            continue
        for j in xrange(i + 1, num_included_seq):
            if seq_masked[j]:
                continue

            # inter-contig separation defined by cumulative
            # intervening contig length
            L = self.order.intervening(i, j)

            # bin centers
            centers_i = self.groupings.centers[i]
            centers_j = self.groupings.centers[j]

            # Determine the relative origin for measuring separation between
            # sequences. If i comes before j, distances to j are measured
            # from the end of i -- and vice versa.
            if self.order.is_first(i, j):
                s_i = self.order.lengths[i] - centers_i
                s_j = centers_j
            else:
                s_i = centers_i
                s_j = self.order.lengths[j] - centers_j

            d_ij = np.abs(L + s_i[:, np.newaxis] - s_j)

            # Here we use a piecewise continuous function defined in GRAAL to
            # relate separation distance to the Poisson rate parameter mu.
            # Above a certain separation distance, the probability reaches a
            # constant minimum value.
            q_ij = np.piecewise(
                d_ij, [d_ij < 3e6, d_ij > 3e6],
                [lambda x: 0.5 * (1.0 / 3e6 - (1.0 - 6e-6) ** x * np.log(1.0 - 6e-6)),
                 1.0e-8])

            n_ij = self.get_submatrix(i, j)
            p_ij = poisson.logpmf(n_ij, mu=(Nd * q_ij))
            sumL += np.sum(p_ij)

    return sumL
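# Standalone sketch (illustrative) of the piecewise GRAAL-style contact-rate
# curve used in calc_likelihood above: a declining function of separation
# below 3 Mb and a constant floor beyond it. Values are made up.
import numpy as np

def contact_rate(d_ij):
    return np.piecewise(
        d_ij, [d_ij < 3e6, d_ij > 3e6],
        [lambda x: 0.5 * (1.0 / 3e6 - (1.0 - 6e-6) ** x * np.log(1.0 - 6e-6)),
         1.0e-8])

d_demo = np.array([1e4, 1e6, 5e6])
q_demo = contact_rate(d_demo)
assert q_demo[0] > q_demo[1] > q_demo[2] == 1.0e-8  # monotone decline onto the floor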