def logPrior(self):
    # Log-prior: gamma prior on par[0]; symmetric Beta(2, 2) priors on
    # par[1..3] after mapping them from (-1, 1) to (0, 1).
    prior = 0.0
    prior += gamma.logpdf(self.par[0], 2.0, scale=6.0)
    prior += beta.logpdf(0.5 * (self.par[1] + 1.0), 2.0, 2.0)
    prior += beta.logpdf(0.5 * (self.par[2] + 1.0), 2.0, 2.0)
    prior += beta.logpdf(0.5 * (self.par[3] + 1.0), 2.0, 2.0)
    return prior

def credible_interval(k, n, confidence_level=.95, tolerance=1e-6):
    # Estimate the width of a credible interval: find the highest posterior
    # density interval of the Beta(k + 1, n - k + 1) posterior by binary search.
    p_min_lower = 0.0
    p_middle = p_min_upper = p_max_lower = k / float(n)
    p_max = p_max_upper = 1.0
    p_min_middle = (p_min_lower + p_middle) / 2  # == 0 if k == 0
    p_max_middle = (p_middle + p_max) / 2        # == 1 if k == n
    if k == 0:
        # Edge case: p_min_middle stays 0 -- by definition it's the peak.
        while abs(beta.cdf(p_max_middle, 1, n + 1) - confidence_level) > tolerance:
            if beta.cdf(p_max_middle, 1, n + 1) > confidence_level:
                p_max_upper = p_max_middle
            else:
                p_max_lower = p_max_middle
            p_max_middle = (p_max_lower + p_max_upper) / 2
    elif k == n:
        # Edge case: the posterior peaks at 1, so only p_min is searched.
        while abs(1 - beta.cdf(p_min_middle, k + 1, 1) - confidence_level) > tolerance:
            if 1 - beta.cdf(p_min_middle, k + 1, 1) > confidence_level:
                p_min_lower = p_min_middle
            else:
                p_min_upper = p_min_middle
            p_min_middle = (p_min_lower + p_min_upper) / 2
    else:
        # Main case: binary search on p_min; for each candidate, find the
        # p_max with equal density, then check the enclosed probability mass.
        while abs(beta.cdf(p_max_middle, k + 1, n - k + 1)
                  - beta.cdf(p_min_middle, k + 1, n - k + 1)
                  - confidence_level) > tolerance / 2:
            # Reset p_max bounds for the new iteration:
            p_max_lower = p_middle
            p_max_upper = p_max
            p_max_middle = (p_max_lower + p_max_upper) / 2
            # Binary search to find the p_max matching p_min's density.
            while abs(beta.logpdf(p_min_middle, k + 1, n - k + 1)
                      - beta.logpdf(p_max_middle, k + 1, n - k + 1)) > tolerance / 2:
                if (k * np.log(p_min_middle) + (n - k) * np.log(1 - p_min_middle)
                        > k * np.log(p_max_middle) + (n - k) * np.log(1 - p_max_middle)):
                    p_max_upper = p_max_middle
                else:
                    p_max_lower = p_max_middle
                p_max_middle = (p_max_lower + p_max_upper) / 2
            if (beta.cdf(p_max_middle, k + 1, n - k + 1)
                    - beta.cdf(p_min_middle, k + 1, n - k + 1) > confidence_level):
                p_min_lower = p_min_middle
            else:
                p_min_upper = p_min_middle
            p_min_middle = (p_min_lower + p_min_upper) / 2
    return {'p_min': p_min_middle, 'p_max': p_max_middle}

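# A minimal usage sketch (not from the source): credible_interval only needs
# numpy and scipy.stats.beta, so a hypothetical call for 7 successes in 50
# trials looks like this.
import numpy as np
from scipy.stats import beta

interval = credible_interval(7, 50, confidence_level=0.95)
print(interval['p_min'], interval['p_max'])
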
def get_outliers(data, filter, plotting):
    if plotting:
        for x, r in [("x1", (0, 1)), ("x2", (0, 30)), ("x3", (0, 1))]:
            plt.violinplot(data[x], vert=False)
            plt.xlim(r)
            plt.savefig("plots/violin/%s.png" % x)
            plt.clf()
    if filter:
        data_fl = data[data["class"] == 0]
    else:
        data_fl = data
    # Fit beta/gamma/beta densities to each feature on the (optionally
    # filtered) data, then score every point under the fitted densities.
    pdf = pd.DataFrame({})
    a, b, loc, scale = beta.fit(data_fl["x1"])
    pdf["x1"] = beta.logpdf(data["x1"], a, b, loc=loc, scale=scale)
    a, loc, scale = gamma.fit(data_fl["x2"])
    pdf["x2"] = gamma.logpdf(data["x2"], a, loc=loc, scale=scale)
    a, b, loc, scale = beta.fit(data_fl["x3"])
    pdf["x3"] = beta.logpdf(data["x3"], a, b, loc=loc, scale=scale)
    pdfs = pdf["x1"] + pdf["x2"] + pdf["x3"]
    if plotting:
        sns.boxplot(y=pdfs, x="class", data=data)
        plt.savefig("plots/boxplot.png")
        plt.clf()
    if plotting:
        plt.plot(np.sort(pdfs))
        # Draw candidate threshold lines at a few order statistics.
        for idx, style in [(60, 'k-'), (50, 'k.'), (45, 'k--'), (40, 'k--')]:
            split = np.sort(pdfs)[idx]
            plt.plot((0, 1000), (split, split), style, lw=0.5)
        plt.savefig("plots/thresholds.png")
        plt.clf()
    # Return the 100 lowest-density points among the unlabeled class.
    outliers = np.argsort(pdfs)
    final = []
    for outlier in outliers:
        if data["class"][outlier] == -1:
            final.append(outlier)
    return np.array(final[:100])

def ComputeLRT(adjmatrix, ncr, ntr, nruns):
    # Clamp co-clustering counts away from 0 and from the maximum (100).
    adjmatrix[adjmatrix == 0] = 1
    adjmatrix[adjmatrix == 100] = 99
    # Collect within-cluster co-clustering counts for both groups.
    intra_cluster = []
    for i in range(0, ncr):
        for j in range(i + 1, ncr):
            intra_cluster.append(adjmatrix[(i, j)])
    for i in range(ncr, ntr + ncr):
        for j in range(i + 1, ncr + ntr):
            intra_cluster.append(adjmatrix[(i, j)])
    # Collect between-cluster counts.
    inter_cluster = []
    for i in range(0, ncr):
        for j in range(ncr, ncr + ntr):
            inter_cluster.append(adjmatrix[(i, j)])
    intra_cluster = np.array(intra_cluster) / nruns
    inter_cluster = np.array(inter_cluster) / nruns
    Stability = np.sum(intra_cluster) / (np.sum(intra_cluster) + np.sum(inter_cluster))
    # Keep proportions strictly inside (0, 1) for the beta likelihood.
    intra_cluster = [0.01 if x == 0 else x for x in intra_cluster]
    intra_cluster = [0.99 if x == 1 else x for x in intra_cluster]
    inter_cluster = [0.01 if x == 0 else x for x in inter_cluster]
    inter_cluster = [0.99 if x == 1 else x for x in inter_cluster]
    # Add a small deterministic jitter when a group is (near-)constant,
    # so the beta parameters can still be estimated.
    if float(np.var(intra_cluster)) <= 1e-7:
        for i in range(0, len(intra_cluster)):
            intra_cluster[i] = intra_cluster[i] + (i + 1) / 1000
    if float(np.var(inter_cluster)) <= 1e-7:
        for i in range(0, len(inter_cluster)):
            inter_cluster[i] = inter_cluster[i] + (i + 1) / 1000
    # Likelihood-ratio test: separate betas for intra/inter vs one pooled beta.
    a1, b1 = estBetaParams(intra_cluster)
    l1 = beta.logpdf(intra_cluster, a1, b1)
    a2, b2 = estBetaParams(inter_cluster)
    l2 = beta.logpdf(inter_cluster, a2, b2)
    a3, b3 = estBetaParams(np.append(intra_cluster, inter_cluster))
    l0 = beta.logpdf(np.append(intra_cluster, inter_cluster), a3, b3)
    LR = 2 * ((np.sum(l1) + np.sum(l2)) - np.sum(l0))
    if math.isnan(LR):
        exit()
    p = chi2.sf(LR, 2)
    return (LR, p, Stability)

def hyper_param_inf(self, corpus, params, score):
    if self.verbose >= 1:
        print("\n****** HP INFERENCE *******")
    for i in range(params.n_hypermoves):
        if self.verbose > 1:
            print("\n--- current params ---")
            params.show()
            print("hyper param score:" + str(score))
            print(" a_nr: " + str(gamma.logpdf(params.alpha_r, params.alpha_r_hp)))
            print(" a_r: " + str(gamma.logpdf(params.alpha_nr, params.alpha_nr_hp)))
            print(" empty_i: " + str(beta.logpdf(params.empty_intent,
                                                 params.intent_hp_a,
                                                 params.intent_hp_b)))
        new_params = Params()
        new_params.propose_hyper_params(params)
        new_score = self.score_full_lex(corpus, new_params)
        if self.verbose > 1:
            print("--- new params ---")
            new_params.show()
            print("hyper param score:" + str(new_score))
            print(" a_nr: " + str(gamma.logpdf(new_params.alpha_r, new_params.alpha_r_hp)))
            print(" a_r: " + str(gamma.logpdf(new_params.alpha_nr, new_params.alpha_nr_hp)))
            print(" empty_i: " + str(beta.logpdf(new_params.empty_intent,
                                                 new_params.intent_hp_a,
                                                 new_params.intent_hp_b)))
        # Metropolis accept/reject on the hyperparameters.
        if new_score - score > 0:
            params = new_params
        elif random() < exp(new_score - score):
            params = new_params
            if self.verbose >= 1:
                print(" hp change! - old = %2.2f, new = %2.2f" % (score, new_score))
    # now rescore with the new parameters - redundant if you didn't swap, FIXME
    self.score_full_lex(corpus, params)
    return params

def log_lk(newly_exposed, newly_infected, unobserved, exposed_p,
           external_sources_p, infected_p, tested_p, tested_contact_p,
           dead_p, immune_p, susceptible_p, dead_alpha, dead_beta,
           test_alpha, test_beta, contact_alpha, contact_beta):
    # Reject parameter vectors outside (0, 1).
    if (exposed_p <= 0 or external_sources_p <= 0 or infected_p <= 0 or
            (tested_p <= 0).any() or (tested_contact_p <= 0).any() or
            (dead_p <= 0).any() or immune_p <= 0 or susceptible_p <= 0 or
            # exposed_p >= 1 or
            external_sources_p >= 1 or infected_p >= 1 or
            (tested_p >= 1).any() or (tested_contact_p >= 1).any() or
            (dead_p >= 1).any() or immune_p >= 1 or susceptible_p >= 1):
        return -numpy.inf, None
    # Cumulative compartment counts (rows: regions, columns: days).
    cum_exposed = numpy.hstack((numpy.zeros_like(populations),
                                cumsum(newly_exposed - newly_infected, axis=1)))
    cum_unobserved = numpy.hstack((numpy.zeros_like(populations),
                                   cumsum(unobserved, axis=1)))
    cum_unknown_infected = numpy.hstack(
        (numpy.zeros_like(populations),
         cumsum(newly_infected - unobserved - confirmed, axis=1)))
    cum_susceptible = (populations - cum_exposed - cum_unknown_infected
                       - cum_confirmed - cum_deaths - cum_recovered - cum_unobserved)
    log_lk = numpy.vstack((
        # Susceptible people get exposed to infected or tested
        binom.logpmf(newly_exposed, cum_susceptible[:, :-1],
                     exposed_p * (cum_unknown_infected[:, :-1]
                                  + tested_contact_p[:, None] * cum_confirmed[:, :-1])
                     + external_sources_p),
        # Exposed people become infected
        binom.logpmf(newly_infected, cum_exposed[:, :-1], infected_p),
        # People might recover or die before they are tested
        binom.logpmf(unobserved, cum_unknown_infected[:, :-1],
                     dead_p[:, None] + immune_p),
        # Infected people become tested
        binom.logpmf(confirmed, cum_unknown_infected[:, :-1] - unobserved,
                     tested_p[:, None]),
        # Tested people recover
        binom.logpmf(recovered, cum_confirmed[:, :-1], immune_p),
        # or they die
        binom.logpmf(deaths, cum_confirmed[:, :-1], dead_p[:, None]),
    ))
    errors = ~numpy.isfinite(log_lk)
    log_lk[errors] = 0
    # Beta priors on the death, testing, and contact-tracing rates.
    prior = (
        beta.logpdf(dead_p, dead_alpha, dead_beta).sum(),
        beta.logpdf(tested_p, test_alpha, test_beta).sum(),
        beta.logpdf(tested_contact_p, contact_alpha, contact_beta).sum(),
    )
    return numpy.sum(log_lk) + sum(prior), errors

def sample_trunc_beta(a, b, lower, upper):
    """
    Samples from a truncated beta distribution in log space

    Parameters
    ----------
    a, b : float
        Canonical parameters of the beta distribution
    lower, upper : float
        Lower and upper truncations of the beta distribution

    Returns
    -------
    s : float
        Sampled value from the truncated beta distribution
    """
    # Check boundaries are correct
    if upper < lower:
        return

    # If a = 1 and b = 1, we're sampling a truncated uniform distribution
    # (i.e. the peak formula below is not valid, but also not needed)
    if a == 1 and b == 1:
        s = np.random.uniform(low=lower, high=upper)
        return s

    # Get location of peak of distribution to determine type of sampling
    peak = (a - 1) / (a + b - 2)

    # If the peak is outside the truncation, use uniform rejection sampling
    if peak < lower or peak > upper:
        # Sample a proposal
        s = np.random.uniform(low=lower, high=upper)
        # Get components of rejection sampling
        log_f_s = beta.logpdf(s, a, b)
        log_g_s = -1 * np.log(upper - lower)
        log_M = max(beta.logpdf(lower, a, b), beta.logpdf(upper, a, b)) \
            + np.log(upper - lower)
        # Keep sampling until a proposal is accepted
        while np.log(np.random.random()) > log_f_s - (log_M + log_g_s):
            s = np.random.uniform(low=lower, high=upper)
            log_f_s = beta.logpdf(s, a, b)
    # If the peak is inside the truncation, sample from the beta directly
    else:
        s = beta.rvs(a, b)
        # Keep sampling until the proposal falls inside the boundaries
        while s < lower or s > upper:
            s = beta.rvs(a, b)
    return s

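# Hedged usage sketch (not from the source): the peak of Beta(2, 5) sits at
# (2 - 1) / (2 + 5 - 2) = 0.2, outside the truncation below, so this call
# exercises the uniform rejection-sampling branch.
import numpy as np
from scipy.stats import beta

s = sample_trunc_beta(2.0, 5.0, 0.4, 0.9)
assert 0.4 <= s <= 0.9
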
def compute_pdfs(_VS, _AS, _BS, _BB=None, check=False):
    # Mask of entries that carry a value (False marks missing entries).
    M = np.array([v is not False for v in _VS])
    if _BB is None:
        VS, AS, BS = map(np.array, [_VS, _AS, _BS])
    else:
        VS, AS, BS, BB = map(np.array, [_VS, _AS, _BS, _BB])
    # Clamp values away from 0 and 1 by the sequencing-error rate.
    VS[VS < SEQERROR] = SEQERROR
    VS[VS > (1.0 - SEQERROR)] = 1.0 - SEQERROR
    RS = np.full(M.shape[0], np.NINF)
    if _BB is None:
        RS[M] = beta.logpdf(VS[M], AS[M], BS[M])
    else:
        # Beta-binomial log-density with overdispersion parameter BB.
        NS = AS[M] + BS[M]
        KS = AS[M]
        CS = gammaln(NS + 1) - gammaln(NS - KS + 1) - gammaln(KS + 1)  # log comb(NS, KS)
        NSminusKS = NS - KS
        BAFS = VS[M]
        minusBAFS = 1 - BAFS
        SS = BB[M]
        RS[M] = CS + betaln(KS + SS * BAFS, NSminusKS + SS * minusBAFS) \
            - betaln(SS * BAFS, SS * minusBAFS)
    RS[M & (RS < EPSILON)] = EPSILON
    if check:
        assert np.NINF not in RS[M]
    return RS

def fit_beta(cls, X):
    N = X.shape[0]
    D = X.shape[1]
    Xsafe = np.clip(X, 0.01, 1 - 0.01)
    P = 20
    # Candidate (a, b) grid; earlier grids kept for reference:
    # params = np.asarray([(2+a, 52-a) for a in np.linspace(0, 50, P)])
    # params = np.asarray([(b+a, b+c-a) for b in np.linspace(1, 2, 5)
    #                      for c in np.linspace(1, 50, 10)
    #                      for a in np.linspace(0, b, P)])  # the buggy one
    params = np.asarray([(b + a, b + c - a)
                         for b in np.linspace(1, 2, 5)
                         for c in np.linspace(1, 50, 10)
                         for a in np.linspace(0, c, P)])
    theta = np.zeros((D, 2))
    scores = np.zeros(len(params))
    for d in range(D):
        # Pick the grid point with the highest likelihood for this dimension.
        for p in range(len(params)):
            scores[p] = beta.logpdf(Xsafe[:, d], *params[p]).sum()
        ii = scores.argmax()
        theta[d] = params[ii]
    return theta

def fit_beta_atleast_std(cls, X, std):
    variance = std**2
    N = X.shape[0]
    D = X.shape[1]
    Xsafe = np.clip(X, 0.01, 1 - 0.01)
    P = 20
    params = np.asarray([(b + a, b + c - a)
                         for b in np.linspace(1, 2, 5)
                         for c in np.linspace(1, 50, 10)
                         for a in np.linspace(0, c, P)])
    # Keep only grid points whose beta variance meets the minimum.
    a, b = params.T
    variances = a * b / ((a + b)**2 * (a + b + 1))
    II = np.where(variances >= variance)[0]
    params = params[II]
    theta = np.zeros((D, 2))
    scores = np.zeros(len(params))
    for d in range(D):
        # Pick the grid point with the highest likelihood for this dimension.
        for p in range(len(params)):
            scores[p] = beta.logpdf(Xsafe[:, d], *params[p]).sum()
        ii = scores.argmax()
        theta[d] = params[ii]
    return theta

def __init__(self, ez_nk, B1=None, B2=None, P_lim=[2, 65536]):
    # These are the fixed, assumed beta distributions we use for
    # short-period and long-period, respectively
    if B1 is None:
        B1 = beta(1.5, 50.)
    if B2 is None:
        B2 = beta(1, 1.8)
    self.ez = ez_nk  # (2, N, K)
    self.K = np.isfinite(self.ez[0]).sum(axis=-1)  # (N, )
    self.P_lim = P_lim
    # Used priors from The Joker:
    ln_e_p0 = beta.logpdf(self.ez[0], a=0.867, b=3.03)
    ln_z_p0 = np.full_like(self.ez[1],
                           -np.log(np.log(P_lim[1]) - np.log(P_lim[0])))
    self.ln_p0 = np.stack((ln_e_p0, ln_z_p0))  # (2, N, K)
    self.B1 = B1
    self.B2 = B2
    self._lnp1e = B1.logpdf(self.ez[0])
    self._lnp2e = B2.logpdf(self.ez[0])
    self._zlim = np.log(P_lim)

def pdf(self, u: Array, log=False):
    assert self.smoothing == "beta", \
        "Empirical Copula only has density (PDF) for smoothing = 'beta'"
    assert isinstance(self.data, np.ndarray), \
        "data is still undefined for EmpiricalCopula"
    u = self.pobs(u, self._ties)
    data_rank = rank_data(self.data, 1, self._ties)
    n = len(self.data)
    if log:
        return np.array([
            log_sum(np.array([
                sum(beta.logpdf(row, a=row_rank, b=n + 1 - row_rank))
                for row_rank in data_rank
            ])) for row in u
        ]) - np.log(n + self._offset)
    else:
        return np.array([
            sum([
                np.prod(beta.pdf(row, a=row_rank, b=n + 1 - row_rank))
                for row_rank in data_rank
            ]) for row in u
        ]) / (n + self._offset)

def prior_p(self, p0, p1):
    '''Joint log-prior for pi0 and pi1. Uses two independent betas.'''
    return (beta.logpdf(x=p0, a=self.ap0, b=self.bp0)
            + sp.stats.beta.logpdf(x=p1, a=self.ap1, b=self.bp1))

def get_particle_from_state(self, state, obs):
    """Returns a particle from this state, as well as the
    log_density of this particle."""
    sample_means = beta.rvs(state['successes'], state['failures'])
    log_density = np.sum(beta.logpdf(sample_means,
                                     state['successes'],
                                     state['failures']))
    return sample_means, log_density

def _py_log_prob(self, xs, zs):
    n_samples = zs.shape[0]
    lp = np.zeros(n_samples, dtype=np.float32)
    for s in range(n_samples):
        lp[s] = beta.logpdf(zs[s, :], a=1.0, b=1.0)
        for n in range(len(xs)):
            lp[s] += bernoulli.logpmf(xs[n], p=zs[s, :])
    return lp

def dBE(y: np.ndarray, location: np.ndarray, scale: np.ndarray):
    """Log-density of the beta distribution in its mean (location) and
    dispersion (scale) parameterization."""
    a = location * (1 - scale**2) / (scale**2)
    b = a * (1 - location) / location
    fy = beta.logpdf(x=y, a=a, b=b)
    return fy

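# Quick sanity check of dBE's parameterization (a sketch, not from the
# source): with location mu and scale sigma, the implied mean a/(a + b)
# recovers mu.
import numpy as np

mu, sigma = np.array([0.3]), np.array([0.5])
a = mu * (1 - sigma**2) / sigma**2
b = a * (1 - mu) / mu
assert np.allclose(a / (a + b), mu)
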
def test_beta_log_pdf(self):
    from scipy.stats import beta
    a = 3.0
    b = 2.0
    for x in np.linspace(0.001, 0.999, 25):
        expected = beta.logpdf(x, a, b)
        got = beta_log_pdf(x, a, b)
        self.assertAlmostEqual(got, expected, places=6)

def f0(x):
    mu, a, th = x[0], x[1], x[2]
    res = uv_exp_ll(t, mu, a, th, T)
    res += gamma.logpdf(mu, mu_hyp[0], scale=mu_hyp[1]) \
        + gamma.logpdf(th, theta_hyp[0], scale=theta_hyp[1]) \
        + beta.logpdf(a, alpha_hyp[0], alpha_hyp[1])
    return res

def _test(model, xs, zs):
    val_true = beta.logpdf(zs['p'], 1.0, 1.0)
    val_true += np.sum([bernoulli.logpmf(x, zs['p']) for x in xs['x']])
    val_ed = model.log_prob(xs, zs)
    assert np.allclose(val_ed.eval(), val_true)
    zs_tf = {key: tf.cast(value, dtype=tf.float32)
             for key, value in six.iteritems(zs)}
    val_ed = model.log_prob(xs, zs_tf)
    assert np.allclose(val_ed.eval(), val_true)

def _test(model, xs, zs):
    val_true = beta.logpdf(zs['p'], 1.0, 1.0)
    val_true += np.sum([bernoulli.logpmf(x, zs['p'])
                        for x in list(six.itervalues(xs))[0]])
    val_ed = model.log_prob(xs, zs)
    assert np.allclose(val_ed.eval(), val_true)
    zs_tf = {key: tf.cast(value, dtype=tf.float32)
             for key, value in six.iteritems(zs)}
    val_ed = model.log_prob(xs, zs_tf)
    assert np.allclose(val_ed.eval(), val_true)

def test_dummy_posterior_correct(self):
    A = self.arr
    logpost = self.bhp.log_posterior_with_params(A, 5., .2, .1, A[-1])
    check = self.bhp.log_likelihood_with_params(A, 5, .2, .1, A[-1]) + \
        gamma.logpdf(5, self.bhp.mu_hyp[0], scale=self.bhp.mu_hyp[1]) + \
        gamma.logpdf(.1, self.bhp.theta_hyp[0], scale=self.bhp.theta_hyp[1]) + \
        beta.logpdf(.2, self.bhp.alpha_hyp[0], self.bhp.alpha_hyp[1])
    self.assertAlmostEqual(logpost, check)

def binBounds2(alpha, a, b, t, kb):
    # Grid of possible p values.
    p_vals = np.linspace(0, 1, num=int(1 / 0.001) + 1)
    indices = np.arange(len(p_vals))
    # Computation of prior
    log_prior_0 = beta.logpdf(p_vals, a, b)
    # Computation of posterior
    log_posterior_0 = beta.logpdf(p_vals, a + kb, b + t - kb)
    # Martingale computation
    log_martingale_0 = log_prior_0 - log_posterior_0
    # Confidence interval: keep the p values where the prior/posterior
    # ratio martingale stays below 1/alpha.
    ci_condition_0 = log_martingale_0 < np.log(1 / alpha)
    ci_indices_0 = np.copy(indices[ci_condition_0])
    return [p_vals[np.min(ci_indices_0)], p_vals[np.max(ci_indices_0)]]

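# Hypothetical call (not from the source): a 95% interval for a proportion
# after kb = 30 successes in t = 100 trials under a uniform Beta(1, 1) prior.
import numpy as np
from scipy.stats import beta

lo, hi = binBounds2(alpha=0.05, a=1, b=1, t=100, kb=30)
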
def _py_log_prob(self, zs):
    # This example is written for pedagogy. We recommend
    # vectorizing operations in practice.
    n_minibatch = zs.shape[0]
    lp = np.zeros(n_minibatch, dtype=np.float32)
    for b in range(n_minibatch):
        lp[b] = beta.logpdf(zs[b, :], a=1.0, b=1.0)
        for n in range(len(self.data)):
            lp[b] += bernoulli.logpmf(self.data[n], p=zs[b, :])
    return lp

def _py_log_prob(self, xs, zs):
    # This example is written for pedagogy. We recommend
    # vectorizing operations in practice.
    n_minibatch = zs.shape[0]
    lp = np.zeros(n_minibatch, dtype=np.float32)
    for b in range(n_minibatch):
        lp[b] = beta.logpdf(zs[b, :], a=1.0, b=1.0)
        for n in range(xs['x'].shape[0]):
            lp[b] += bernoulli.logpmf(xs['x'][n], p=zs[b, :])
    return lp

def _py_log_prob(self, xs, zs):
    # This example is written for pedagogy. We recommend
    # vectorizing operations in practice.
    n_samples = zs.shape[0]
    lp = np.zeros(n_samples, dtype=np.float32)
    for b in range(n_samples):
        lp[b] = beta.logpdf(zs[b, :], a=1.0, b=1.0)
        for n in range(xs['x'].shape[0]):
            lp[b] += bernoulli.logpmf(xs['x'][n], p=zs[b, :])
    return lp

def weight(self, sym, start, end, theta):
    state = (sym, start, end)
    try:
        u = self.slice_variables[state]
    except KeyError:
        raise ValueError('I do not expect to reweight a rule for an unseen state: %s' % str(state))
    if theta > u:
        return -beta.logpdf(math.exp(u), self.a, self.b)
    else:
        raise ValueError('I do not expect to reweight rules scoring less than the threshold')

def player_beta(params, games, date, day_span, decay):
    """
    Negative weighted log-likelihood for a player's beta distribution
    parameters, with exponential decay down-weighting older games.

    :return: Negative weighted log-likelihood
    """
    likelihood = beta.logpdf(games['pts'] / games['team_pts'],
                             params[0], params[1])
    weight = np.exp(-decay * np.ceil((date - games['date']).dt.days / day_span))
    return -np.dot(likelihood, weight)

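# A sketch of fitting player_beta with scipy.optimize (the column names and
# data below are assumptions read off the function body, not confirmed by
# the source).
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.stats import beta

games = pd.DataFrame({
    'pts': [20, 25, 15],
    'team_pts': [100, 110, 95],
    'date': pd.to_datetime(['2020-01-01', '2020-01-08', '2020-01-15']),
})
res = minimize(player_beta, x0=np.array([2.0, 8.0]),
               args=(games, pd.Timestamp('2020-02-01'), 7, 0.05),
               method='L-BFGS-B', bounds=[(1e-3, None)] * 2)
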
def test_diffuse_prior_posterior_correct(self):
    A = self.arr
    bhp2 = BayesianUVExpHawkesProcess((1, 10000), (1, 1), (1, 1e5))
    logpost = bhp2.log_posterior_with_params(A, 5., .2, .1, A[-1])
    check = bhp2.log_likelihood_with_params(A, 5, .2, .1, A[-1]) + \
        gamma.logpdf(5, bhp2.mu_hyp[0], scale=bhp2.mu_hyp[1]) + \
        gamma.logpdf(.1, bhp2.theta_hyp[0], scale=bhp2.theta_hyp[1]) + \
        beta.logpdf(.2, bhp2.alpha_hyp[0], bhp2.alpha_hyp[1])
    self.assertAlmostEqual(logpost, check)

def admixture_proportion_proposal(graph):
    '''return proposed admixture fractions and "log q(x'|x) - log q(x|x')"'''
    # Beta proposal centered on the current proportion of each admixture edge.
    admixture_proportions = graph.get_admixture_proportions()
    proportions = dict()
    qs = 0
    for e in admixture_proportions:
        mu = admixture_proportions[e]
        admixture_edge_name = f'{e[0]}_{e[1]}_proportion'
        a = mu * 30
        b = (1 - mu) * 30
        p = np.random.beta(a, b, 1)[0]
        proportions[admixture_edge_name] = p
        q_forward = beta.logpdf(p, a, b)
        # Hastings correction: density of the reverse move.
        a = p * 30
        b = (1 - p) * 30
        q_backward = beta.logpdf(mu, a, b)
        qs += q_backward - q_forward
    return proportions, qs

def _test(model, xs, zs):
    n_samples = zs.shape[0]
    val_true = np.zeros(n_samples, dtype=np.float32)
    for s in range(n_samples):
        p = np.squeeze(zs[s, :])
        val_true[s] = beta.logpdf(p, 1, 1)
        val_true[s] += np.sum([bernoulli.logpmf(x, p) for x in xs['x']])
    val_ed = model.log_prob(xs, zs)
    assert np.allclose(val_ed.eval(), val_true)
    zs_tf = tf.cast(zs, dtype=tf.float32)
    val_ed = model.log_prob(xs, zs_tf)
    assert np.allclose(val_ed.eval(), val_true)

def get_beta_prior():
    '''
    (2) Beta(1+eps, 1+eps): this prior is added to the mll probs when the
    mll curve is flat (i.e. under high regularization).
    '''
    eps = 0.0001
    a, b = 1.0 + eps, 1.0 + eps
    gridpoints = np.linspace(0.001, 0.999, 999)
    log_prior = beta.logpdf(gridpoints, a, b)
    assert len(log_prior) == len(infer.trange)
    return log_prior

def test_logprob(self):
    # Beta(1, b) = Kumaraswamy(1, b)
    b = torch.exp(Variable(torch.randn(10)))
    a = Variable(torch.ones(10))
    value = Variable(torch.randn(10))
    dist = Kumaraswamy(a, b)
    # test log probability
    res1 = dist.log_prob(value).data
    res2 = beta.logpdf(value.data.numpy(), a.data.numpy(), b.data.numpy())
    res2[np.isinf(res2)] = dist.LOG_0
    self.assertEqual(res1, res2)

    # Beta(a, 1) = Kumaraswamy(a, 1)
    a = torch.exp(Variable(torch.randn(100)))
    b = Variable(torch.ones(100))
    value = Variable(torch.randn(100))
    dist = Kumaraswamy(a, b)
    # test log probability
    res1 = dist.log_prob(value).data
    res2 = beta.logpdf(value.data.numpy(), a.data.numpy(), b.data.numpy())
    res2[np.isinf(res2)] = dist.LOG_0
    self.assertEqual(res1, res2)

def _compute_loglikelihoods(self, X):
    # Sum per-cluster beta log-densities over all dimensions, counting each
    # point only toward the cluster it is currently assigned to.
    llh = 0.0
    M = self._n_clusters
    D = X.shape[1]
    for m in range(M):
        for d in range(D):
            llh += np.sum((self.labels_ == m) *
                          beta.logpdf(X[:, d],
                                      self.theta_[m, d, 0],
                                      self.theta_[m, d, 1]))
    return llh

def _classify(neg_feats, pos_feats, mixture_params):
    from scipy.stats import beta
    M = len(mixture_params)
    collapsed_feats = np.apply_over_axes(np.mean, neg_feats, [0, 1]).ravel()
    collapsed_feats = np.clip(collapsed_feats, 0.01, 1 - 0.01)
    D = collapsed_feats.shape[0]
    qlogs = np.zeros(M)
    for m in range(M):
        # Log-likelihood of the collapsed features under mixture component m.
        v = 0.0
        for d in range(D):
            v += beta.logpdf(collapsed_feats[d],
                             mixture_params[m, d, 0],
                             mixture_params[m, d, 1])
        qlogs[m] = v
    return qlogs

def lnpriorfn(self, p):
    lnfs, lnrs, lnms, q1, q2 = p[:5]
    lp = 0.0
    if not ((0 < q1 < 1) and (0 < q2 < 1)):
        return -np.inf
    # Gaussian priors on the (log) stellar radius and mass.
    lp -= 0.5 * (((lnrs - ln_rstar) / ln_rstar_err) ** 2
                 + ((lnms - ln_mstar) / ln_mstar_err) ** 2)
    lnr, lnp, t0, b, sesn, secs = p[5:]
    if not 0 <= b < 2.0:
        return -np.inf
    if np.exp(lnp) < min_period:
        return -np.inf
    # Beta prior on the eccentricity.
    e = sesn**2 + secs**2
    if not 0 <= e < 1.0:
        return -np.inf
    lp += beta.logpdf(e, 1.12, 3.09)
    return lp  # + lnp

def _logp(self, value, a, b):
    if value < 0 or value > 1:
        raise ValueError("Domain Error.")
    return np.sum(beta.logpdf(value, a, b))

def logL(ab):
    # Negative log-likelihood of two class-conditional betas.
    a0, b0, a1, b1 = ab
    LL = (beta.logpdf(p[Y == 0], a0, b0).sum()
          + beta.logpdf(p[Y == 1], a1, b1).sum())
    return -LL

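# Hedged usage sketch: p and Y are assumed module-level arrays of predicted
# probabilities and 0/1 labels (synthetic here); the negative log-likelihood
# above is then minimized to fit a separate beta to each class.
import numpy as np
from scipy.optimize import minimize
from scipy.stats import beta

rng = np.random.default_rng(0)
Y = rng.integers(0, 2, size=200)
p = np.where(Y == 0, rng.beta(2, 5, size=200), rng.beta(5, 2, size=200))
res = minimize(logL, x0=[1.0, 1.0, 1.0, 1.0],
               method='L-BFGS-B', bounds=[(1e-3, None)] * 4)
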
def _py_log_prob(self, xs, zs):
    log_prior = beta.logpdf(zs['p'], a=1.0, b=1.0)
    log_lik = np.sum(bernoulli.logpmf(xs['x'], p=zs['p']))
    return log_lik + log_prior

# One Metropolis-Hastings step (body of the sampling loop):
new = rs.normal(old, 0.05)  # propose from a normal random walk
if new < 0:
    att_symp[i] = old  # reject: the rate must be non-negative
    ll[i] = -1e10
else:
    simp_old = exp(-old * tps[:5]) - exp(-old * tps[1:])
    simp_new = exp(-new * tps[:5]) - exp(-new * tps[1:])
    if sum(simp_new > 0) != len(tps) - 1:
        att_symp[i] = old  # reject: proposal yields non-positive probabilities
        ll[i] = -1e10
    else:
        # simulate probabilities corresponding to the data
        log_ratio = (sum(beta.logpdf(simp_new, a, b, loc=0, scale=1))
                     - sum(beta.logpdf(simp_old, a, b, loc=0, scale=1)))
        if log(rs.uniform(0, 1)) < log_ratio:
            att_symp[i] = new  # accept
            ll[i] = sum(beta.logpdf(simp_new, a, b, loc=0, scale=1))
            old = new
            acc = acc + 1
        else:
            att_symp[i] = old  # reject
            ll[i] = sum(beta.logpdf(simp_old, a, b, loc=0, scale=1))
        props[i] = simp_old
i = i + 1

att_symp = att_symp[1000:]  # remove burn-in samples
ll = ll[1000:]  # log-likelihood

def lnprior(p, ID, Tarr, fmll, minWL, maxWL, minLWL, maxLWL, verbose=False):
    # apply non-informative prior on wavelength to make sure
    # line is not shifted outside working segment
    for pp in zip(p['DWL'][Tarr[..., 0] != -1], fmll['WL'][Tarr[..., 0] != -1]):
        if (np.abs(pp[0]) > 0.0) & (pp[1] > minWL) & (pp[1] < maxWL):
            wlshift = pp[1] + pp[0]
            if (wlshift < minLWL - 0.05) or (wlshift > maxLWL + 0.05):
                if verbose:
                    print('Pro: {0} --> CAUGHT A WAVELENGTH SHIFT OUTSIDE SPECTRUM BOUNDS {1}-{2}, {3} LINE SHIFTED TO: {4}'.format(
                        ID, minLWL - 0.025, maxLWL + 0.025, pp[1], wlshift))
                return -np.inf

    # Prior on gamma using beta function
    mingamma = -1.5
    maxgamma = 0.65
    rangegamma = maxgamma - mingamma
    gammawprior = beta.logpdf(p['DGAMMAW'][Tarr[..., 2] != -1], 1.0, 1.0,
                              loc=mingamma, scale=rangegamma)
    gammarprior = beta.logpdf(p['DGAMMAR'][Tarr[..., 3] != -1], 1.0, 1.0,
                              loc=mingamma, scale=rangegamma)
    gammasprior = beta.logpdf(p['DGAMMAS'][Tarr[..., 4] != -1], 1.0, 1.0,
                              loc=mingamma, scale=rangegamma)
    # check to see if it returns any priors outside the uniform prior
    if (any(np.isinf(gammawprior)) or any(np.isinf(gammarprior))
            or any(np.isinf(gammasprior))):
        if verbose:
            print('Pro: {0} --> CAUGHT A GAMMA SHIFT OUTSIDE THE PRIORS'.format(ID))
        return -np.inf

    # Prior on gf using beta function
    mingflog = -10.0
    maxgflog = 1.5
    rangegflog = maxgflog - mingflog
    gfprior = beta.logpdf(p['DGFLOG'][Tarr[..., 1] != -1], 1.0, 1.0,
                          loc=mingflog, scale=rangegflog)
    # check to see if it returns any priors outside the uniform prior
    if any(np.isinf(gfprior)):
        if verbose:
            print('Pro: {0} --> CAUGHT A LOG(GF) SHIFT OUTSIDE THE PRIORS'.format(ID))
        return -np.inf

    velshift = 50.0  # km/s
    wsh = fmll['WL'][Tarr[..., 0] != -1] * (velshift / speedoflight)
    wsh_max = max(wsh)
    minwll = -1.0 * wsh_max
    maxwll = wsh_max
    rangewll = maxwll - minwll
    wlprior = beta.logpdf(p['DWL'][Tarr[..., 0] != -1], 1.0, 1.0,
                          loc=minwll, scale=rangewll)
    # check to see if it returns any priors outside the uniform prior
    if any(np.isinf(wlprior)):
        if verbose:
            for ii, wlp in enumerate(wlprior):
                if np.isinf(wlp):
                    print('Pro: {0} --> CAUGHT A WAVELENGTH SHIFT OUTSIDE THE PRIORS: line {2} shifted by {1} nm'.format(
                        ID, p['DWL'][ii], fmll['WL'][ii]))
        return -np.inf

    # check to see if arrays are empty; if so add 0.0 so that the summation works
    if len(gammawprior) == 0:
        gammawprior = [0.0]
    if len(gammarprior) == 0:
        gammarprior = [0.0]
    if len(gammasprior) == 0:
        gammasprior = [0.0]

    # 2-D gaussian prior on delta(log(gf)) and delta(lambda),
    # reparameterized such that they move on the space evenly
    sig_dgflog = 0.5
    sig_dWL = 0.05
    gf_wl_prior = (-0.5 * ((p['DGFLOG'][Tarr[..., 1] != -1] / sig_dgflog) ** 2.0)
                   * ((p['DWL'][Tarr[..., 0] != -1] / sig_dWL) ** 2.0))
    # - 0.5*np.log(2.0*np.pi*(sig_coup**2.0))

    # RETURN WITH COUPLED PRIOR
    return np.sum(np.hstack([gammawprior, gammarprior, gammasprior,
                             gfprior, wlprior, gf_wl_prior]))

def score_full_lex(self, corpus, params, init=False):
    # set up the intent caching
    for i in range(corpus.n_sents):
        # cache word and object probabilities uniformly
        # 1 x o matrix with [uniform ... empty]
        # and 1 x w matrix again with [uniform ... empty]
        n_os = len(corpus.sents[i][0])
        if n_os > 0:
            unif_o = log((1 - params.empty_intent) / n_os)
        else:
            unif_o = [None]  # protects against zero objects
        self.intent_obj_probs[i] = [unif_o] * n_os + [log(params.empty_intent)]
        if init:
            # update lexicon dirichlets based on random init
            io = self.oi[i] == self.intent_obj[i]
            rw = self.wi[i] == self.ref_word[i]
            if io.any():  # protect against nulls
                self.ref[corpus.sents[i][0][io], corpus.sents[i][1][rw]] += 1
            # includes all words that are not the referential word
            self.non_ref[corpus.sents[i][1][self.wi[i] != self.ref_word[i]]] += 1
            # now add the referential words for null objects
            if not io.any():
                self.non_ref[corpus.sents[i][1][self.wi[i] == self.ref_word[i]]] += 1
        # now set up the quick scoring probability caches
        self.intent_obj_prob[i] = self.intent_obj_probs[i][self.intent_obj[i]]
    # cache DM scores for lexicon
    for i in range(corpus.world.n_objs):
        self.ref_score[i] = score_dm(self.ref[i, :], params.alpha_r)
    # cache non-ref DM score also
    self.nr_score = score_dm(self.non_ref, params.alpha_nr)
    # score hyperparameters (via hyper-hyperparameters)
    empty_intent_score = beta.logpdf(params.empty_intent,
                                     params.intent_hp_a, params.intent_hp_b)
    alpha_score = (gamma.logpdf(params.alpha_r, params.alpha_r_hp)
                   + gamma.logpdf(params.alpha_nr, params.alpha_nr_hp))
    self.param_score = empty_intent_score + alpha_score
    score = self.update_score(corpus.n_sents)
    # debugging stuff
    if self.verbose >= 1:
        print("\n--- score full lex ---")
        print(self.ref)
        print(" " + str(self.non_ref))
    if self.verbose > 1:
        print("counts: %d" % (sum(self.non_ref) + sum(self.ref)))
        print(" intent obj: " + str(self.intent_obj))
        print(" ref word: " + str(self.ref_word))
        print(" intent obj prob: " + str(self.intent_obj_prob.round(1)))
        print("full score: r %2.1f, nr %2.1f, i %2.1f, p %2.1f, total: %2.1f"
              % (sum(self.ref_score), self.nr_score,
                 sum(self.intent_obj_prob), self.param_score, score))
    return score

def fit(self, X, tol=0.00001, min_probability=0.01, min_q=0.01):
    Xsafe = np.clip(X, min_probability, 1 - min_probability)
    self._init(X, seed=0)
    M = self._n_clusters
    N = X.shape[0]
    D = X.shape[1]
    qlogs = np.zeros((M, N))
    q = np.zeros((M, N))
    v = np.zeros(N)
    loglikelihood = -np.inf
    new_loglikelihood = self._compute_loglikelihoods(Xsafe)
    self.iterations = 0
    # EM-style loop: alternate responsibilities (E) and beta parameters (M)
    # until the log-likelihood stops improving.
    while self.iterations < 200 and (
            np.isinf(loglikelihood) or self.iterations < 2 or
            np.fabs((new_loglikelihood - loglikelihood) / loglikelihood) > tol):
        loglikelihood = new_loglikelihood
        ag.info("Iteration {0}: loglikelihood {1}".format(self.iterations, loglikelihood))
        # E-step: per-cluster log-likelihood of every point.
        for m in range(M):
            v = qlogs[m]
            v[:] = 0.0
            for d in range(D):
                v += beta.logpdf(Xsafe[:, d], self.theta_[m, d, 0], self.theta_[m, d, 1])
            qlogs[m] = v
        try:
            q[:] = np.exp(np.maximum(np.log(min_q), qlogs - logsumexp(qlogs, axis=0)))
        except:
            pass
        # Clip it, for safety
        q[:] = np.clip(q, min_q, 1 - min_q)
        # Update labels from these responsibilities
        self.labels_ = q.argmax(axis=0)
        # M-step: update thetas with the new labels (moment matching kept
        # here for reference but disabled in favor of the digamma solve).
        if 0:
            for m in range(M):
                for d in range(D):
                    sm, sv = weighted_avg_and_var(Xsafe[:, d], q[m])
                    self.theta_[m, d, 0] = sm * (sm * (1 - sm) / sv - 1)
                    self.theta_[m, d, 1] = (1 - sm) * (sm * (1 - sm) / sv - 1)
                    if np.isnan(self.theta_[m, d, 0]) or np.isnan(self.theta_[m, d, 1]):
                        raise Exception()
        else:
            from scipy.special import psi
            for m in range(M):
                for d in range(D):
                    # Weighted maximum likelihood: solve the digamma
                    # equations for a and b by binary search.
                    Ca = np.average(np.log(Xsafe[:, d]), weights=q[m])
                    Cb = np.average(np.log(1 - Xsafe[:, d]), weights=q[m])
                    a, b = self.theta_[m, d]
                    self.theta_[m, d, 0] = binary_search(
                        lambda x: (psi(x) - psi(x + b)) - Ca, 0.0001, 10000.0, maxiter=20)
                    self.theta_[m, d, 1] = binary_search(
                        lambda x: (psi(x) - psi(x + a)) - Cb, 0.0001, 10000.0, maxiter=20)
        # Calculate log-likelihood
        new_loglikelihood = self._compute_loglikelihoods(Xsafe)
        self.iterations += 1
        ag.info("Iteration DONE: loglikelihood {}".format(new_loglikelihood))
    # Finally, refit constrained betas (with a minimum std) per cluster.
    for m in range(M):
        Xm = Xsafe[self.labels_ == m]
        self.theta_[m] = self.fit_beta_atleast_std(Xm, 0.225)