def binom_interval(success, total, confint=0.95):
    """From paulgb's binom_interval.py."""
    quantile = (1 - confint) / 2.
    lower = beta.ppf(quantile, success, total - success + 1)
    upper = beta.ppf(1 - quantile, success + 1, total - success)
    return (lower, upper)
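# --- Usage sketch (not from the original source) ---
# A minimal example of calling the Clopper-Pearson helper above, assuming
# `from scipy.stats import beta` is in scope (the snippet itself does not import it).
# Note that beta.ppf returns NaN for the lower bound when success == 0 and for the
# upper bound when success == total; callers typically map those to 0 and 1.
from scipy.stats import beta
import math

lo, hi = binom_interval(success=7, total=50, confint=0.95)
print(f'95% CI for 7/50: [{lo:.3f}, {hi:.3f}]')   # roughly [0.06, 0.27]

lo0, hi0 = binom_interval(success=0, total=50)
lo0 = 0.0 if math.isnan(lo0) else lo0             # guard the degenerate edge case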
def err(obs, total, use_beta=True, use_cache=True, for_paper=False):
    """ Return uncertainty on the ratio n / total """
    assert obs <= total
    if total == 0.0:
        return (0.0, 0.0, False)
    key = str(obs) + '/' + str(total)
    if use_cache and key in cached_uncertainties.errs:
        return cached_uncertainties.errs[key] + (True, )
    frac = float(obs) / total
    if use_beta or frac == 0.0 or frac == 1.0:  # still need to use beta for 0 and 1 'cause the sqrt thing below gives garbage for those cases
        if for_paper:      # total volume of confidence interval
            vol = 0.95     # use 95% for paper
            cpr = 0.5      # constant from prior (jeffreys for paper)
        else:
            vol = 2. / 3   # otherwise +/- 1 sigma
            cpr = 1.       # constant prior
        lo = beta.ppf((1. - vol) / 2, cpr + obs, cpr + total - obs)
        hi = beta.ppf((1. + vol) / 2, cpr + obs, cpr + total - obs)
        if frac < lo:  # if k/n very small (probably zero), take a one-sided c.i. with 2/3 (0.95) the mass
            lo = 0.
            hi = beta.ppf(vol, cpr + obs, cpr + total - obs)
        if frac > hi:  # same deal if k/n very large (probably one)
            lo = beta.ppf(1. - vol, cpr + obs, cpr + total - obs)
            hi = 1.
    else:  # square root shenaniganery
        err = (1. / (total * total)) * (math.sqrt(obs) * total - obs * math.sqrt(total))
        lo = frac - err
        hi = frac + err
    assert lo < frac or frac == 0.0
    assert frac < hi or frac == 1.0
    return (lo, hi) + (False, )
def plot_beta(name, a, b, ret=None, n=None): print(a, b) theta = linspace(0, 1, 300) pdf = beta.pdf(theta, a, b) ax = axes() ax.plot(theta, pdf / max(pdf)) if n is not None: ax.text(0.025, 0.9, 'TRADE IDEA %d' % n) if ret is not None: ax.text(0.025, 0.85, 'RETURN %s 0' % ('>' if ret else '<')) ax.set_title('P(hit rate | ideas so far)') ax.yaxis.set_ticks([]) ax.grid() ax.legend() ax.xaxis.set_label_text('Hit Rate') ax.xaxis.set_ticks(linspace(0, 1, 11)) s, e = (beta.ppf(0.025, a, b), beta.ppf(0.975, a, b)) ax.fill([s, s, e, e, s], [0, 1, 1, 0, 0], color='0.9') gcf().set_size_inches(10, 6) savefig(name, bbox_inches='tight') plt.close() return
def binomialCI(successes, attempts, alpha):
    """
    Calculates the upper and lower confidence intervals on binomial data
    using the Clopper-Pearson method.
    Added by Doug Ollerenshaw on 02/12/2014

    input:
        successes = number of successes
        attempts  = number of attempts
        alpha     = confidence range (e.g., 0.05 to return the 95% confidence interval)
    output:
        lower bound, upper bound

    Refs:
    [1] Clopper, C. and Pearson, S. The use of confidence or fiducial limits illustrated
        in the case of the Binomial. Biometrika 26: 404-413, 1934
    [2] http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
    [3] http://www.danielsoper.com/statcalc3/calc.aspx?id=85
        [an online calculator used to validate the output of this function]
    """
    from scipy.stats import beta
    import math

    x = successes
    n = attempts
    # NOTE: the ppf (percent point function) is equivalent to the inverse CDF
    lower = beta.ppf(alpha / 2, x, n - x + 1)
    if math.isnan(lower):
        lower = 0
    upper = beta.ppf(1 - alpha / 2, x + 1, n - x)
    if math.isnan(upper):
        upper = 1
    return lower, upper
def confident_fail_rate(total_fail, total_pass, confidence):
    """
    WE CAN NOT BE OVERLY OPTIMISTIC (0.0) ABOUT PREVIOUS SAMPLES, NOR OVERLY
    PESSIMISTIC (1.0). WE MODIFY THE RATIO OF fail/total TOWARD NEUTRAL (0.5)
    BY ACCOUNTING FOR SMALL total AND STILL BE WITHIN confidence

    stats.beta.ppf(x, a, b) == 1 - stats.beta.ppf(1-x, b, a)
    betaincinv(a, b, y) == stats.beta.ppf(y, a, b)
    """
    # SMALL NAMES OF EQUAL LENGTH TO DEMONSTRATE THE SYMMETRY BELOW
    confi = confidence
    error = 1 - confi

    # ppf() IS THE PERCENT POINT FUNCTION (INVERSE OF cdf())
    # MIN/MAX APPEAR TO BE HELPERS FROM THE AUTHOR'S UTILITY LIBRARY, NOT THE PYTHON BUILT-INS
    max1 = MIN(beta.ppf(confi, total_fail + 1, total_pass), 1)
    min1 = MAX(beta.ppf(error, total_fail, total_pass + 1), 0)

    # PICK THE probability CLOSEST TO 0.5
    if min1 < 0.5 and 0.5 < max1:
        return 0.5
    elif max1 < 0.5:
        return max1
    elif 0.5 < min1:
        return min1
    else:
        assert False
def set_tpm(self, total_num_reads):
    SegmentBin.set_expression(
        self,
        1e6 * beta.ppf(0.01, self.cnts.sum() + 1e-6, total_num_reads + 1e-6),
        1e6 * beta.ppf(0.50, self.cnts.sum() + 1e-6, total_num_reads + 1e-6),
        1e6 * beta.ppf(0.99, self.cnts.sum() + 1e-6, total_num_reads + 1e-6))
    return self
def filter_jns(jns, antistrand_jns, whitelist=set()):
    filtered_junctions = defaultdict(int)
    jn_starts = defaultdict(int)
    jn_stops = defaultdict(int)
    for (start, stop), cnt in jns.items():  # was iteritems() (Python 2)
        jn_starts[start] = max(jn_starts[start], cnt)
        jn_stops[stop] = max(jn_stops[stop], cnt)

    for (start, stop), cnt in jns.items():
        if (start, stop) not in whitelist:
            val = beta.ppf(0.01, cnt + 1, jn_starts[start] + 1)
            if val < config.NOISE_JN_FILTER_FRAC: continue
            val = beta.ppf(0.01, cnt + 1, jn_stops[stop] + 1)
            if val < config.NOISE_JN_FILTER_FRAC: continue
            #val = beta.ppf(0.01, cnt+1, jn_grps[jn_grp_map[(start, stop)]]+1)
            #if val < config.NOISE_JN_FILTER_FRAC: continue
            try:
                if (cnt + 1.) / (antistrand_jns[(start, stop)] + 1) <= 1.:
                    continue
            except KeyError:
                pass
            if stop - start + 1 > config.MAX_INTRON_SIZE:
                continue
        # whitelisted junctions skip the noise filters but are still kept
        filtered_junctions[(start, stop)] = cnt

    return filtered_junctions
def binom_interval(success, total, confint=0.95): """ Compute two-sided binomial confidence interval in Python. Based on R's binom.test. """ quantile = (1 - confint) / 2. lower = beta.ppf(quantile, success, total - success + 1) upper = beta.ppf(1 - quantile, success + 1, total - success) return (lower, upper)
def bin_conj_prior(): # Constants global bin_data, bin_mean a, b = 2, 4 m, l = 0, 0 count = 0 u_list = [] domain = np.linspace(beta.ppf(0.01, a, b), beta.ppf(0.99, a, b), 100) prior = beta.pdf(domain, a, b) fig, ax = plt.subplots(1, 1) # Update for i in bin_data: # Go through data count += 1 if i == 1: m += 1 else: l += 1 # Calculate mean u = (a+m)/(a+m+l+b) u_list.append(u) # Calculate posterior posterior = beta.pdf(domain, a+m, b+l) # MSE bin_mean_list = [] # Obtuse way to find MSE, but effective for i in range(len(u_list)): bin_mean_list.append(bin_mean) mse = np.array(u_list) - np.array(bin_mean_list) # Plot Prior plt.figure(2) plt.plot(domain, prior) plt.title('Binomial - Prior') plt.xlabel('Number of Observations') plt.ylabel('PDF') # Plot Posterior plt.figure(3) plt.title('Binomial - Posterior') plt.xlabel('Number of Observations') plt.ylabel('PDF') plt.xlim(0,1) if count % 5 == 0: plt.plot(domain, posterior) # Plot error plt.figure(4) plt.plot(mse) plt.title('Binomial - MSE') plt.xlabel('Number of Observations') plt.ylabel('Error')
def clopper_pearson(x, n):
    # `alpha` is not a parameter here; it is expected to be defined in an enclosing scope
    right = beta.ppf(1 - alpha / 2, x + 1, n - x)
    if np.isnan(right):
        right = 1
    left = beta.ppf(alpha / 2, x, n - x + 1)
    if np.isnan(left):
        left = 0
    return right - left, (left, right)
def fraction_uncertainty(obs, total):
    """ Return uncertainty on the ratio n / total """
    assert obs <= total
    if total == 0.0:
        return 0.0
    lo = beta.ppf(1. / 6, 1 + obs, 1 + total - obs)
    hi = beta.ppf(1. - 1. / 6, 1 + obs, 1 + total - obs)
    if float(obs) / total < lo:  # if k/n very small (probably zero), take a one-sided c.i. with 2/3 the mass
        lo = 0.
        hi = beta.ppf(2. / 3, 1 + obs, 1 + total - obs)
    if float(obs) / total > hi:  # same deal if k/n very large (probably one)
        lo = beta.ppf(1. / 3, 1 + obs, 1 + total - obs)
        hi = 1.
    return (lo, hi)
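# --- Usage sketch (not from the original source) ---
# fraction_uncertainty() returns a central ~68% (2/3 of the mass) credible interval on
# obs/total under a flat prior, falling back to a one-sided interval when the observed
# fraction lies outside the central interval (e.g. obs == 0). Assumes scipy's beta is imported.
from scipy.stats import beta

print(fraction_uncertainty(8, 20))   # central interval bracketing 0.4
print(fraction_uncertainty(0, 20))   # one-sided: (0.0, upper bound around 0.05)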
def ClopperPearsonError(num_changes, num_random_bits, samples=1, alpha=.05): num_changes = np.array(num_changes) n = np.round(num_random_bits) x = n - num_changes x = x.clip(1e-12, n) x *= samples n *= samples # https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper-Pearson_interval interv = np.vstack((beta.ppf(1.-alpha/2., x+1, n-x), beta.ppf(alpha/2., x, n-x+1))) interv = n * (1. - interv) / samples # convert to error interv[0,:] = num_changes - interv[0,:] interv[1,:] = interv[1,:] - num_changes return interv
def binomial_confidence_interval(nsuccess, ntrial, conf=68.27):
    """ Find a binomial confidence interval.

    Uses a Bayesian method assuming a flat prior. See Cameron 2011:
    http://adsabs.harvard.edu/abs/2011PASA...28..128C. This is superior to the
    commonly-used Normal, Wilson and Clopper & Pearson (AKA 'exact') approaches.

    Parameters
    ----------
    nsuccess: int
        The number of successes from the trials. If 0, then return the 1-sided
        upper limit. If the same as ntrial, return the 1-sided lower limit.
    ntrial: int
        The number of trials.
    conf: float (default 68.27)
        Confidence level in percent (95, 90, 68.3% or similar).

    Returns
    -------
    plo, phi : floats
        The two-sided confidence interval: probabilities such that >= observed
        number of successes occurs in fewer than conf% of cases (plo), and prob
        such that <= number of successes occurs in fewer than conf% of cases (phi).
    """
    from scipy.stats import beta
    nsuccess = int(nsuccess)
    ntrial = int(ntrial)
    assert 0 < conf < 100

    if nsuccess == 0:
        alpha = 1 - conf / 100.
        plo = 0.
        phi = beta.ppf(1 - alpha, nsuccess + 1, ntrial - nsuccess)
    elif nsuccess == ntrial:
        alpha = 1 - conf / 100.
        plo = beta.ppf(alpha, nsuccess, ntrial - nsuccess + 1)
        phi = 1.
    else:
        alpha = 0.5 * (1 - conf / 100.)
        plo = beta.ppf(alpha, nsuccess, ntrial - nsuccess + 1)
        phi = beta.ppf(1 - alpha, nsuccess + 1, ntrial - nsuccess)

    return plo, phi
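# --- Usage sketch (not from the original source) ---
# Quick illustration of the flat-prior (Cameron 2011) interval above, including the
# one-sided limit returned at the boundary. Values shown are approximate.
plo, phi = binomial_confidence_interval(4, 10)             # ~68% interval bracketing 0.4
plo0, phi0 = binomial_confidence_interval(0, 10, conf=95)  # (0, upper limit) when nsuccess == 0
print(plo, phi, plo0, phi0)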
def beta_confidence_intervals(ci_X, ntrials, ci=0.95): """ Compute confidence intervals of beta distributions. Parameters ---------- ci_X : numpy.array Computed confidence interval estimate from `ntrials` experiments ntrials : int The number of trials that were run. ci : float, optional, default=0.95 Confidence interval to report (e.g. 0.95 for 95% confidence interval) Returns ------- Plow : float The lower bound of the symmetric confidence interval. Phigh : float The upper bound of the symmetric confidence interval. Examples -------- >>> ci_X = np.random.rand(10,10) >>> ntrials = 100 >>> Plow, Phigh = beta_confidence_intervals(ci_X, ntrials) """ # Compute low and high confidence interval for symmetric CI about mean. ci_low = 0.5 - ci / 2 ci_high = 0.5 + ci / 2 # Compute for every element of ci_X. from scipy.stats import beta Plow = ci_X * 0.0 Phigh = ci_X * 0.0 for i in range(ci_X.shape[0]): for j in range(ci_X.shape[1]): Plow[i, j] = beta.ppf(ci_low, a=ci_X[i, j] * ntrials, b=(1 - ci_X[i, j]) * ntrials) Phigh[i, j] = beta.ppf(ci_high, a=ci_X[i, j] * ntrials, b=(1 - ci_X[i, j]) * ntrials) return Plow, Phigh
def transform(Rec): # r1 = Rec == 0.0 # r2 = Rec == 1.0 # Rec[r1] = 0.000000001 # Rec[r2] = 0.999999999 # Rec = -np.log(1-Rec)*2.5 Rec = beta.ppf(Rec, a1, b1, loc1, sca1) return Rec
def qBE(p: float, location: np.ndarray, scale: np.ndarray):
    """Quantile function."""
    a = location * (1 - scale**2) / (scale**2)
    b = a * (1 - location) / location
    q = beta.ppf(p, a=a, b=b)
    return q
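# --- Parameterization note and usage sketch (not from the original source) ---
# With a = mu*(1 - sigma^2)/sigma^2 and b = a*(1 - mu)/mu, the Beta(a, b) distribution
# has mean mu (= location) and variance mu*(1 - mu)*sigma^2 (sigma = scale), i.e. the
# usual mean/dispersion parameterization. A quick check of the median:
import numpy as np
from scipy.stats import beta

location, scale = np.array([0.3]), np.array([0.5])
print(qBE(0.5, location, scale))  # median; below the mean of 0.3 for this right-skewed Beta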
def g(x): prob_xx=beta.cdf(x,alpha_param,beta_param)+z if prob_xx>1: #Numerical precision paranoia prob_xx=1 xx=beta.ppf(prob_xx,alpha_param,beta_param) prob_diff=beta.pdf(x,alpha_param,beta_param)-beta.pdf(xx,alpha_param,beta_param) #print ' x=%f, prob_xx=%f, xx=%f, prob_diff=%f'%(x,prob_xx,xx,prob_diff) return(prob_diff)
def select_bandit(self):
    c = 1 - 1. / (self.N + 1)
    # percent point function (inverse of cdf)
    pps = [beta.ppf(c, a, b) for a, b in zip(self.alphas, self.betas)]
    return np.argmax(pps)
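# --- Standalone sketch of the same Bayes-UCB style rule (not from the original source) ---
# Each arm keeps a Beta(alpha, beta) posterior over its success probability; the arm with
# the largest upper quantile (here the 1 - 1/(N+1) quantile) is played. Names are illustrative.
import numpy as np
from scipy.stats import beta

alphas = np.array([3., 10., 1.])   # successes + 1 per arm
betas = np.array([5., 12., 1.])    # failures + 1 per arm
N = 20                             # rounds played so far

c = 1 - 1. / (N + 1)
upper_quantiles = beta.ppf(c, alphas, betas)
chosen_arm = int(np.argmax(upper_quantiles))
print(upper_quantiles, chosen_arm)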
def adjust_thre(x, alpha_, beta_):
    """Adjust the threshold: 0.99 for the baseline distribution, 0.9999 for the upstream distribution."""
    d = beta.ppf(x, alpha_, beta_)
    return x, d
def conf_int(self, conf_level=0.05):
    assert self.a is not None and self.b is not None, 'Params have not been set. First run .fit()'
    # Handles case where user specifies something like 95% CI
    if conf_level > 0.5:
        conf_level = 1 - conf_level
    left = beta.ppf(conf_level / 2, self.a, self.b)
    right = beta.ppf(1 - conf_level / 2, self.a, self.b)
    # Used because the ppf will never return 1, only 0.9999...
    left = np.round(left, 2)
    right = np.round(right, 2)
    if np.isnan(left):
        left = self.default_left
    if np.isnan(right):
        right = self.default_right
    return left, right
def binomialCI(successes=None, attempts=None, alpha=0.05):
    from scipy.stats import beta
    import math

    x = successes
    n = attempts
    # NOTE: the ppf (percent point function) is equivalent to the inverse CDF
    lower = beta.ppf(alpha / 2, x, n - x + 1)
    if math.isnan(lower):
        lower = 0
    upper = beta.ppf(1 - alpha / 2, x + 1, n - x)
    if math.isnan(upper):
        upper = 1
    return lower, upper
def from_bp_to_errors(self, row): # row has three rows : error, x, bp, and set datetime as columns x = row["x"] bp = row["base_process"] a, b, loc, scale = self.s_x[x] simulated_error = beta.ppf(bp, a, b, loc=loc, scale=scale) # scale = 0 cause NAN return simulated_error
def extract_scores(self, outputs):
    alpha_posterior = outputs['alpha_posterior']
    beta_posterior = outputs['beta_posterior']
    scores = beta.ppf(0.5, alpha_posterior, beta_posterior)
    scores = np.repeat(scores[:, np.newaxis], self.n_batches, axis=1)
    return 1. - scores
def get_mean_accuracy(all_means, nbins=10): """ Bins ancestors according to mean bootstrapped posterior probability, and then returns the mean accuracy for each bin """ ## Add a columns of bin assignments # bins = np.linspace(0, all_means['posterior'].max(), nbins) bins = np.linspace(0, 1, nbins) all_means['bin'] = np.digitize(all_means['posterior'], bins) ## Add upper bound to right-most bin all_means.replace(to_replace={'bin':{nbins: nbins-1}}, inplace=True) ## Bin ancestors by mean bootstrapped probability, adding columns for ## whether they were the true generating ancestor, and the number of ## ancestors in each bin bin_count = lambda x: len(x) binned = all_means[['generator', 'bin']].pivot_table(index='bin', aggfunc=[np.mean, bin_count], fill_value=0) binned.columns = [['observed_prob', 'bin_count']] binned['n_successes'] = binned['observed_prob'].values * \ binned['bin_count'].values ## Estimate means and confidence intervals as sampling from a binomial ## distribution, with a uniform prior on success rates - Done using ## a beta distribution binned['alpha'] = binned['n_successes'] + 1 binned['beta'] = binned['bin_count'].values - binned['n_successes'].values + 1 beta_mean = lambda row: beta.mean(float(row['alpha']), float(row['beta'])) binned['posterior_mean'] = binned.apply(beta_mean, axis=1) ## Add confidence intercals beta_025CI = lambda row: beta.ppf(0.025, float(row['alpha']), float(row['beta'])) beta_975CI = lambda row: beta.ppf(0.975, float(row['alpha']), float(row['beta'])) binned['CI2.5'] = binned.apply(beta_025CI, axis=1) binned['CI97.5'] = binned.apply(beta_975CI, axis=1) ## Convert to values relative to mean, to fit plotting convention binned['CI2.5'] = binned['posterior_mean'].values - binned['CI2.5'].values binned['CI97.5'] = binned['CI97.5'].values - binned['posterior_mean'].values ## Add column with bin centre for plotting binned['bin_centre'] = all_means[['posterior', 'bin']].groupby('bin').mean() return binned
def choose_arm(self):
    if self.HF:
        T = self.t + 1
    else:
        T = self.horizon
    d = 1 / (self.t * np.log(T)**self.c)
    theta = beta.ppf(1 - d, self.alphas, self.betas)
    self.t += 1
    return np.argmax(theta)
def beta_lin(lower, upper, parameteralfa, parameterbeta, size): generation_list = x_inbetween(0, 1, size) x = [] for i in generation_list: x.append(beta.ppf(i, parameteralfa, parameterbeta)) x = [i * (upper - lower) + lower for i in x] return list(x)
def verify_agents(env, number_of_users, agents):
    stat = {
        'Agent': [],
        '0.025': [],
        '0.500': [],
        '0.975': [],
    }

    for agent_id in agents:
        stat['Agent'].append(agent_id)
        data = deepcopy(env).generate_logs(number_of_users, agents[agent_id])
        bandits = data[data['z'] == 'bandit']
        successes = bandits[bandits['c'] == 1].shape[0]
        failures = bandits[bandits['c'] == 0].shape[0]
        stat['0.025'].append(beta.ppf(0.025, successes + 1, failures + 1))
        stat['0.500'].append(beta.ppf(0.500, successes + 1, failures + 1))
        stat['0.975'].append(beta.ppf(0.975, successes + 1, failures + 1))

    return pd.DataFrame().from_dict(stat)
def get_probility(x, N, a=0.05):
    """
    Given N repeated trials in which event A occurred x times, estimate the
    probability that event A occurs.

    Args:
        x : number of times the event occurred
        N : total number of trials
        a : significance level (the interval covers 1-a, default 0.05)
    Returns:
        P, E, std, (low, high)
        The most probable value P of the event probability, its expectation,
        standard deviation, and confidence interval.
    """
    P = 1.0 * x / N
    E = (x + 1.0) / (N + 2.0)
    std = np.sqrt(E * (1 - E) / (N + 3))
    low = beta.ppf(0.5 * a, x + 1, N - x + 1)
    high = beta.ppf(1 - 0.5 * a, x + 1, N - x + 1)
    return P, E, std, (low, high)
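# --- Usage sketch (not from the original source) ---
# Example: 3 occurrences in 40 trials with the default 95% interval.
import numpy as np
from scipy.stats import beta

P, E, std, (low, high) = get_probility(3, 40)
print(P, E, std, low, high)  # P = 0.075, E ~ 0.095, interval roughly (0.03, 0.20)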
def display(self, names=None):
    """
    Plot the posterior distributions of the bandit arms.

    INPUT: names (list) of string names of the bandit arms; defaults to None; optional
    """
    for i in range(self.n):  # was xrange (Python 2)
        a = self.arms[i][0] + 1
        b = self.arms[i][1] + 1
        # generate values that will define the line of the beta distribution
        x = np.linspace(beta.ppf(0.01, a, b), beta.ppf(0.99, a, b), 100)
        l = str(i)
        if names:
            plt.plot(x, beta(a, b).pdf(x), label=names[i])  # plot the pdf of the beta distribution
        else:
            plt.plot(x, beta(a, b).pdf(x), label=l)
    plt.legend(loc=2)
    plt.title(str(self.arms))
    plt.show()
def rBE(n: int, location: np.ndarray, scale: np.ndarray):
    """Random variable generation function."""
    n = math.ceil(n)
    p = np.random.uniform(0, 1, n)
    a = location * (1 - scale**2) / (scale**2)
    b = a * (1 - location) / location
    r = beta.ppf(p, a=a, b=b)
    return r
def beta_confidence_intervals(ci_X, ntrials, ci=0.95): """ Compute confidence intervals of beta distributions. Parameters ---------- ci_X : numpy.array Computed confidence interval estimate from `ntrials` experiments ntrials : int The number of trials that were run. ci : float, optional, default=0.95 Confidence interval to report (e.g. 0.95 for 95% confidence interval) Returns ------- Plow : float The lower bound of the symmetric confidence interval. Phigh : float The upper bound of the symmetric confidence interval. Examples -------- >>> ci_X = np.random.rand(10,10) >>> ntrials = 100 >>> [Plow, Phigh] = beta_confidence_intervals(ci_X, ntrials) """ # Compute low and high confidence interval for symmetric CI about mean. ci_low = 0.5 - ci / 2 ci_high = 0.5 + ci / 2 # Compute for every element of ci_X. from scipy.stats import beta Plow = ci_X * 0.0 Phigh = ci_X * 0.0 for i in range(ci_X.shape[0]): for j in range(ci_X.shape[1]): Plow[i, j] = beta.ppf(ci_low, a=ci_X[i, j] * ntrials, b=(1 - ci_X[i, j]) * ntrials) Phigh[i, j] = beta.ppf(ci_high, a=ci_X[i, j] * ntrials, b=(1 - ci_X[i, j]) * ntrials) return [Plow, Phigh]
def _bound(self, delta, n_scale=1.0): if self._mode == 'trivial': l = 0 u = 1 else: # Get statistics of the samples S = self._get_samples() n, ns, p = len(S) * n_scale, np.sum(S), np.mean(S) if n == 0: return (0, 1) # Compute the bound if self._mode == 'jeffery': l = beta.ppf(delta / 2, ns + 0.5, n - ns + 0.5) if (ns > 0) else 0 u = beta.ppf(1 - delta / 2, ns + 0.5, n - ns + 0.5) if (ns < n) else 1 elif self._mode == 'wilson': z = norm.ppf(1 - delta / 2) v = z**2 - (1 / n) + 4 * n * p * (1 - p) + (4 * p - 2) den = 2 * (n + z**2) i = (z * np.sqrt(v) + 1) c = (2 * n * p + z**2) l = max(0, (c - i) / den) u = min(1, (c + i) / den) elif self._mode == 'learned-miller': S = np.sort(S) D = np.diff(S.tolist() + [1.0]) U = np.random.random((5000, n)) U = np.sort(U, axis=1) M = 1 - (U * D[None]).sum(1) M = np.sort(M) i_ub = np.ceil((1 - delta) * 5000).astype(int) u = M[i_ub] elif self._mode == 'bootstrap': n_resamples = 1000 Z = (np.random.multinomial(S.shape[0], np.ones(S.shape[0]) / S.shape[0], n_resamples) * S[None, :]).mean(1) l, u = np.percentile(Z, (100 * delta / 2, 100 * (1 - delta / 2))) else: raise Exception('Unknown mode: %s' % self._mode) return (l, u)
def learn(self, old_obs, act, rew, new_obs, done):
    self.t += 1
    if rew > 0.5:
        self.successes[act] += 1
    else:
        self.failures[act] += 1
    self.means[act] = self.successes[act] / (self.successes[act] + self.failures[act])
    # update UCB bonus
    self.ucb_bonus = beta.ppf(1 - 1 / ((self.t + 1) * self.log_horizon**self.c),
                              self.successes, self.failures)
def qqplot(pvals, minuslog10p=False, text='', fontsize='medium', errorbars=True, maxy=None, ax=None, **kwargs): if ax is None: ax = plt.gca() x = np.arange(1 / len(pvals), 1 + 1 / len(pvals), 1 / len(pvals))[:len(pvals)] logx = -np.log10(x) if maxy is None: maxy = 3 * np.max(logx) if minuslog10p: logp = np.sort(pvals)[::-1] else: logp = -np.log10(np.sort(pvals)) logp[logp >= maxy] = maxy l, r = min(np.min(logp), np.min(logx)), max(np.max(logx), np.max(logp)) if errorbars: ranks = np.arange(1, len(logp) + 1) cilower = -np.log10(beta.ppf(.025, ranks, len(logx) - ranks + 1)) ciupper = -np.log10(beta.ppf(.975, ranks, len(logx) - ranks + 1)) ax.fill_between(logx, cilower, ciupper, facecolor='gray', interpolate=True, alpha=0.2, linewidth=0) ax.scatter(logx, logp, **kwargs) ax.plot([l, r], [l, r], c='gray', linewidth=0.2, dashes=[1, 1]) ax.set_xlim(min(logx), 1.01 * max(logx)) ax.set_xlabel(r'$-\log_{10}(\mathrm{rank}/n)$', fontsize=fontsize) ax.set_ylabel(r'$-\log_{10}(p)$', fontsize=fontsize) ax.set_title(text) plt.tight_layout()
def plot_beta_distribution(): head = 0 tail = 0 x = np.linspace(beta.ppf(0.00, 1, 1), beta.ppf(1.00, 1, 1), 200) plt.subplot(2, 4, 1) plt.plot(x, beta.pdf(x, 1, 1), 'r-', lw=5, label='beta pdf') counter = 2 for outcome in outcomes: head = head + outcome tail = tail + (1 - outcome) alpha, b = get_beta_posterior_parameters(head, tail) x = np.linspace(beta.ppf(0.00, alpha, b), beta.ppf(1.00, alpha, b), 200) plt.subplot(2, 4, counter) plt.plot(x, beta.pdf(x, alpha, b), 'r-', lw=5, label='beta pdf') counter += 1 # plt.axis('equal') plt.show()
def test_beta_template():
    alpha = .05
    k = 5
    m = 10
    t = beta_template(alpha, k, m)
    assert_array_almost_equal(
        t,
        beta.ppf(alpha, np.arange(1, k + 1),
                 np.array([m + 1] * k) - np.arange(1, k + 1)))
    assert isinstance(t, np.ndarray)
    assert len(t) == k
def select_items(self, required_num, item_score):
    self.turn += 1
    sample_val = [0.0] * self.K
    for k in range(self.K):
        z0 = beta.ppf(1.0 - 1.0 / self.turn, sum(self.S[k]), sum(self.N[k]) - sum(self.S[k]))
        sample_val[k] = z0 * item_score[k]
    items = sorted([(sample_val[k], k) for k in range(self.K)], reverse=True)
    result = [items[i][1] for i in range(required_num)]
    return result
def ucb(n: int, k: int, alpha: float):
    """
    Computes an upper confidence bound on the probability parameter p of a binomial CDF.

    Returns:
        The smallest p such that Pr[Binom[n,p] <= k] <= alpha
    """
    if k == n:
        return 1
    else:
        return beta.ppf(1 - alpha, k + 1, n - k)
def test_beta_distribution(): fig, ax = plt.subplots(1, 1) a, b = 10, 30 # Mean(‘m’), variance(‘v’), skew(‘s’), and/or kurtosis(‘k’). mean, var, skew, kurt = beta.stats(a, b, moments='mvsk') print(mean) print(var) print(skew) print(kurt) print(beta.pdf(0.333, a, b)) x = np.linspace(beta.ppf(0.01, a, b), beta.ppf(0.99, a, b), 100) ax.plot(x, beta.pdf(x, a, b), 'r-', lw=5, alpha=0.6, label='beta pdf') rv = beta(a, b) ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf') vals = beta.ppf([0.001, 0.5, 0.999], a, b) np.allclose([0.001, 0.5, 0.999], beta.cdf(vals, a, b)) r = beta.rvs(a, b, size=1000) ax.hist(r, density=True, histtype='stepfilled', alpha=0.2) ax.legend(loc='best', frameon=False) plt.show()
def get_initial_state(self, **kwargs):
    state = {}
    state['t'] = 0
    state['K'] = self.K
    state['successes'] = np.full(self.env.nA, self.prior_a, dtype=np.int32)
    state['failures'] = np.full(self.env.nA, self.prior_b, dtype=np.int32)
    state['means'] = state['successes'] / (state['successes'] + state['failures'])
    state['ucl_bonus'] = beta.ppf(1 - 1 / (state['K'] * (state['t'] + 1)),
                                  state['successes'], state['failures'])
    return state
def BBP(prior_a, prior_b, path=None): lol = read_data() #x = np.linspace(beta.ppf(0.01, prior_a, prior_b), beta.ppf(0.99, prior_a, prior_b), 100) #rv = beta(prior_a, prior_b) #plt.plot(x, rv.pdf(x)) #plt.show() for line in lol: x = np.linspace(beta.ppf(0.01, prior_a, prior_b), beta.ppf(0.99, prior_a, prior_b), 100) rv = beta(prior_a, prior_b) plt.plot(x, rv.pdf(x)) binolike = sum(line) / len(line) print(binolike) print(prior_a, prior_b) prior_a = prior_a + sum(line) prior_b = prior_b + len(line) - sum(line) print(prior_a, prior_b) print("-----") plt.show()
def inverse_beta_ml_band(z, i, N):
    alpha_param = i + 1
    beta_param = N - i + 1
    small = .5
    if i < small:
        x = 0
        xx = beta.ppf(z, alpha_param, beta_param)
    elif i > N - small:
        x = beta.ppf(1.0 - z, alpha_param, beta_param)
        xx = 1
        if np.isnan(x):
            print("NaN failure in beta edge case")
            IPython.embed()
    else:
        x_min = 0
        x_max = beta.ppf(1.0 - z, alpha_param, beta_param)

        def g(x):
            prob_xx = beta.cdf(x, alpha_param, beta_param) + z
            if prob_xx > 1:  # Numerical precision paranoia
                prob_xx = 1
            xx = beta.ppf(prob_xx, alpha_param, beta_param)
            prob_diff = beta.pdf(x, alpha_param, beta_param) - beta.pdf(xx, alpha_param, beta_param)
            #print '  x=%f, prob_xx=%f, xx=%f, prob_diff=%f'%(x,prob_xx,xx,prob_diff)
            return prob_diff

        try:
            #print '*** (x_min,x_max)=%f,%f'%(x_min,x_max)
            x = brentq(g, x_min, x_max)
            #print '    (x)=%f\n'%(x)
        except Exception:
            print("Failure at 'brentq'.")
            IPython.embed()

    prob_xx = beta.cdf(x, alpha_param, beta_param) + z
    if prob_xx > 1:  # Numerical precision paranoia
        prob_xx = 1
    xx = beta.ppf(prob_xx, alpha_param, beta_param)
    larger_x = np.max((x, xx))
    smaller_x = np.min((x, xx))
    return (smaller_x, larger_x)
def learn(self, old_obs, act, rew, new_obs, done):
    self.t += 1
    if rew > 0.5:
        self.successes[act] += 1
    else:
        self.failures[act] += 1
    self.means[act] = self.successes[act] / (self.successes[act] + self.failures[act])
    # update UCL bonus
    self.ucl_bonus = beta.ppf(1 - 1 / (self.K * (self.t + 1)),
                              self.successes, self.failures)
def plot(self, data, size, newdata=None):
    data = np.array(data)
    numsample = len(data)
    colmean = np.mean(data, axis=0)
    matcov = np.cov(data.T)
    matinv = np.linalg.inv(matcov)

    values = []
    for sample in data:
        dif = sample - colmean
        value = matinv.dot(dif.T).dot(dif)
        values.append(value)

    cl = ((numsample - 1)**2) / numsample
    lcl = cl * beta.ppf(0.00135, size / 2, (numsample - size - 1) / 2)
    center = cl * beta.ppf(0.5, size / 2, (numsample - size - 1) / 2)
    ucl = cl * beta.ppf(0.99865, size / 2, (numsample - size - 1) / 2)

    return (values, center, lcl, ucl, self._title)
def lcb(n: int, k: int, alpha: float):
    """
    Computes a lower confidence bound on the probability parameter p of a binomial CDF.

    Returns:
        The largest p such that Pr[Binom[n,p] >= k] <= alpha
    """
    if k == 0:
        return 0
    else:
        # Inspired by https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper%E2%80%93Pearson_interval
        return beta.ppf(alpha, k, n - k + 1)
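# --- Usage sketch (not from the original source) ---
# lcb() and ucb() above are the two halves of a Clopper-Pearson interval: calling each
# with alpha/2 gives a two-sided interval with coverage at least 1 - alpha.
from scipy.stats import beta

n, k, alpha = 100, 13, 0.05
interval = (lcb(n, k, alpha / 2), ucb(n, k, alpha / 2))
print(interval)  # roughly (0.07, 0.21) for 13 successes out of 100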
def event_time(t0, x, u, exp_num): if exp_num == 0: lambda_ = 1 if x == 0 else 2 return -math.log(u) / lambda_ + t0 if exp_num == 1: return beta.ppf(-math.log(u) / beta.pdf(x, 2, 2) + beta.cdf(t0, 2, 2), 2, 2) if exp_num == 2: return beta.ppf(-math.log(u) / beta.pdf(x, 4, 4) + beta.cdf(t0, 4, 4), 4, 4) if exp_num == 3: return np.exp(x - norm.ppf(u * norm.cdf(x - math.log(t0)))) if exp_num == 4: return np.power( -math.log(u) / (np.exp(-0.5 * math.cos(2 * math.pi * x) - 1.5)) + np.power(t0, 1.5), 2 / 3)
def random_sample_beta_inv_cdf(cls, E, a):
    """
    Sample a beta inverse CDF with expectation E and alpha param a.
    The values of E and alpha will fix the value of beta.
    """
    y_in = random.random()
    b = a * (1.0 / E - 1.0)
    # Use the percent point function - inverse CDF
    x_out = beta.ppf(y_in, a, b)
    x = y_in   # This is the input random number, uniform between 0-1
    y = x_out  # This is the output weighted random number
    return (x, y)
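# --- Standalone sketch of the same inverse-CDF sampling trick (not from the original source) ---
# Drawing u ~ Uniform(0, 1) and returning beta.ppf(u, a, b) yields Beta(a, b) samples;
# choosing b = a*(1/E - 1) fixes the mean at E, matching the helper above.
import random
from scipy.stats import beta

E, a = 0.2, 2.0
b = a * (1.0 / E - 1.0)
samples = [beta.ppf(random.random(), a, b) for _ in range(5000)]
print(sum(samples) / len(samples))  # should be close to E = 0.2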
def Dist(stvars, value, inpt): v = zeros(inpt) for j in range(inpt): if stvars[j].dist == 'NORM': v[j] = norm.ppf(norm.cdf(value[j], 0, 1), stvars[j].param[0], stvars[j].param[1]) elif stvars[j].dist == 'LNORM': v[j] = lognorm.ppf(norm.cdf(value[j], 0, 1), stvars[j].param[1], 0, exp(stvars[j].param[0])) elif stvars[j].dist == 'BETA': v[j] = beta.ppf(norm.cdf(value[j], 0, 1), stvars[j].param[0], stvars[j].param[1], stvars[j].param[2], stvars[j].param[3] - stvars[j].param[2]) elif stvars[j].dist == 'UNIF': v[j] = uniform.ppf(norm.cdf(value[j], 0, 1), stvars[j].param[0], stvars[j].param[1]) return v
def cbox_nom(k, n): a = k b = n - k + 1 c = k + 1 d = n - k cbox = pbox() steps = cbox.steps cbox_beta = beta.ppf([xy/steps for xy in [x+1 for x in range(steps)]], [rep(a,steps),rep(c,steps)], [rep(b,steps),rep(d,steps)]) cbox.left = cbox_beta[0] cbox.right = cbox_beta[1] return cbox
def generateLatinHypercubeSampledMultipliers(self, specification_map, number_samples) : # Construct sets of random sampled multipliers from the selected distribution for each parameter multiplier_sets = {} for key, specification in specification_map.items() : # Generate stratified random probability values for distribution generation via inverse CDF stratified_random_probabilities = ((np.array(range(number_samples)) + np.random.random(number_samples))/number_samples) # Use stratified random probability values to generate stratified samples from selected distribution via inverse CDF distribution = specification['distribution'] if distribution == 'uniform' : lower = specification['settings']['lower'] base = specification['settings']['upper'] - lower multiplier_sets[key] = uniform.ppf(stratified_random_probabilities, loc=lower, scale=base).tolist() elif distribution == 'normal' : mean = specification['settings']['mean'] std_dev = specification['settings']['std_dev'] multiplier_sets[key] = norm.ppf(stratified_random_probabilities, loc=mean, scale=std_dev).tolist() elif distribution == 'triangular' : a = specification['settings']['a'] base = specification['settings']['b'] - a c_std = (specification['settings']['c'] - a)/base multiplier_sets[key] = triang.ppf(stratified_random_probabilities, c_std, loc=a, scale=base).tolist() elif distribution == 'lognormal' : lower = specification['settings']['lower'] scale = specification['settings']['scale'] sigma = specification['settings']['sigma'] multiplier_sets[key] = lognorm.ppf(stratified_random_probabilities, sigma, loc=lower, scale=scale).tolist() elif distribution == 'beta' : lower = specification['settings']['lower'] base = specification['settings']['upper'] - lower a = specification['settings']['alpha'] b = specification['settings']['beta'] multiplier_sets[key] = beta.ppf(stratified_random_probabilities, a, b, loc=lower, scale=base).tolist() # Randomly select from sampled multiplier sets without replacement to form multipliers (dictionaries) sampled_multipliers = [] for i in range(number_samples) : sampled_multiplier = {} for key, multiplier_set in multiplier_sets.items() : random_index = np.random.randint(len(multiplier_set)) sampled_multiplier[key] = multiplier_set.pop(random_index) sampled_multipliers.append(sampled_multiplier) return sampled_multipliers
def is_heterozygous(allele_counts):
    """
    Determines if an allele should be considered heterozygous.

    Arguments:
        allele_counts: tuple (n_a, n_b, gamma) where
            n_a (int): number of a alleles counted. Used as alpha parameter for the beta distribution
            n_b (int): number of b alleles counted. Used as beta parameter for the beta distribution
            gamma (float): parameter used for deciding heterozygosity; determined via a beta
                distribution with 1 - gamma confidence

    Returns:
        A boolean indicating whether or not the allele should be considered heterozygous.
    """
    # Python 2 tuple-parameter unpacking is invalid in Python 3; unpack explicitly instead.
    n_a, n_b, gamma = allele_counts
    if n_a == -1 or n_b == -1:
        return False

    p_lower = gamma / 2.0
    p_upper = 1 - p_lower
    [c_lower, c_upper] = beta.ppf([p_lower, p_upper], n_a + 1, n_b + 1)
    return c_lower <= 0.5 and c_upper >= 0.5
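# --- Usage sketch (not from the original source) ---
# With roughly balanced allele counts the 1 - gamma credible interval for the allele
# fraction contains 0.5, so the site is called heterozygous; a strongly skewed count is not.
from scipy.stats import beta

print(is_heterozygous((30, 28, 0.05)))   # True: interval around ~0.52 covers 0.5
print(is_heterozygous((50, 5, 0.05)))    # False: interval sits well above 0.5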
Lthr = -0.21 # how many events of L < -21% occured? ne = np.sum(ret < Lthr) # of what magnitude? ev = ret[ret < Lthr] # avgloss = np.mean(ev) # if ev is non-empty array # prior alpha0 = beta0 = 1 # posterior alpha1 = alpha0 + ne beta1 = beta0 + N - ne pr = alpha1 / (alpha1 + beta1) cl1 = beta.ppf(0.05, alpha1, beta1) cl2 = beta.ppf(0.95, alpha1, beta1) ne252 = np.round(252 / (1 / cl2)) print("ne = %g" % ne) print("alpha', beta' = %g, %g" % (alpha1, beta1)) print("Pr(L < %3g%%) = %5.2f%%\t[%5.2f%%, %5.2f%%]" % (Lthr * 100, pr * 100, cl1 * 100, cl2 * 100)) print("E(ne) = %g" % ne252) ############################ returns = cp[1:] / cp[:-1] - 1 Lthr = -0.11 Pr = CL1 = CL2 = np.array([]) for i in range(2, len(returns)):
from app import app from dash.dependencies import Input, Output, State from scipy.stats import norm,bernoulli,beta,binom # global theta value global theta theta = 0 n = 10 # for lambert example # posterior dist a = 3 b = 9 x_beta = np.linspace(beta.ppf(0.01,a,b),beta.ppf(0.99,a,b),100) y_beta = beta.pdf(x_beta,a,b) x1 = np.random.randn(200) hist_data = [x1] trace = go.Scatter( x = x_beta, y = y_beta ) layout_posterior = go.Layout( xaxis={'title': 'X'},
def binom_interval(success, total, confint=0.95):
    quantile = (1 - confint) / 2.
    mode = float(success) / total
    lower = beta.ppf(quantile, success, total - success + 1)
    upper = beta.ppf(1 - quantile, success + 1, total - success)
    return (mode, lower, upper)
def CDF_error_beta(n, target_quantile, quantile_quantile):
    k = target_quantile * n
    return beta.ppf(quantile_quantile, k, n + 1 - k)
def SA_FAST(driver): # First order indicies for a given model computed with Fourier Amplitude Sensitivity Test (FAST). # R. I. Cukier, C. M. Fortuin, Kurt E. Shuler, A. G. Petschek and J. H. Schaibly. # Study of the sensitivity of coupled reaction systems to uncertainties in rate coefficients. # I-III Theory/Applications/Analysis The Journal of Chemical Physics # # Input: # inpt : no. of input factors # # Output: # SI[] : sensitivity indices # Other used variables/constants: # OM[] : frequencies of parameters # S[] : search curve # X[] : coordinates of sample points # Y[] : output of model # OMAX : maximum frequency # N : number of sample points # AC[],BC[]: fourier coefficients # V : total variance # VI : partial variances # ---------------------- Setup --------------------------- methd = 'FAST' method = '9' mu = [inp.get_I_mu() for inp in driver.inputs] I_sigma = [inp.get_I_sigma() for inp in driver.inputs] inpt = len(driver.inputs) input = driver.inputNames krig = driver.krig limstate= driver.limstate lrflag = driver.lrflag n_meta = driver.n_meta nEFAST = driver.nEFAST nSOBOL = driver.nSOBOL nMCS = driver.nMCS nodes = driver.nodes order = driver.order otpt = len(driver.outputNames) output = driver.outputNames p = driver.p plotf = 0 r = driver.r simple = driver.simple stvars = driver.stvars # ---------------------- Model --------------------------- # MI = 4#: maximum number of fourier coefficients that may be retained in # calculating the partial variances without interferences between the assigned frequencies # # Frequency assignment to input factors. OM = SETFREQ(inpt) # Computation of the maximum frequency # OMAX and the no. of sample points N. OMAX = int(OM[inpt-1]) N = 2 * MI * OMAX + 1 # Setting the relation between the scalar variable S and the coordinates # {X(1),X(2),...X(inpt)} of each sample point. S = pi / 2.0 * (2 * arange(1,N+1) - N-1) / N ANGLE = matrix(OM).T * matrix(S) X = 0.5 + arcsin(sin(ANGLE.T)) / pi # Transform distributions from standard uniform to general. for j in range(inpt): if stvars[j].dist == 'NORM': X[:,j] = norm.ppf(uniform.cdf(X[:,j], 0, 1), stvars[j].param[0], stvars[j].param[1]) elif stvars[j].dist == 'LNORM': X[:,j] = lognorm.ppf(uniform.cdf(X[:, j], 0, 1), stvars[j].param[1], 0, exp(stvars[j].param[0])) elif stvars[j].dist == 'BETA': X[:,j] = beta.ppf(uniform.cdf(X[:, j], 0, 1), stvars[j].param[0], stvars[j].param[1], stvars[j].param[2], stvars[j].param[3] - stvars[j].param[2]) elif stvars[j].dist == 'UNIF': X[:,j] = uniform.ppf(uniform.cdf(X[:,j], 0, 1), stvars[j].param[0], stvars[j].param[1]) # Do the N model evaluations. Y = zeros((N, otpt)) if krig == 1: load("dmodel") Y = predictor(X, dmodel) else: values = [] for p in range(N): # print 'Running simulation on test',p+1,'of',N # Y[p] = run_model(driver, array(X[p])[0]) values.append(array(X[p])[0]) Y = run_list(driver, values) # Computation of Fourier coefficients. 
AC = zeros((N, otpt))# initially zero BC = zeros((N, otpt))# initially zero # q = int(N / 2)-1 q = (N-1)/2 for j in range(2,N+1,2): # j is even # print "Y[q]",Y[q] # print "matrix(cos(pi * j * arange(1,q+) / N))",matrix(cos(pi * j * arange(1,q+1) / N)) # print "matrix(Y[q + arange(0,q)] + Y[q - arange(0,q)])",matrix(Y[q + arange(1,q+1)] + Y[q - arange(1,q+1)]) AC[j-1] = 1.0 / N * matrix(Y[q] + matrix(cos(pi * j * arange(1,q+1) / N)) * matrix(Y[q + arange(1,q+1)] + Y[q - arange(1,q+1)])) for j in range(1,N+1,2): # j is odd BC[j-1] = 1.0 / N * matrix(sin(pi * j * arange(1,q+1) / N)) * matrix(Y[q + arange(1,q+1)] - Y[q - arange(1,q+1)]) # Computation of the general variance V in the frequency domain. V = 2 * (matrix(AC).T * matrix(AC) + matrix(BC).T * matrix(BC)) # Computation of the partial variances and sensitivity indices. # Si=zeros(inpt,otpt); Si = zeros((otpt,otpt,inpt)); for i in range(inpt): Vi = zeros((otpt, otpt)) for j in range(1,MI+1): idx = j * OM[i]-1 Vi = Vi + AC[idx].T * AC[idx] + BC[idx].T * BC[idx] Vi = 2. * Vi Si[:, :, i] = Vi / V if lrflag == 1: SRC, stat = SRC_regress.SRC_regress(X, Y, otpt, N) # ---------------------- Analyze --------------------------- Sti = []# appears right after the call to this method in the original PCC_Computation.m # if plotf == 1: # piecharts(inpt, otpt, Si, Sti, method, output) if simple == 1: Si_t = zeros((inpt,otpt)) for p in range(inpt): Si_t[p] = diag(Si[:, :, p]) Si = Si_t.T Results = {'FirstOrderSensitivity': Si} if lrflag == 1: Results.update({'SRC': SRC, 'R^2': stat}) return Results
def gridSetting(data,options,Seed): # Initialisierung d = np.size(options['borders'],0) X1D = [] '''Equal steps in cumulative distribution''' if options['gridSetType'] == 'cumDist': Like1D = np.zeros([options['GridSetEval'], 1]) for idx in range(d): if options['borders'][idx, 0] < options['borders'][idx,1]: X1D.append(np.zeros([1, options['stepN'][idx]])) local_N_eval = options['GridSetEval'] while any(np.diff(X1D[idx]) == 0): Xtest1D = np.linspace(options['borders'][idx,0], options['borders'][idx,1], local_N_eval) alpha = Seed[0] beta = Seed[1] l = Seed[2] gamma = Seed[3] varscale = Seed[4] if idx == 1: alpha = Xtest1D elif idx == 2: beta = Xtest1D elif idx == 3: l = Xtest1D elif idx == 4: gamma = Xtest1D elif idx == 5: varscale = Xtest1D Like1D = likelihood(data, options, [alpha, beta, l, gamma, varscale]) Like1D = Like1D + np.mean(Like1D)*options['UniformWeight'] Like1D = np.cumsum(Like1D) Like1D = Like1D/max(Like1D) wanted = np.linspace(0,1,options['stepN'][idx]) for igrid in range(options['stepN'][idx]): X1D[idx].append(copy.deepcopy(Xtest1D[Like1D >= wanted, 0, 'first'])) #TODO check local_N_eval = 10*local_N_eval else: X1D.append(copy.deepcopy(options['borders'][idx,0])) ''' equal steps in cumulative second derivative''' elif (options['gridSetType'] in ['2', '2ndDerivative']): Like1D = np.zeros([options['GridSetEval'], 1]) for idx in range(d): if options['borders'][idx,0] < options['borders'][idx,1]: X1D.append(np.zeros([1,options['stepN'][idx]])) local_N_eval = options['GridSetEval'] while any(np.diff(X1D[idx] == 0)): Xtest1D = np.linspace(options['borders'][idx,0], options['borders'][idx,1], local_N_eval) alpha = Seed[0] beta = Seed[1] l = Seed[2] gamma = Seed[3] varscale = Seed[4] if idx == 1: alpha = Xtest1D elif idx == 2: beta = Xtest1D elif idx == 3: l = Xtest1D elif idx == 4: gamma = Xtest1D elif idx == 5: varscale = Xtest1D # calc likelihood on the line Like1D = likelihood(data, options, [alpha, beta, l, gamma, varscale]) Like1D = np.abs(np.convolve(np.squeeze(Like1D), np.array([1,-2,1]), mode='same')) Like1D = Like1D + np.mean(Like1D)*options['UniformWeight'] Like1D = np.cumsum(Like1D) Like1D = Like1D/max(Like1D) wanted = np.linspace(0,1,options['stepN'][idx]) for igrid in range(options['stepN'][idx]): X1D[idx].append(copy.deepcopy(Xtest1D[Like1D >= wanted, 0, 'first'])) #ToDo local_N_eval = 10*local_N_eval if local_N_eval > 10**7: X1D[idx] = np.unique(np.array(X1D)) # TODO check break else: X1D.append(options['borders'][idx,0]) ''' different choices for the varscale ''' ''' We use STD now directly as parametrisation''' elif options['gridSetType'] in ['priorlike', 'STD', 'exp', '4power']: for i in range(4): if options['borders'](i,0) < options['borders'](i,1): X1D.append(np.linspace(options['borders'][i,0], options['borders'][i,1], options['stepN'][i])) else: X1D.append(copy.deepcopy(options['borders'][id,0])) if options['gridSetType'] == 'priorlike': maximum = b.cdf(options['borders'][4,1],1,options['betaPrior']) minimum = b.cdf(options['borders'][4,0],1,options['betaPrior']) X1D.append(b.ppf(np.linspace(minimum, maximum, options['stepN'][4]), 1, options['betaPrior'])) elif options['gridSetType'] == 'STD': maximum = np.sqrt(options['borders'][4,1]) minimum = np.sqrt(options['borders'][4,0]) X1D.append((np.linspace(minimum, maximum, options['stepN'][4]))**2) elif options['gridSetType'] == 'exp': p = np.linspace(1,1,options['stepN'][4]) X1D.append(np.log(p)/np.log(.1)*(options['borders'][4,1] - options['borders'][4,0]) + options['borders'][4,0]) elif options['gridSetType'] 
== '4power': maximum = np.sqrt(options['borders'][4,1]) minimum = np.sqrt(options['borders'][4,0]) X1D.append((np.linspace(minimum, maximum, options['stepN'][4]))**4) return X1D
def binom_interval(success, total, conf=0.95):
    quantile = (1 - conf) / 2.
    lower = beta.ppf(quantile, success, total - success + 1)
    upper = beta.ppf(1 - quantile, success + 1, total - success)
    return (lower, upper)