def plot_like(self):
    plt.figure()
    xv = np.arange(0, 1, 0.01)
    yin = beta.pdf(xv, self.alp_in, self.bet_in)
    yout = beta.pdf(xv, self.alp_out, self.bet_out)
    plt.plot(xv, yin)
    plt.plot(xv, yout)
def g(x):
    prob_xx = beta.cdf(x, alpha_param, beta_param) + z
    if prob_xx > 1:  # Numerical precision paranoia
        prob_xx = 1
    xx = beta.ppf(prob_xx, alpha_param, beta_param)
    prob_diff = beta.pdf(x, alpha_param, beta_param) - beta.pdf(xx, alpha_param, beta_param)
    #print ' x=%f, prob_xx=%f, xx=%f, prob_diff=%f'%(x,prob_xx,xx,prob_diff)
    return prob_diff
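# Hedged usage sketch (not from the original source): the root of g is the lower edge of a
# highest-density interval of mass z, since g compares the density at x with the density at
# the point whose CDF is exactly z higher. Assumes alpha_param, beta_param and z are the
# module globals read by g, and that both shape parameters exceed 1 so the mode is interior.
from scipy.optimize import brentq
from scipy.stats import beta

alpha_param, beta_param, z = 3.0, 7.0, 0.95   # example values, not from the source
lo = brentq(g, 1e-9, beta.ppf(1 - z, alpha_param, beta_param) - 1e-9)
hi = beta.ppf(beta.cdf(lo, alpha_param, beta_param) + z, alpha_param, beta_param)
print('approximate %.0f%% HDI: [%.4f, %.4f]' % (100 * z, lo, hi))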
def beta_proposal(current, var):
    alpha_beta_fwd = jmutils.beta_shape(current, var)
    proposed = beta.rvs(*alpha_beta_fwd)
    fwd_prob = beta.pdf(proposed, *alpha_beta_fwd)
    alpha_beta_back = jmutils.beta_shape(proposed, var)
    back_prob = beta.pdf(current, *alpha_beta_back)
    log_back_fwd = math.log(back_prob / fwd_prob)
    return proposed, log_back_fwd
def bin_conj_prior():
    # Constants
    global bin_data, bin_mean
    a, b = 2, 4
    m, l = 0, 0
    count = 0
    u_list = []
    domain = np.linspace(beta.ppf(0.01, a, b), beta.ppf(0.99, a, b), 100)
    prior = beta.pdf(domain, a, b)
    fig, ax = plt.subplots(1, 1)
    # Update
    for i in bin_data:  # Go through data
        count += 1
        if i == 1:
            m += 1
        else:
            l += 1
        # Calculate mean
        u = (a + m) / (a + m + l + b)
        u_list.append(u)
        # Calculate posterior
        posterior = beta.pdf(domain, a + m, b + l)
        # MSE
        bin_mean_list = []
        # Obtuse way to find MSE, but effective
        for j in range(len(u_list)):
            bin_mean_list.append(bin_mean)
        mse = np.array(u_list) - np.array(bin_mean_list)
        # Plot Prior
        plt.figure(2)
        plt.plot(domain, prior)
        plt.title('Binomial - Prior')
        plt.xlabel('Number of Observations')
        plt.ylabel('PDF')
        # Plot Posterior
        plt.figure(3)
        plt.title('Binomial - Posterior')
        plt.xlabel('Number of Observations')
        plt.ylabel('PDF')
        plt.xlim(0, 1)
        if count % 5 == 0:
            plt.plot(domain, posterior)
        # Plot error
        plt.figure(4)
        plt.plot(mse)
        plt.title('Binomial - MSE')
        plt.xlabel('Number of Observations')
        plt.ylabel('Error')
def priors_plot():
    x = sp.linspace(0, 1, 1000)
    y2 = beta.pdf(x, 1, 2)
    y3 = beta.pdf(x, 10, 20)
    y4 = beta.pdf(x, 100, 200)
    plt.plot(x, y2, label=r'$\alpha = 1, \beta = 2$')
    plt.plot(x, y3, label=r'$\alpha = 10, \beta = 20$')
    plt.plot(x, y4, label=r'$\alpha = 100, \beta = 200$')
    plt.legend()
    plt.savefig('priors.pdf')
def prob_noisy_vec(given, bayes, noise_type, noise):
    if noise_type == "truncnorm":
        return tn_pdf_01(given, bayes, noise)
    elif noise_type == "beta":
        if hasattr(noise, "__len__"):
            #return np.array([beta.pdf(g, *jmutils.beta_shape(bayes, n)) for g, n in zip(given, noise)])
            return np.array([beta.pdf(g, *mode_beta(bayes, n)) for g, n in zip(given, noise)])
        else:
            return beta.pdf(given, *jmutils.beta_shape(bayes, noise))
    else:
        print("Unknown noise type")
        sys.exit("prob_noisy_vec: Unknown noise type")
def beta_proposal_old(current, var):
    proposed = 0
    tries = 0
    while (proposed == 0) or (proposed == 1):
        proposed = beta.rvs(c * current, c * (1 - current))
        tries += 1
        if tries > 1000:
            raise RuntimeError("Sampler is jammed")
    fwd_prob = beta.pdf(proposed, c * current, c * (1 - current))
    back_prob = beta.pdf(current, c * proposed, c * (1 - proposed))
    log_back_fwd = math.log(back_prob / fwd_prob)
    return proposed, log_back_fwd
def betaconv(res, alpha1, beta1, alpha2, beta2):
    import numpy as np
    from scipy.stats import beta
    # Set support
    x = np.arange(0, 2.001, res)  # 0:res:2
    # Individual Beta pdfs
    f1 = beta.pdf(x, alpha1, beta1)
    f2 = beta.pdf(x, alpha2, beta2)
    # Compute convolution
    y = np.convolve(f1, f2)
    # Reduce to [0..2] support
    y = y[0:len(x)]
    # Normalize (so that all values sum to 1/res)
    y = y / (sum(y) * res)
    return y
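# Hedged usage sketch (not from the original source): betaconv returns the density of the sum
# of two independent Beta variables on a grid over [0, 2]; the grid spacing res below is an
# illustrative choice and must match the spacing used inside the function.
import numpy as np
import matplotlib.pyplot as plt

res = 0.001
y = betaconv(res, 2, 5, 3, 3)
x = np.arange(0, 2.001, res)
plt.plot(x, y)
plt.xlabel('sum of the two Beta variables')
plt.ylabel('density')
plt.show()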
def fit_beta_by_pt(nsteps):
    """Fit a beta distribution by parallel tempering."""
    num_dimensions = 1
    # Create the dummy model
    b = BetaFit(0.5, 0.5)
    # Create the options
    opts = MCMCOpts()
    opts.model = b
    opts.estimate_params = b.parameters
    opts.initial_values = [10 ** 0.5]
    opts.nsteps = nsteps
    opts.anneal_length = 0
    opts.T_init = 1
    opts.use_hessian = False
    opts.seed = 1
    opts.norm_step_size = 0.5
    opts.likelihood_fn = b.likelihood
    opts.step_fn = step
    # Create the MCMC object
    num_temps = 8
    pt = PT_MCMC(opts, num_temps, 10)
    pt.estimate()
    plt.ion()
    for chain in pt.chains:
        fig = plt.figure()
        chain.prune(nsteps / 10, 1)
        (heights, points, lines) = plt.hist(chain.positions, bins=100, density=True)
        plt.plot(points, beta.pdf(points, b.a, b.b), 'r')
        plt.ylim((0, 10))
        plt.xlim((0, 1))
    return pt
def fit_beta(nsteps):
    """Fit a beta distribution by MCMC."""
    num_dimensions = 1
    # Create the dummy model
    b = BetaFit(0.5, 0.5)
    # Create the options
    opts = MCMCOpts()
    opts.model = b
    opts.estimate_params = b.parameters
    opts.initial_values = [1.001]
    opts.nsteps = nsteps
    opts.anneal_length = nsteps / 10
    opts.T_init = 100
    opts.use_hessian = False
    opts.seed = 1
    opts.norm_step_size = 0.01
    opts.likelihood_fn = b.likelihood
    opts.step_fn = step
    # Create the MCMC object
    mcmc = MCMC(opts)
    mcmc.initialize()
    mcmc.estimate()
    mcmc.prune(nsteps / 10, 1)
    plt.ion()
    for i in range(mcmc.num_estimate):
        plt.figure()
        (heights, points, lines) = plt.hist(mcmc.positions[:, i], bins=100, density=True)
        plt.plot(points, beta.pdf(points, b.a, b.b), 'r')
    return mcmc
def plot(a, b, trial, ctr):
    x = np.linspace(0, 1, 200)
    y = beta.pdf(x, a, b)
    mean = float(a) / (a + b)
    plt.plot(x, y)
    plt.title("Distributions after %s trials, true rate = %.1f, mean = %.2f"
              % (trial, ctr, mean))
    plt.show()
def plot_beta(name, a, b, ret=None, n=None):
    print(a, b)
    theta = linspace(0, 1, 300)
    pdf = beta.pdf(theta, a, b)
    ax = axes()
    ax.plot(theta, pdf / max(pdf))
    if n is not None:
        ax.text(0.025, 0.9, 'TRADE IDEA %d' % n)
    if ret is not None:
        ax.text(0.025, 0.85, 'RETURN %s 0' % ('>' if ret else '<'))
    ax.set_title('P(hit rate | ideas so far)')
    ax.yaxis.set_ticks([])
    ax.grid()
    ax.legend()
    ax.xaxis.set_label_text('Hit Rate')
    ax.xaxis.set_ticks(linspace(0, 1, 11))
    s, e = (beta.ppf(0.025, a, b), beta.ppf(0.975, a, b))
    ax.fill([s, s, e, e, s], [0, 1, 1, 0, 0], color='0.9')
    gcf().set_size_inches(10, 6)
    savefig(name, bbox_inches='tight')
    plt.close()
    return
def setBetaDistribution(self):
    """Set the probabilities for choosing the primary object: a Beta(1.3 + primCount, 1)
    density evaluated at 0.25 and 0.75, normalized to sum to 1."""
    choosePrimObj = [.25, .75]
    choosePrimObj = beta.pdf(choosePrimObj, 1.3 + self.primCount, 1)
    choosePrimObj /= choosePrimObj.sum()
    self.choosePrimObj = choosePrimObj
def bernoulli_posterior(data, prior):
    n_1 = sum(data)
    n_2 = len(data) - n_1
    x = np.arange(0, 1.01, step=.01)
    # Conjugate update: a Beta(a, b) prior with n_1 successes and n_2 failures
    # gives a Beta(a + n_1, b + n_2) posterior.
    y = beta.pdf(x, prior[0] + n_1, prior[1] + n_2)
    plt.plot(x, y)
    plt.show()
def plot_beta(a, b, fill=False):
    xs = np.linspace(0, 1, num=150)
    y = [beta.pdf(x, a, b) for x in xs]
    fills = [x <= 0.5 for x in xs]
    sio = io.BytesIO()
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    if fill:
        ax1.fill_between(xs, y, 0, where=fills, color='0.8')
    ax1.plot(xs, y)
    fig.savefig(sio, format='png')
    enc = base64.b64encode(sio.getvalue())
    plt.close("all")
    hex = enc.decode('utf-8')
    prob = beta.cdf(0.5, a, b)
    return hex, prob
def prob_noisy(given, bayes, noise_type, noise):
    """Probability of answering `given` when a rational Bayesian would answer `bayes`.

    Args:
        given: in [0, 1], what the respondent said
        bayes: in [0, 1], the rational Bayesian answer
        noise_type: one of "noiseless", "binomial", "beta", "truncnorm"
        noise: parameter of the chosen noise model

    Returns:
        prob in [0, 1]
    """
    if noise_type == "noiseless":
        tolerance = 1e-04
        if jmutils.fuzzy_equals(given, bayes, tolerance):
            return 1
        else:
            return 0
    elif noise_type == "binomial":
        num_trials = noise
        num_successes = int(given * num_trials)
        #return binom.pmf(num_successes, num_trials, bayes)
        return my_binom_pmf(num_successes, num_trials, bayes)
    elif noise_type == "beta":
        print("Optimize the beta stuff almost certainly!!!")
        alpha_beta = jmutils.beta_shape(bayes, noise)
        return beta.pdf(given, *alpha_beta)
    elif noise_type == "truncnorm":
        scale = noise
        #return truncnorm.pdf(given, -bayes / scale, (1 - bayes) / scale,
        #                     loc=bayes, scale=scale)
        return mytruncpdf_01bounds(given, bayes, noise)
    else:
        print("Error: meta noise_type not specified correctly")
        sys.exit()
def BernBeta(priorBetaAB, Data):
    '''
    priorBetaAB: Tuple of beta(a,b) parameters
    '''
    # For notational convenience, rename components of priorBetaAB:
    a = priorBetaAB[0]
    b = priorBetaAB[1]
    fig, ax = plt.subplots(3, 1, figsize=(16, 16))
    Theta = np.arange(0.001, 1, 0.001)  # points for plotting
    pTheta = beta.pdf(Theta, a, b)  # prior for plotting
    ax[0].fill_between(Theta, beta.pdf(Theta, a, b))
    ax[0].set(ylabel='test', title='Prior(beta)')
    ax[1].fill_between(Theta, beta.pdf(Theta, a, b))
    ax[1].set(xlabel='test', title='Prior(beta)')
    ax[2].fill_between(Theta, beta.pdf(Theta, a, b))
    ax[2].set(xlabel='test', title='Prior(beta)')
def plot(bandits, trial):
    x = np.linspace(0, 1, 200)
    for b in bandits:
        y = beta.pdf(x, b.a, b.b)
        plt.plot(x, y, label="real p: %.4f" % b.p)
    plt.title("Bandit distributions after %s trials" % trial)
    plt.legend()
    plt.show()
def wat(x, s_n, ar):
    f = 1
    for i in xrange(len(ar)):
        if i == s_n:
            # f(Sa|Dt), where Dt is the information available before the reward
            f = f * beta.pdf(x, ar[i][0] + 1, ar[i][1] - ar[i][0] + 1)
        else:
            # F(S < Sa|Dt)
            f = f * beta.cdf(x, ar[i][0] + 1, ar[i][1] - ar[i][0] + 1)
    return f
def one_vote(N, threshold=0.5, ab=False, forecasts=False, normal=False, p=False, diagnostic=False): import numpy as np if sum(map(bool,[ab, forecasts, normal, p])) != 1: raise ValueError("Please specify one and only one of the 'ab', 'forecasts', 'normal', or 'p' options.") if ab: a, b = ab elif forecasts: from scipy.stats import beta a, b, _, _ = beta.fit(forecasts, floc=0, fscale=1) elif normal: from functions import fit_beta_to_normal m, s = normal a, b = fit_beta_to_normal(m,s) else: pass if p: from functions import mp_binom victory_pr = mp_binom(np.ceil(N*threshold),N,p) if (N*threshold).is_integer(): # tie probability in case N*threshold is whole: tie_pr = mp_binom(N*(1-threshold)+1,N,p) elif not (N*threshold).is_integer() and threshold != 0.5: # tie probability in case N*threshold is not whole: tie_pr = mp_binom(N*(1-threshold),N,p) else: tie_pr = 0 elif a and b: from functions import beta_binomial victory_pr = beta_binomial(np.ceil(N*threshold),N,a,b,multi_precission=True) if (N*threshold).is_integer(): # tie probability in case N*threshold is whole: tie_pr = beta_binomial(N*(1-threshold)+1,N,a,b,multi_precission=True) elif not (N*threshold).is_integer() and threshold != 0.5: # tie probability in case N*threshold is not whole: tie_pr = beta_binomial(N*(1-threshold),N,a,b,multi_precission=True) else: tie_pr = 0 else: pass if diagnostic: import matplotlib.pyplot as plt x = np.linspace(0,1,1000) try: plt.style.use('http://chymera.eu/matplotlib/styles/chymeric-gnome.mplstyle') except ValueError: plt.style.use('ggplot') plt.axvline(x=threshold, color="#fbb4b9", linewidth=1) plt.legend(['percentage\n threshold'], loc='upper right') plt.plot(x, beta.pdf(x,a,b)) plt.xlabel('Reference Candidate Vote Share') plt.ylabel('PDF') plt.show() total_pr = victory_pr+tie_pr return total_pr, victory_pr, tie_pr
def main():
    """
    Two beta distributions with the same mean but differing variance.

    The distributions reflect the kicking averages of a player who has been very
    consistent through his career versus another who has had a range of better and
    worse years. On the x-axis we see the expected score ratio over the season; the
    y-axis indicates the likelihood of that average.

    The prime graphs (lighter colouring) show the change in belief for the players'
    expected season average after 4 kicks (trials) where only 1 was successful.
    The more consistent player's season expectation changes very little relative to
    the more varied player.

    Adapted from this explanation:
    http://varianceexplained.org/statistics/beta_distribution_and_baseball/
    """
    a_1 = 61
    b_1 = 20
    a_2 = 610
    b_2 = 200
    this_season_scores = 1
    this_season_misses = 3
    print(mean(a_1, b_1))
    print(mean(a_2, b_2))
    x = np.linspace(0, 1, 1000)
    y_1 = beta.pdf(x, a_1, b_1)
    y_2 = beta.pdf(x, a_2, b_2)
    y_1_prime = beta.pdf(x, a_1 + this_season_scores, b_1 + this_season_misses)
    y_2_prime = beta.pdf(x, a_2 + this_season_scores, b_2 + this_season_misses)
    plt.plot(x, y_1, 'r', lw=2, alpha=0.8, label='a = 61, b = 20')
    plt.plot(x, y_1_prime, 'r', lw=2, alpha=0.2, label='a = 62, b = 23')
    plt.plot(x, y_2, 'b', lw=2, alpha=0.8, label='a = 610, b = 200')
    plt.plot(x, y_2_prime, 'b', lw=2, alpha=0.2, label='a = 611, b = 203')
    plt.xlim(0.3, 1.0)
    plt.xlabel('p(scoring)')
    plt.ylabel('probability density')
    plt.legend(loc='upper left')
    plt.show()
def betadist(betaparams, B, pcF):
    # defining beta distribution parameters
    a = float(betaparams['a'].value)
    b = float(betaparams['b'].value)
    loc = float(betaparams['loc'].value)
    scale = float(betaparams['scale'].value)
    # creating fitted data
    model_pcF = beta.pdf(B, a, b, loc=loc, scale=scale)
    # returning residual
    return (model_pcF - pcF)
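# Hedged usage sketch (not from the original source): the residual signature and the
# betaparams['a'].value access pattern match lmfit's Parameters/minimize interface, so the
# function could plausibly be driven like this; starting values and the example data are
# illustrative, not from the source.
import numpy as np
from lmfit import Parameters, minimize
from scipy.stats import beta

B = np.linspace(0.01, 0.99, 50)      # example abscissa
pcF = beta.pdf(B, 2.0, 5.0)          # example data to fit
params = Parameters()
params.add('a', value=1.0, min=0)
params.add('b', value=1.0, min=0)
params.add('loc', value=0.0, vary=False)
params.add('scale', value=1.0, vary=False)
result = minimize(betadist, params, args=(B, pcF))
print(result.params['a'].value, result.params['b'].value)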
def plotBayesian(aprior, bprior, apost, bpost):
    x = np.linspace(0.2, 0.8, 1001)
    prior = beta.pdf(x, aprior, bprior)
    post = beta.pdf(x, apost, bpost)
    plt.plot(x, prior, color='m', linewidth=3)
    plt.plot(x, post, color='g', linewidth=3)
    priormax = aprior / (aprior + bprior)
    postmax = apost / (apost + bpost)
    plt.plot([priormax, priormax], [0, 6], color='r', linewidth=3)
    plt.plot([postmax, postmax], [0, 6], color='r', linewidth=3)
    plt.title('Prior and Posterior Probability Distribution for Florida', fontsize=36)
    plt.xlabel('Probability that Obama wins', fontsize=28)
    plt.ylabel('Probability Density', fontsize=28)
    plt.tick_params(labelsize=20)
    magenta_line = mpatches.Patch(color='magenta', label='Prior Distribution')
    green_line = mpatches.Patch(color='green', label='Posterior Distribution')
    red_line = mpatches.Patch(color='red', label='Most Likely Probability')
    plt.legend(handles=[magenta_line, green_line, red_line], prop={'size': 24})
    plt.show()
def pEmission(self, z, x):
    """Returns a number proportional to the probability of x given z."""
    # TODO: each poll should be chosen according to a multinomial distribution
    # rather than a dirichlet distribution
    res = 1
    for i in xrange(self.num_polls):
        alpha = self.b[i] * z
        res *= Beta.pdf(x[i, 0], alpha[0], alpha[1])
    return res
def main():
    a = 81
    b = 219
    stats = beta.stats(a, b, moments='mvsk')
    print('Mean: %.3f, Variance: %.3f, Skew: %.3f, Kurtosis: %.3f' % stats)
    x = np.linspace(0, 1, 1000)
    y = beta.pdf(x, a, b)
    plt.plot(x, y, 'r-', lw=2, alpha=0.6, label='beta pdf')
    plt.legend()
    plt.show()
def plot_stats():
    data = request.form
    x = np.linspace(0.1, 0.6, 200)
    y_a = beta.pdf(x, 1 + int(data['conversion_a']),
                   1 + int(data['allocation_a']) - int(data['conversion_a']))
    y_b = beta.pdf(x, 1 + int(data['conversion_b']),
                   1 + int(data['allocation_b']) - int(data['conversion_b']))
    plt.plot(x, y_a, label='A')
    plt.plot(x, y_b, label='B')
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    image_data = base64.b64encode(buf.read())
    response = Response(response=image_data, status=200)
    buf.close()
    plt.close()
    return response
def _nll(param, data):
    """Negative log likelihood function for the beta distribution."""
    from scipy.stats import beta
    a, b = param
    pdf = beta.pdf(data, a, b)
    lg = np.log(pdf)
    mask = np.isfinite(lg)
    nll = -lg[mask].sum()
    return nll
def inferPosterior(self, state, action, prior='uniform'):
    """
    Uses inference engine to compute posterior probability from the
    likelihood and prior (beta distribution).
    """
    if prior == 'beta':
        # Beta Distribution
        self.prior = np.linspace(.01, 1.0, 101)
        self.prior = beta.pdf(self.prior, 1.4, 1.4)
        self.prior /= self.prior.sum()
    elif prior == 'shiftExponential':
        # Shifted Exponential
        self.prior = np.zeros(101)
        for i in range(50):
            self.prior[i + 50] = i * .02
        self.prior[100] = 1.0
        self.prior = expon.pdf(self.prior)
        self.prior[0:51] = 0
        self.prior *= self.prior
        self.prior /= self.prior.sum()
    elif prior == 'shiftBeta':
        # Shifted Beta
        self.prior = np.linspace(.01, 1.0, 101)
        self.prior = beta.pdf(self.prior, 1.2, 1.2)
        self.prior /= self.prior.sum()
        self.prior[0:51] = 0
    elif prior == 'uniform':
        # Uniform
        self.prior = np.zeros(len(self.sims))
        self.prior = uniform.pdf(self.prior)
        self.prior /= self.prior.sum()
    self.posterior = self.likelihood * self.prior
    self.posterior /= self.posterior.sum()
def set_prior(self, theta_min, theta_max, num_theta, dist_name):
    self.theta_val = np.linspace(theta_min, theta_max, num=num_theta)
    self.num_theta = num_theta
    if dist_name == 'uniform':
        # the density is uniform
        self.theta_density = np.ones(num_theta) / num_theta
    elif dist_name == 'beta':
        # centered beta; rescale to move away from the boundary
        self.theta_density = beta.pdf(
            (self.theta_val - theta_min) / (theta_max - theta_min + 0.1), 2, 2)
        # renormalize
        self.theta_density = self.theta_density / sum(self.theta_density)
    else:
        raise Exception('Unknown prior distribution.')
def betaNLL(param, *args):
    '''Negative log likelihood function for beta

    <param>: list for parameters to be fitted.
    <args>: 1-element array containing the sample data.

    Return <nll>: negative log-likelihood to be minimized.
    '''
    a, b = param
    data = args[0]
    pdf = beta.pdf(data, a, b, loc=0, scale=1)
    lg = np.log(pdf)
    # -----Replace -inf with 0s------
    lg = np.where(lg == -np.inf, 0, lg)
    nll = -1 * np.sum(lg)
    return nll
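# Hedged usage sketch (not from the original source): betaNLL can be minimized numerically to
# get maximum-likelihood shape estimates. The starting guess (1, 1), the Nelder-Mead method,
# and the synthetic data are illustrative choices; beta.fit with location and scale pinned to
# (0, 1) should give comparable answers.
import numpy as np
from scipy.optimize import minimize
from scipy.stats import beta

data = beta.rvs(2.0, 5.0, size=1000, random_state=0)   # example data
result = minimize(betaNLL, x0=[1.0, 1.0], args=(data,), method='Nelder-Mead')
a_mle, b_mle = result.x
print('MLE via betaNLL:', a_mle, b_mle)
print('scipy beta.fit :', beta.fit(data, floc=0, fscale=1)[:2])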
def get_pdf(self, x):
    return beta.pdf(x, self.alpha, self.beta)
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
from scipy.stats import beta

fig, ax = plt.subplots(1, 1, figsize=(3.5, 4.5))
plt.tick_params(axis='both', which='major', labelsize=14)

a, b = .5, 1.5
x = np.linspace(beta.ppf(0.00, a, b), beta.ppf(1, a, b), 100)
# ax.plot(x, beta.pdf(x, a, b), 'k.', lw=2, alpha=1, label='beta pdf')

a, b = 4, 12
x = np.linspace(beta.ppf(0., a, b), beta.ppf(1, a, b), 100)
ax.plot(x, beta.pdf(x, a, b), 'k:', lw=2, alpha=1, label='$(4, 12)$')

a, b = 1, 3
x = np.linspace(beta.ppf(0.05, a, b), beta.ppf(1, a, b), 100)
ax.plot(x, beta.pdf(x, a, b), 'k--', lw=2, alpha=1, label='$(1, 3)$')

a, b = 2, 6
x = np.linspace(beta.ppf(0.00, a, b), beta.ppf(1, a, b), 1000)
ax.plot(x, beta.pdf(x, a, b), 'k-', lw=2, alpha=1, label='$(2, 6)$')

a, b = 0.25, .75
x = np.linspace(beta.ppf(0.3, a, b), beta.ppf(.99, a, b), 200)
ax.plot(x, beta.pdf(x, a, b), 'k-.', lw=2, alpha=1, label='$(0.25, 0.75)$')
def z_prob(z):
    return ([
        beta.pdf(xv, a1, b1) * beta.pdf(xv / z, a2, b2)
        for xv in np.linspace(0.1, 1, 100)
    ])
mpl.rc("font", family="serif") def likelihood(theta, n, k): return binom(n, k) * theta**k * (1 - theta)**(n - k) n = 11 k = 8 a = 2 b = 2 X = np.linspace(0, 1, num=1000) t = likelihood( X, n, k) * gamma(n + 2) / (gamma(k + 1) * gamma((n - k) + 1) * binom(n, k)) prior = beta.pdf(X, a, b) posterior = beta.pdf(X, a + k, b + (n - k)) fig, ax = plt.subplots(figsize=(7, 7 / 1.4)) y_max = 4 turq = mpl.colors.to_rgb("turquoise") mag = mpl.colors.to_rgb("magenta") mix = [(turq[i] + mag[i]) / 2 for i in range(3)] ax.plot(X, prior, color=turq, label="Prior", zorder=2) ax.plot(X, t, color=mag, label="Likelihood (normalized)", zorder=2) ax.plot(X, posterior, color=mix, label="Posterior", zorder=2) theta_map = (a + k - 1) / (a + b + n - 2) posterior_max = beta.pdf(theta_map, a + k, b + (n - k))
# beta continuous distribution (adapted from the scipy.stats docs)
import numpy as np
from scipy.stats import beta
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
a, b = 2.31, 0.627
mean, var, skew, kurt = beta.stats(a, b, moments='mvsk')

# Display the probability density function (pdf):
x = np.linspace(beta.ppf(0.01, a, b), beta.ppf(0.99, a, b), 100)
ax.plot(x, beta.pdf(x, a, b), 'r-', lw=5, alpha=0.6, label='beta pdf')

# Alternatively, the distribution object can be called (as a function) to fix the shape,
# location and scale parameters. This returns a "frozen" RV object holding the given
# parameters fixed.
# Freeze the distribution and display the frozen pdf:
rv = beta(a, b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of cdf and ppf:
vals = beta.ppf([0.001, 0.5, 0.999], a, b)
np.allclose([0.001, 0.5, 0.999], beta.cdf(vals, a, b))  # True

# Generate random numbers:
r = beta.rvs(a, b, size=1000)

# And compare the histogram:
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
if '-Nexp' in sys.argv:
    p = sys.argv.index('-Nexp')
    M = int(sys.argv[p + 1])
    if M > 0:
        M_exp = M
    else:
        print('Enter positive Nexp arg.')

# the probability distribution from which to sample --> beta function with parameters a, b
a, b = 7, 3
x = np.arange(0.01, 1, 0.01)
y = scipybeta.pdf(x, a, b)

plt.figure()
plt.plot(x, y, label=r'$\alpha =$' + str(a) + ', ' + r'$\beta =$' + str(b))
plt.xlabel('x', fontsize=16)
plt.ylabel(r'Beta($\alpha, \beta$)', fontsize=16)
plt.legend(fontsize=10)
plt.title('Non-Normalized, Continuous Prob Distribution', fontsize=15)
plt.show()

average_list = []
for i in range(0, M_exp):
    vals = []
    for j in range(0, N_samples):
        vals.append(npbeta(a, b))
    vals = np.asarray(vals)
# Q1
# Two lists alpha1 and alpha2 contain 5 pairs of values, with the (a1, a2) pair
# given by (alpha1[i], alpha2[i]) for i from 0 to 4
alpha1 = [2, 1, 3, 4, 7]
alpha2 = [3, 9, 5, 4, 5]

# Q2
# x_star list containing x* values for each pair
x_star = []
for i in range(5):
    x_star.append((alpha1[i] - 1) / (alpha1[i] + alpha2[i] - 2))

# Q3
# f list contains f(x*) values for the respective x* values
f = []
for i in range(5):
    c = beta.pdf(x_star[i], alpha1[i], alpha2[i])
    f.append(c)
    # print(c)
print(x_star)
print(f)

# Q4 and Q5
# acceptance-rejection method and histogram plots
bin = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
       0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
for i in range(5):
    c = f[i]
    U1 = []
    a = nm.random.random_sample()
    f_a = beta.pdf(a, alpha1[i], alpha2[i])
    'scale': 10,
    'mixture': 0.8
}

# Proposed values are a Gaussian perturbation away from the previous values.
# This is controlled by the sigma of the Gaussian, which is defined for each variable
proposal_sigma = {
    'missing': 0.025,
    'shape': 0.05,
    'scale': 2,
    'mixture': 0.025,
}

# PRIORS
priors = (lambda x: {
    'missing': beta.pdf(x['missing'], a=3, b=15),
    'mixture': beta.pdf(x['mixture'], a=1.1, b=1.1),
    'shape': gma.pdf(x['shape'], a=10, scale=1 / 5),
    'scale': gma.pdf(x['scale'], a=6, scale=50)
})

def test_mcmc():
    folder = os.path.dirname(os.path.abspath(__file__))
    file = "/mcmc_test_chain"
    chain = mcmc.run_MCMC(data=am_data,
                          initial_parameters=initial_parameters,
                          proposal_sigma=proposal_sigma,
                          priors=priors,
                          thin=1,
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta, norm

T = 501
true_ctr = 0.5
a, b = 1, 1
plot_indices = (10, 20, 30, 50, 100, 200, 500)
data = np.empty(T)

for i in range(T):
    x = 1 if np.random.random() < true_ctr else 0
    data[i] = x
    a += x
    b += 1 - x
    if i in plot_indices:
        p = data[:i].mean()
        n = i + 1
        std = np.sqrt(p * (1 - p) / n)
        x = np.linspace(0, 1, 200)
        g = norm.pdf(x, loc=p, scale=std)
        plt.plot(x, g, label="Gaussian Approximation")
        posterior = beta.pdf(x, a=a, b=b)
        plt.plot(x, posterior, label="Beta Posterior")
        plt.legend()
        plt.title(f"N = {n}")
        plt.show()
def get_arrays_for_plotting(self):
    to_plot = np.linspace(0, 1, 100)
    y_plot = beta_dist.pdf(to_plot, self.alpha, self.beta)
    return to_plot, y_plot
label="true pdf") ax[0, col].plot(xgrid, pdf_est * n_samples * (xgrid[1] - xgrid[0]), 'r', label="estimated pdf") if i == 0: ax[0, i].legend() ax[0, col].set_title( ("After %d iterations\n" + "($\\mathrm{E}_q[\\tau]$=%.3f, $\\mathrm{E}_q[\\theta]$=%.3f)") % (i + 1, tau_est, theta_est)) ax[0, col].set_xlabel("$x$") # plot marginal distribution of tau tau = np.linspace(0, 1.0, 1000) q_tau = beta.pdf(tau, N2 + alpha0, N1 + alpha0) ax[1, col].plot(tau, q_tau) ax[1, col].set_xlabel("$\\tau$") # plot marginal distribution of theta theta = np.linspace(-4.0, 8.0, 1000) q_theta = norm.pdf(theta, m2, np.sqrt(1 / beta2)) ax[2, col].plot(theta, q_theta) ax[2, col].set_xlabel("$\\theta$") col = col + 1 # finalize the plot ax[1, 0].set_ylabel("$q(\\tau)$") ax[2, 0].set_ylabel("$q(\\theta)$") plt.tight_layout() plt.show()
plt.legend(loc='best') plt.show() a_1 = np.linspace(0,10,100) a_2 = np.linspace(0,10,100) b_1 = np.linspace(0,10,100) b_2 = np.linspace(0,10,100) pi = np.linspace(0,1,10) input_space = np.linspace(0,1,1000) for i in range(5): pi_rvs = randint.rvs(0,10) a_1_rvs = randint.rvs(0,100) a_2_rvs = randint.rvs(0,100) b_1_rvs = randint.rvs(0,100) b_2_rvs = randint.rvs(0,100) bibeta_example_pdf = pi[pi_rvs]*beta.pdf(input_space,a_1[a_1_rvs],b_1[b_1_rvs]) + (1-pi[pi_rvs])*beta.pdf(input_space,a_2[a_2_rvs],b_2[b_2_rvs]) bibeta_example_cdf = pi[pi_rvs]*beta.cdf(input_space,a_1[a_1_rvs],b_1[b_1_rvs]) + (1-pi[pi_rvs])*beta.cdf(input_space,a_2[a_2_rvs],b_2[b_2_rvs]) ax = plt.subplot(111) ax.plot(input_space, pi[pi_rvs]*beta.pdf(input_space,a_1[a_1_rvs],b_1[b_1_rvs]), label="1 comp pdf") ax.plot(input_space, (1-pi[pi_rvs])*beta.pdf(input_space,a_2[a_2_rvs],b_2[b_2_rvs]), label="2 comp pdf") ax.plot(input_space, pi[pi_rvs]*beta.pdf(input_space,a_1[a_1_rvs],b_1[b_1_rvs]) + (1-pi[pi_rvs])*beta.pdf(input_space,a_2[a_2_rvs],b_2[b_2_rvs]), label="Mix pdf") plt.legend(loc='best') plt.show() ax = plt.subplot(111) ax.plot(input_space, pi[pi_rvs]*beta.cdf(input_space,a_1[a_1_rvs],b_1[b_1_rvs]), label="1 comp cdf") ax.plot(input_space, (1-pi[pi_rvs])*beta.cdf(input_space,a_2[a_2_rvs],b_2[b_2_rvs]), label="2 comp cdf") ax.plot(input_space, pi[pi_rvs]*beta.cdf(input_space,a_1[a_1_rvs],b_1[b_1_rvs]) + (1-pi[pi_rvs])*beta.cdf(input_space,a_2[a_2_rvs],b_2[b_2_rvs]), label="Mix cdf") plt.legend(loc='best') plt.show() bibeta_example_pdf = pi[pi_rvs]*beta.pdf(input_space,a_1[a_1_rvs],b_1[b_1_rvs]) + (1-pi[pi_rvs])*beta.pdf(input_space,a_2[a_2_rvs],b_2[b_2_rvs])
def p_z(z, include_gt):
    return np.sum([
        beta.pdf(xv, a1, b1) * beta.pdf(xv / z, a2, b2)
        for xv in np.linspace(0.1, 1, 100)
    ]) + (gt_prob * beta.pdf(z, 60, 3) if include_gt else 0)
def get_gene_info( *, annotated_vcf, variant_col, af_col, alt_col='Alt', del_col, output_dir, genes_col, maf_threshold=0.01, beta_param, weight_func='beta' ): """ Create temporary files with variant information for each gene, plus the weights calculated. Parameters ---------- annotated_vcf : str a file containing the variant, AF, ALT, Gene, and deleterious score. variant_col : str the name of the variant column. af_col : str the name of the Allele Frequency column. alt_col : str the name of the alternate allele column. del_col : str the name of functional annotation column. output_dir : str directory to save in temporary files. genes_col : str the name of genes column. maf_threshold : float between [0.0-1.0]. the minor allele frequency threshold, default is 0.01. beta_param : tuple the parameters of the beta function, if chosen for weighting. weight_func : str the weighting function, beta or log10. Returns ------- output directory with all the temporary files. """ skip = 0 if annotated_vcf.endswith('.gz'): with gzip.open(annotated_vcf, 'r') as fin: for line in fin: if line.decode('utf-8').startswith('##'): skip += 1 else: with open(annotated_vcf, 'r') as file: for line in file: if line.startswith('##'): skip += 1 df = pd.read_csv(annotated_vcf, usecols=[variant_col, alt_col, 'INFO'], skiprows=skip, sep=r'\s+', index_col=False) info = df['INFO'].str.split(pat=';', expand=True) missing_info = info[info.isnull().any(axis=1)].index df.drop(missing_info, inplace=True) df.reset_index(drop=True, inplace=True) info.drop(missing_info, inplace=True) info.reset_index(drop=True, inplace=True) for col in info.columns: val = info[col][0].split('=') if len(val) == 1: continue info.rename(columns={col: val[0]}, inplace=True) info[val[0]] = info[val[0]].str.replace(val[0] + "=", "") df = pd.concat([df, info], axis=1) df = df[df[af_col].values.astype(float) < maf_threshold] df.replace('.', 0.0, inplace=True) if weight_func == 'beta': df[weight_func] = beta.pdf(df[af_col].values.astype(float), beta_param[0], beta_param[1]) elif weight_func == 'log10': df[weight_func] = -np.log10(df[af_col].values.astype(float)) df[weight_func].replace([np.inf, -np.inf, np.nan], 0.0, inplace=True) df['score'] = df[weight_func].values.astype(float) * df[del_col].values.astype(float) genes = list(set(df[genes_col])) if not os.path.exists(output_dir): os.mkdir(output_dir) gene_file = output_dir + '.genes' with open(gene_file, 'w') as f: f.writelines("%s\n" % gene for gene in genes) [df[df[genes_col] == gene][[variant_col, alt_col, 'score', genes_col]].to_csv(os.path.join(output_dir, ( str(gene) + '.w')), index=False, sep='\t') for gene in tqdm(genes, desc="writing w gene files")] [df[df[genes_col] == gene][[variant_col, alt_col]].to_csv(os.path.join(output_dir, (str(gene) + '.v')), index=False, sep='\t') for gene in tqdm(genes, desc="writing v gene files")] return output_dir
## into a single number. This is not completely trivial, as you
## need to combine the negative and positive Z into it, but I
## think you can all work it out.
P_dependent *= P_D_positive * P_D_negative
P_independent *= P_D

print("Now calculate a posterior distribution for the relevant Bernoulli parameter. "
      "Focus on just one value of y for simplicity")

# First plot the joint distribution
prior_alpha = 1
prior_beta = 1
xplot = np.linspace(0, 1, 200)
pdf_p = beta.pdf(xplot, prior_alpha + positive_alpha, prior_beta + positive_beta)
pdf_n = beta.pdf(xplot, prior_alpha + negative_alpha, prior_beta + negative_beta)
pdf_m = beta.pdf(xplot, prior_alpha + positive_alpha + negative_alpha,
                 prior_beta + positive_beta + negative_beta)

n_figures += 1
plt.figure(n_figures)
plt.clf()
plt.plot(xplot, pdf_p)
plt.plot(xplot, pdf_n)
plt.plot(xplot, pdf_m)
plt.legend(["z=1", "z=-1", "marginal"])
plt.title("y=" + str(y))

print("Probability of independence: ", P_independent / (P_independent + P_dependent))
import numpy as np
from scipy.stats import beta
import matplotlib.pylab as plt

x = np.linspace(0, 1)
unif = beta.pdf(x, 1, 1)
cent = beta.pdf(x, 2.3, 2.3)
cent2 = beta.pdf(x, 12, 12)
skewed = beta.pdf(x, 3, 1)

plt.figure(figsize=(13, 4))
plt.subplot(1, 5, 1)
plt.plot(x, unif)
plt.ylim((0, 4))
plt.title('(A)')
plt.ylabel('distribution of $p$')
plt.xlabel('p')
plt.subplot(1, 5, 2)
plt.plot(x, cent)
plt.title('(B1)')
plt.xlabel('p')
plt.ylim((0, 4))
plt.subplot(1, 5, 3)
plt.plot(x, cent2)
plt.title('(B2)')
plt.xlabel('p')
plt.ylim((0, 4))
plt.subplot(1, 5, 4)
plt.plot(x, skewed)
plt.ylim((0, 4))
plt.xlabel('p')
In this program, a stands for alpha and b for beta.
A given probability density function has a mean and a standard deviation:
from the alpha and beta of a Beta distribution we can compute its mu and sigma directly,
and from its mu and sigma we can also recover its alpha and beta.
'''
import numpy as np
from scipy.stats import beta
import matplotlib.pyplot as plt
import seaborn as sns

if __name__ == "__main__":
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    x = np.linspace(0, 1, 100)
    params = [(0.5, 0.5), (1.0, 1.0), (4.0, 3.0), (2.0, 5.0), (6.0, 6.0)]
    for p in params:
        y = beta.pdf(x, p[0], p[1])
        a = p[0]  # alpha
        b = p[1]  # beta
        u = a / (a + b)  # mu (mean)
        s = ((a * b) / ((a + b)**2 * (a + b + 1)))**0.5  # sigma (standard deviation)
        plt.plot(
            x, y,
            label="$\\alpha=%.4f$, $\\beta=%.4f$, $\\mu=%.4f$, $\\sigma=%.4f$"
            % (a, b, u, s))
    plt.xlabel("$\\theta$, Fairness")
    plt.ylabel("Density")
    plt.legend(title="Parameters")
    plt.show()
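# Hedged sketch (not in the original script): the note above says alpha and beta can also be
# recovered from mu and sigma; inverting the two moment formulas used in the loop gives
def moments_to_beta(u, s):
    """Return (alpha, beta) of the Beta distribution with mean u and standard deviation s."""
    common = u * (1 - u) / (s ** 2) - 1   # this quantity equals alpha + beta
    return u * common, (1 - u) * common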
def sigma_lnprior(sigma, alpha_value, beta_value):
    return np.log(beta.pdf(abs(sigma), alpha_value, beta_value))
def f_beta(thetal, mu, nu):
    return beta.pdf(t(thetal), mu, nu) * 2. / np.pi
def coin_flip_posterior(h, n, r):
    # With a uniform prior, r heads in n flips give a Beta(r + 1, n - r + 1) posterior.
    return beta.pdf(h, r + 1, n - r + 1)
def pdf(self, grades):
    assert self.a is not None and self.b is not None, \
        'Params have not been set. First run .fit()'
    x = np.linspace(0, 1, 101)
    return x, beta.pdf(x, self.a, self.b)
def plot_beta_distribution(a, b):
    x = np.linspace(0, 1, 1000)
    y = beta.pdf(x, a=a, b=b)
    plt.plot(x, y, lw=3, alpha=0.7, label='a=%s, b=%s' % (a, b))
    20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 18, 18, 17, 20, 20, 20, 20, 19, 19, 18, 18,
    25, 24, 23, 20, 20, 20, 20, 20, 20, 10, 49, 19, 46, 27, 17, 49, 47, 20, 20, 13, 48, 50,
    20, 20, 20, 20, 20, 20, 20, 48, 19, 19, 19, 22, 46, 49, 20, 20, 23, 19, 22, 20, 20, 20,
    52, 46, 47, 24, 14
])
M = len(y)

# plot the separate and pooled models
plt.figure(figsize=(8, 10))
x = np.linspace(0, 1, 250)

# separate
plt.subplot(2, 1, 1)
lines = plt.plot(x, beta.pdf(x[:, None], y[:-1] + 1, n[:-1] - y[:-1] + 1), linewidth=1)
# highlight the last line
line1, = plt.plot(x, beta.pdf(x, y[-1] + 1, n[-1] - y[-1] + 1), 'r')
plt.legend((lines[0], line1),
           (r'Posterior of $\theta_j$', r'Posterior of $\theta_{71}$'))
plt.yticks(())
plt.title('separate model')

# pooled
plt.subplot(2, 1, 2)
plt.plot(x, beta.pdf(x, y.sum() + 1, n.sum() - y.sum() + 1),
         linewidth=2, label=(r'Posterior of common $\theta$'))
plt.legend()
plt.yticks(())
plt.xlabel(r'$\theta$', fontsize=20)
beta_params_a = [1] * latent_dim beta_params_a[0] = alpha beta_params_b = [1] * latent_dim ############################################################# # BUILD DATA SET if data_set == 1: # X ~ MP = Unif <-- Observed, thinned. # Y ~ P = Beta <-- Target, unthinned. # Weights = 1/M = P/(MP) = Beta/Unif = Beta latent = np.random.uniform(0, 1, size=(data_num, latent_dim)) latent_unthinned = np.random.beta(beta_params_a, beta_params_b, (data_num, latent_dim)) weights = vert(beta.pdf(latent[:, 0], alpha, 1.)) weights_unthinned = vert(beta.pdf(latent_unthinned[:, 0], alpha, 1.)) fixed_transform = np.random.normal(0, 1, size=(latent_dim, data_dim)) data = np.dot(latent, fixed_transform) data_unthinned = np.dot(latent_unthinned, fixed_transform) elif data_set == 2: # X ~ MP = Beta <-- Observed, thinned. # Y ~ P = Unif <-- Target, unthinned. # Weights = 1/M = P/(MP) = Unif/Beta = 1/Beta latent = np.random.beta(beta_params, beta_params, (data_num, latent_dim)) latent_unthinned = np.random.uniform(0, 1, size=(data_num, latent_dim)) weights = vert(1. / beta.pdf(latent[:, 0], alpha, 1.)) weights_unthinned = vert(1. / beta.pdf(latent_unthinned[:, 0], alpha, 1.))
def beta_pdf(x):
    return beta_rv.pdf(x, a=alpha_stat, b=beta_stat)

x_marginal_pdfs = [beta_pdf] * num_vars
def test_multi_action_distribution(self): """Tests the MultiActionDistribution (across all frameworks).""" batch_size = 1000 input_space = Tuple([ Box(-10.0, 10.0, shape=(batch_size, 4)), Box(-2.0, 2.0, shape=( batch_size, 6, )), Dict({"a": Box(-1.0, 1.0, shape=(batch_size, 4))}), ]) std_space = Box(-0.05, 0.05, shape=( batch_size, 3, )) low, high = -1.0, 1.0 value_space = Tuple([ Box(0, 3, shape=(batch_size, ), dtype=np.int32), Box(-2.0, 2.0, shape=(batch_size, 3), dtype=np.float32), Dict({"a": Box(0.0, 1.0, shape=(batch_size, 2), dtype=np.float32)}) ]) for fw, sess in framework_iterator(session=True): if fw == "torch": cls = TorchMultiActionDistribution child_distr_cls = [ TorchCategorical, TorchDiagGaussian, partial(TorchBeta, low=low, high=high) ] else: cls = MultiActionDistribution child_distr_cls = [ Categorical, DiagGaussian, partial(Beta, low=low, high=high), ] inputs = list(input_space.sample()) distr = cls(np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), model={}, action_space=value_space, child_distributions=child_distr_cls, input_lens=[4, 6, 4]) # Adjust inputs for the Beta distr just as Beta itself does. inputs[2]["a"] = np.clip(inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER)) inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 # Sample deterministically. expected_det = [ np.argmax(inputs[0], axis=-1), inputs[1][:, :3], # [:3]=Mean values. # Mean for a Beta distribution: # 1 / [1 + (beta/alpha)] * range + low (1.0 / (1.0 + inputs[2]["a"][:, 2:] / inputs[2]["a"][:, 0:2])) * (high - low) + low, ] out = distr.deterministic_sample() if sess: out = sess.run(out) check(out[0], expected_det[0]) check(out[1], expected_det[1]) check(out[2]["a"], expected_det[2]) # Stochastic sampling -> expect roughly the mean. inputs = list(input_space.sample()) # Fix categorical inputs (not needed for distribution itself, but # for our expectation calculations). inputs[0] = softmax(inputs[0], -1) # Fix std inputs (shouldn't be too large for this test). inputs[1][:, 3:] = std_space.sample() # Adjust inputs for the Beta distr just as Beta itself does. inputs[2]["a"] = np.clip(inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER)) inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 distr = cls(np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), model={}, action_space=value_space, child_distributions=child_distr_cls, input_lens=[4, 6, 4]) expected_mean = [ np.mean(np.sum(inputs[0] * np.array([0, 1, 2, 3]), -1)), inputs[1][:, :3], # [:3]=Mean values. # Mean for a Beta distribution: # 1 / [1 + (beta/alpha)] * range + low (1.0 / (1.0 + inputs[2]["a"][:, 2:] / inputs[2]["a"][:, :2])) * (high - low) + low, ] out = distr.sample() if sess: out = sess.run(out) out = list(out) if fw == "torch": out[0] = out[0].numpy() out[1] = out[1].numpy() out[2]["a"] = out[2]["a"].numpy() check(np.mean(out[0]), expected_mean[0], decimals=1) check(np.mean(out[1], 0), np.mean(expected_mean[1], 0), decimals=1) check(np.mean(out[2]["a"], 0), np.mean(expected_mean[2], 0), decimals=1) # Test log-likelihood outputs. # Make sure beta-values are within 0.0 and 1.0 for the numpy # calculation (which doesn't have scaling). inputs = list(input_space.sample()) # Adjust inputs for the Beta distr just as Beta itself does. 
inputs[2]["a"] = np.clip(inputs[2]["a"], np.log(SMALL_NUMBER), -np.log(SMALL_NUMBER)) inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0 distr = cls(np.concatenate([inputs[0], inputs[1], inputs[2]["a"]], axis=1), model={}, action_space=value_space, child_distributions=child_distr_cls, input_lens=[4, 6, 4]) inputs[0] = softmax(inputs[0], -1) values = list(value_space.sample()) log_prob_beta = np.log( beta.pdf(values[2]["a"], inputs[2]["a"][:, :2], inputs[2]["a"][:, 2:])) # Now do the up-scaling for [2] (beta values) to be between # low/high. values[2]["a"] = values[2]["a"] * (high - low) + low inputs[1][:, 3:] = np.exp(inputs[1][:, 3:]) expected_log_llh = np.sum( np.concatenate([ np.expand_dims( np.log( [i[values[0][j]] for j, i in enumerate(inputs[0])]), -1), np.log( norm.pdf(values[1], inputs[1][:, :3], inputs[1][:, 3:])), log_prob_beta ], -1), -1) values[0] = np.expand_dims(values[0], -1) if fw == "torch": values = tree.map_structure(lambda s: torch.Tensor(s), values) # Test all flattened input. concat = np.concatenate(tree.flatten(values), -1).astype(np.float32) out = distr.logp(concat) if sess: out = sess.run(out) check(out, expected_log_llh, atol=15) # Test structured input. out = distr.logp(values) if sess: out = sess.run(out) check(out, expected_log_llh, atol=15) # Test flattened input. out = distr.logp(tree.flatten(values)) if sess: out = sess.run(out) check(out, expected_log_llh, atol=15)
from scipy.stats import beta
import numpy as np
import matplotlib.pyplot as plt
import os
import pyprobml_utils as pml

x = np.linspace(0, 1, 100)
aa = [0.1, 1., 2., 8.]
bb = [0.1, 1., 3., 4.]
props = ['b-', 'r:', 'b-.', 'g--']
for a, b, p in zip(aa, bb, props):
    y = beta.pdf(x, a, b)
    plt.plot(x, y, p, lw=3, label='a=%.1f,b=%.1f' % (a, b))
plt.legend(loc='upper left')
pml.savefig('betaPlotDemo.png')
plt.show()
import scipy
from scipy.stats import beta

alphas = [2, 2, 1, 1]
betas = [2, 2, 1, 1]
Ns = [4, 40, 4, 40]
ks = [1, 10, 1, 10]
plots = ['betaPostInfSmallSample', 'betaPostInfLargeSample',
         'betaPostUninfSmallSample', 'betaPostUninfLargeSample']
x = np.linspace(0.001, 0.999, 50)

for i in range(len(plots)):
    alpha_prior = alphas[i]
    beta_prior = betas[i]
    N = Ns[i]
    k = ks[i]
    alpha_post = alpha_prior + N - k
    beta_post = beta_prior + k
    alpha_lik = N - k + 1
    beta_lik = k + 1
    pl.plot(x, beta.pdf(x, alpha_prior, beta_prior), 'r-',
            label='prior Be(%2.1f, %2.1f)' % (alpha_prior, beta_prior))
    pl.plot(x, beta.pdf(x, alpha_lik, beta_lik), 'k:',
            label='lik Be(%2.1f, %2.1f)' % (alpha_lik, beta_lik))
    pl.plot(x, beta.pdf(x, alpha_post, beta_post), 'b-',
            label='post Be(%2.1f, %2.1f)' % (alpha_post, beta_post))
    pl.legend(loc='upper left')
    pl.savefig(plots[i] + '.png')
    pl.show()
from scipy.stats import beta

# The beta pdf is zero outside [0, 1], so evaluating it at 2 prints 0.0.
print(beta.pdf(2, 3, 3))
def dpbmm(data, num_iter, param=None): # Beta value Dirichlet process mixture model, with no gap algorithm s_data = np.shape(data) G = copy.copy(s_data[0]) ## number of genes C = copy.copy(s_data[1]) ## number of samples count_sum = 100 # parameters for G0, Gamma(a, b) a = 6 b = 5 if param is None: param = params(mu_a=3.2, mu_b=2.2, sigma2_a=5, sigma2_b=2, k=C, m_s=np.ones(C), s=np.arange(C), tau=5) param.param_val(C, G) for i in range(num_iter): new_param = copy.deepcopy(param) ## Gibbs sampling ## step 1: resample s s = copy.copy(new_param.s) k = copy.copy(new_param.k) m_s = copy.copy(new_param.m_s) alpha_val = copy.copy(new_param.alpha_val) beta_val = copy.copy(new_param.beta_val) tau = copy.copy(new_param.tau) mu_a = copy.copy(new_param.mu_a) mu_b = copy.copy(new_param.mu_b) sigma2a = copy.copy(new_param.sigma2_a) sigma2b = copy.copy(new_param.sigma2_b) for j in range(C): s, k = rearrange_s(s) m_s, k = calculate_m_s(s) if m_s[s[j]] == 1: u = np.random.rand() if u < (k - 1) / k: continue ind = np.where(s == (k - 1))[0] tmp = copy.copy(s[j]) s[ind] = copy.copy(tmp) s[j] = k - 1 tmp_alpha_val = copy.copy(alpha_val[:, tmp]) alpha_val[:, tmp] = copy.copy(alpha_val[:, k - 1]) alpha_val[:, k - 1] = copy.copy(tmp_alpha_val) tmp_beta_val = copy.copy(beta_val[:, tmp]) beta_val[:, tmp] = copy.copy(beta_val[:, k - 1]) beta_val[:, k - 1] = copy.copy(tmp_beta_val) m_s, k = calculate_m_s(s, c=j) p_x = np.zeros(k) for l in range(k): L_alpha_val, L_beta_val = exp_trans( alpha_val[:, l], beta_val[:, l]) p_x[l] = np.prod( beta.pdf(data[:, j], L_alpha_val, L_beta_val)) w = m_s / (tau + j) * p_x if k < C: L_alpha_val, L_beta_val = exp_trans( alpha_val[:, k], beta_val[:, k]) # w[k+1] = tau/(tau+j-1) * np.prod(beta(data[:, j], L_alpha_val, L_beta_val)) temp1 = np.prod( beta.pdf(data[:, j], L_alpha_val, L_beta_val)) p_x = np.append(p_x, temp1) w = np.append(w, tau / (tau + j) * temp1) population = np.arange(k + 1) else: population = np.arange(k) w = w / np.sum(w) s[j] = np.random.choice(population, size=1, p=w) else: # del m_s # del p_x # del w m_s, k = calculate_m_s(s, c=j) p_x = np.zeros(k) for l in range(k): L_alpha_val, L_beta_val = exp_trans( alpha_val[:, l], beta_val[:, l]) p_x[l] = np.prod( beta.pdf(data[:, j], L_alpha_val, L_beta_val)) w = m_s / (tau + j) * p_x if k < C: L_alpha_val, L_beta_val = exp_trans( alpha_val[:, k], beta_val[:, k]) p_x = np.append( p_x, np.prod(beta.pdf(data[:, j], L_alpha_val, L_beta_val))) w = np.append(w, p_x[k] * tau / (tau + j)) population = np.arange(k + 1) else: population = np.arange(k) w = w / np.sum(w) s[j] = np.random.choice(population, size=1, p=w) s, k = rearrange_s(s) m_s, k = calculate_m_s(s, c=0) # new_alpha_val = np.zeros([G, k]) # new_beta_val = np.zeros([G, k]) for g in range(G): for j in np.arange(k, C): alpha_val[g, j] = np.random.normal(loc=mu_a, scale=sigma2a, size=1) beta_val[g, j] = np.random.normal(loc=mu_b, scale=sigma2b, size=1) V_a = sigma2a * np.ones([G, G]) V_b = sigma2b * np.ones([G, G]) for j in range(k): new_alpha_val = np.random.multivariate_normal( mean=alpha_val[:, j], cov=V_a) new_beta_val = np.random.multivariate_normal(mean=beta_val[:, j], cov=V_b) count = 0 accept = 0 c_1 = 0 ind = np.where(s == j) while accept == 0: for count_ind in range(count_sum): p_xi = np.zeros(len(ind)) p_xi_t = np.zeros(len(ind)) new_alpha_val[g] = np.random.normal(loc=alpha_val[g, j], scale=sigma2a, size=1) # new_beta_val[g, i] = np.random.normal(loc=beta_val[g, i], scale=sigma2b, size=1) # ind = np.where(s == j) L_alpha_val, L_beta_val = exp_trans( 
new_alpha_val, beta_val[:, j]) L_alpha_val_t, L_beta_val_t = exp_trans( alpha_val[:, j], beta_val[:, j]) for m in range(len(ind)): p_xi[m] = np.sum( np.log( beta.pdf(data[:, ind[m]], L_alpha_val, L_beta_val))) p_xi_t[m] = np.sum( np.log( beta.pdf(data[:, ind[m]], L_alpha_val_t, L_beta_val_t))) sum_p_xi = np.sum(p_xi) sum_p_xi_t = np.sum(p_xi_t) fx = np.dot( norm.pdf(alpha_val[:, j], np.zeros(G), sigma2a), norm.pdf(beta_val[:, j], np.zeros(G), sigma2b)) fx_t = np.dot( norm.pdf(new_alpha_val, np.zeros(G), sigma2a), norm.pdf(beta_val[:, j], np.zeros(G), sigma2b)) # Metropolis Hastings sampling if sum_p_xi == sum_p_xi_t and sum_p_xi == -np.inf: if fx == fx_t and fx == 0: tmp = 0 else: tmp = np.exp(np.log(fx) - np.log(fx_t)) else: if fx == fx_t and fx == 0: tmp = np.exp(sum_p_xi - sum_p_xi_t) else: if fx == 0 and sum_p_xi_t == -np.inf: tmp = np.exp(sum_p_xi - np.log(fx_t)) elif fx_t == 0 and sum_p_xi == -np.inf: tmp = np.exp(fx - sum_p_xi_t) else: tmp = np.exp( np.log(fx) + sum_p_xi - np.log(fx_t) - sum_p_xi_t) u = np.random.rand() # count_ind += 1 if u < tmp: alpha_val[g, j] = new_alpha_val[g] count += 1 c_1 += 1 if count >= 3 or c_1 > 4: accept = 1 else: sigma2a *= 0.98 if sigma2a < 0.01: sigma2a = 0.01 if count > 20: sigma2a /= 0.98 accept = 0 count = 0 c_2 = 0 while accept == 0: for count_ind in range(count_sum): new_beta_val[g] = np.random.normal(loc=beta_val[g, j], scale=sigma2b, size=1) L_alpha_val, L_beta_val = exp_trans( alpha_val[:, j], new_beta_val) L_alpha_val_t, L_beta_val_t = exp_trans( alpha_val[:, j], beta_val[:, j]) p_xi = np.zeros(len(ind)) p_xi_t = np.zeros(len(ind)) for m in range(len(ind)): p_xi[m] = np.sum( np.log( beta.pdf(data[:, ind[m]], L_alpha_val, L_beta_val))) p_xi_t = np.sum( np.log( beta.pdf(data[:, ind[m]], L_alpha_val_t, L_beta_val_t))) sum_p_xi = np.sum(p_xi) sum_p_xi_t = np.sum(p_xi_t) fx = np.dot( norm.pdf(alpha_val[:, j], np.zeros(G), sigma2a), norm.pdf(beta_val[:, j], np.zeros(G), sigma2b)) fx_t = np.dot( norm.pdf(new_alpha_val, np.zeros(G), sigma2a), norm.pdf(new_beta_val, np.zeros(G), sigma2b)) # Metropolis Hastings sampling if sum_p_xi == sum_p_xi_t and sum_p_xi == -np.inf: if fx == fx_t and fx == 0: tmp = 0 else: tmp = np.exp(np.log(fx) - np.log(fx_t)) else: if fx == fx_t and fx == 0: tmp = np.exp(sum_p_xi - sum_p_xi_t) else: if fx == 0 and sum_p_xi_t == -np.inf: tmp = np.exp(sum_p_xi - np.log(fx_t)) elif fx_t == 0 and sum_p_xi == -np.inf: tmp = np.exp(fx - sum_p_xi_t) else: tmp = np.exp( np.log(fx) + sum_p_xi - np.log(fx_t) - sum_p_xi_t) u = np.random.rand() if u < tmp: beta_val[g, j] = new_beta_val[g] count += 1 c_2 += 1 if count >= 3 or c_2 > 4: accept = 1 else: sigma2b *= 0.98 if sigma2b < 0.01: sigma2b = 0.01 if count > 20: sigma2b /= sigma2b # step 3: resampling mixture weights pi if k == 1: continue m_s, k = calculate_m_s(s) pi = dirichletrnd(m_s + tau / k) # step 4: resampling concentration parameter tau r = np.random.beta(a=tau + 1, b=C, size=1) eta_r = 1 / (C * (b - np.log(r)) / (a + k - 1) + 1) tmp = np.random.rand() if tmp < eta_r: tau_new = np.random.gamma(shape=a + k, scale=b - np.log(r), size=1) else: tau_new = np.random.gamma(shape=a + k - 1, scale=b - np.log(r), size=1) tau = tau_new # step 5: update parameters for the usage in the new iteration new_param.s = s new_param.k = k new_param.m_s = m_s new_param.alpha_val = alpha_val new_param.beta_val = beta_val new_param.tau = tau new_param.pi = pi new_param.sigma2_a = sigma2a new_param.sigma2_b = sigma2b param = copy.deepcopy(new_param) file.write(str(i) + " iterations done") 
print(param.s) return param