def plot_like(self):
    plt.figure()
    xv = np.arange(0, 1, 0.01)
    yin = beta.pdf(xv, self.alp_in, self.bet_in)
    yout = beta.pdf(xv, self.alp_out, self.bet_out)
    plt.plot(xv, yin)
    plt.plot(xv, yout)
def g(x):
    prob_xx = beta.cdf(x, alpha_param, beta_param) + z
    if prob_xx > 1:  # numerical precision paranoia
        prob_xx = 1
    xx = beta.ppf(prob_xx, alpha_param, beta_param)
    prob_diff = beta.pdf(x, alpha_param, beta_param) - beta.pdf(xx, alpha_param, beta_param)
    # print('   x=%f, prob_xx=%f, xx=%f, prob_diff=%f' % (x, prob_xx, xx, prob_diff))
    return prob_diff
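The helper above reads like the objective for a highest-density-interval (HDI) search: x is a candidate lower endpoint, xx the point lying a credible mass z above it, and g returns the density gap between the two, so a root of g marks a pair of equal-density endpoints. A minimal sketch of that use, with hypothetical values for the variables g closes over:

from scipy.optimize import brentq
from scipy.stats import beta

alpha_param, beta_param, z = 3.0, 7.0, 0.95  # hypothetical values

lo = brentq(g, 1e-6, beta.ppf(1 - z, alpha_param, beta_param) - 1e-6)
hi = beta.ppf(beta.cdf(lo, alpha_param, beta_param) + z, alpha_param, beta_param)
print("95%% HDI: [%.4f, %.4f]" % (lo, hi))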
def beta_proposal(current, var):
    alpha_beta_fwd = jmutils.beta_shape(current, var)
    proposed = beta.rvs(*alpha_beta_fwd)
    fwd_prob = beta.pdf(proposed, *alpha_beta_fwd)
    alpha_beta_back = jmutils.beta_shape(proposed, var)
    back_prob = beta.pdf(current, *alpha_beta_back)
    log_back_fwd = math.log(back_prob / fwd_prob)
    return proposed, log_back_fwd
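beta_proposal returns the proposal together with the log Hastings correction, which is exactly what a Metropolis-Hastings acceptance step consumes. A hedged sketch of that step (log_post is a hypothetical log-posterior on (0, 1); jmutils.beta_shape is assumed to moment-match a mean and variance to beta shape parameters):

import math
import random

def mh_step(current, var, log_post):
    proposed, log_back_fwd = beta_proposal(current, var)
    # Accept with prob min(1, post(prop)/post(cur) * q(cur|prop)/q(prop|cur))
    log_alpha = log_post(proposed) - log_post(current) + log_back_fwd
    if math.log(random.random()) < log_alpha:
        return proposed  # accept
    return current       # reject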
Example 4
def bin_conj_prior():
    # Constants    
    global bin_data, bin_mean
    a, b = 2, 4
    m, l = 0, 0
    count = 0
    u_list = []
    domain = np.linspace(beta.ppf(0.01, a, b),
                    beta.ppf(0.99, a, b), 100)
    prior = beta.pdf(domain, a, b)
    
    fig, ax = plt.subplots(1, 1)
    
    # Update
    for i in bin_data:
        # Go through data
        count += 1
        if i == 1:
            m += 1
        else:
            l += 1
        
        # Calculate mean
        u = (a+m)/(a+m+l+b)
        u_list.append(u)
        
        # Calculate posterior 
        posterior = beta.pdf(domain, a+m, b+l)
        
        # Error of the running posterior mean against the sample mean
        # (plotted as "MSE" below, though it is the raw signed error)
        mse = np.array(u_list) - bin_mean
        
        # Plot Prior
        plt.figure(2)
        plt.plot(domain, prior)
        plt.title('Binomial - Prior')
        plt.xlabel('p')
        plt.ylabel('PDF')
        
        # Plot Posterior
        plt.figure(3)
        plt.title('Binomial - Posterior')
        plt.xlabel('p')
        plt.ylabel('PDF')
        plt.xlim(0,1)
        if count % 5 == 0:
            plt.plot(domain, posterior)
             
        # Plot error
        plt.figure(4)
        plt.plot(mse)        
        plt.title('Binomial - MSE')
        plt.xlabel('Number of Observations')
        plt.ylabel('Error')
def priors_plot():
    x = np.linspace(0, 1, 1000)
    y2 = beta.pdf(x,1,2)
    y3 = beta.pdf(x,10,20)
    y4 = beta.pdf(x,100,200)
    
    plt.plot(x,y2, label = r'$\alpha = 1, \beta = 2$')
    plt.plot(x,y3, label = r'$\alpha = 10, \beta = 20$')
    plt.plot(x,y4, label = r'$\alpha = 100, \beta = 200$')
    plt.legend()
    plt.savefig('priors.pdf')
Example 6
def prob_noisy_vec(given, bayes, noise_type, noise):
    if noise_type == "truncnorm":
        return tn_pdf_01(given, bayes, noise)
    elif noise_type == "beta":
        if hasattr(noise, "__len__"):        
            #return np.array([beta.pdf(g, *jmutils.beta_shape(bayes, n)) for g, n in zip(given, noise)])
            return np.array([beta.pdf(g, *mode_beta(bayes, n)) for g, n in zip(given, noise)])
        else:
            return beta.pdf(given, *jmutils.beta_shape(bayes, noise))
    else:
        print("Unknown noise type")
        sys.exit("prob_noisy_vec: Unknown noise type")
def beta_proposal_old(current, var):
    proposed = 0
    tries = 0
    while (proposed == 0) or (proposed == 1):
        proposed = beta.rvs(c * current, c * (1 - current))
        tries += 1
        if tries > 1000:
            raise RuntimeError("Sampler is jammed")
    fwd_prob = beta.pdf(proposed, c * current, c * (1 - current))
    back_prob = beta.pdf(current, c * proposed, c * (1 - proposed))
    log_back_fwd = math.log(back_prob / fwd_prob)
    return proposed, log_back_fwd
def betaconv(res,alpha1, beta1, alpha2, beta2):
    #Set support
    import numpy as np
    from scipy.stats import beta
    x = np.arange(0, 2.001, res)  # support on [0, 2], step res
    #Individual Beta pdfs
    f1 = beta.pdf(x,alpha1,beta1)
    f2 = beta.pdf(x,alpha2,beta2)
    #Compute convolution
    y = np.convolve(f1, f2)
    #Reduce to [0..2] support
    y = y[0:len(x)]
    #Normalize (so that all values sum to 1/res)
    y = y / (sum(y) * res)
    return y
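A quick sanity check of betaconv: the final normalization forces the returned values, times the grid step, to sum to one, so the density integrates to one over [0, 2].

import numpy as np

res = 0.001
y = betaconv(res, 2, 3, 4, 5)
print(np.sum(y) * res)  # 1.0 up to floating-point error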
Example 9
def fit_beta_by_pt(nsteps):
    """Fit a beta distribution by parallel tempering."""
    num_dimensions = 1
    # Create the dummy model
    b = BetaFit(0.5, 0.5)

    # Create the options
    opts = MCMCOpts()
    opts.model = b
    opts.estimate_params = b.parameters
    opts.initial_values = [10 ** 0.5]
    opts.nsteps = nsteps
    opts.anneal_length = 0
    opts.T_init = 1
    opts.use_hessian = False
    opts.seed = 1
    opts.norm_step_size = 0.5
    opts.likelihood_fn = b.likelihood
    opts.step_fn = step

    # Create the MCMC object
    num_temps = 8
    pt = PT_MCMC(opts, num_temps, 10)
    pt.estimate()

    plt.ion()
    for chain in pt.chains:
        fig = plt.figure()
        chain.prune(nsteps // 10, 1)
        (heights, points, lines) = plt.hist(chain.positions, bins=100,
                                            density=True)
        plt.plot(points, beta.pdf(points, b.a, b.b), 'r')
        plt.ylim((0,10))
        plt.xlim((0, 1))
    return pt
Example 10
def fit_beta(nsteps):
    """Fit a beta distribution by MCMC."""
    num_dimensions = 1
    # Create the dummy model
    b = BetaFit(0.5, 0.5)

    # Create the options
    opts = MCMCOpts()
    opts.model = b
    opts.estimate_params = b.parameters
    opts.initial_values = [1.001]
    opts.nsteps = nsteps
    opts.anneal_length = nsteps // 10
    opts.T_init = 100
    opts.use_hessian = False
    opts.seed = 1
    opts.norm_step_size = 0.01
    opts.likelihood_fn = b.likelihood
    opts.step_fn = step

    # Create the MCMC object
    mcmc = MCMC(opts)
    mcmc.initialize()
    mcmc.estimate()
    mcmc.prune(nsteps // 10, 1)

    plt.ion()
    for i in range(mcmc.num_estimate):
        plt.figure()
        (heights, points, lines) = plt.hist(mcmc.positions[:,i], bins=100,
                                            density=True)
        plt.plot(points, beta.pdf(points, b.a, b.b), 'r')
    return mcmc
def plot(a, b, trial, ctr):
  x = np.linspace(0, 1, 200)
  y = beta.pdf(x, a, b)
  mean = float(a) / (a + b)
  plt.plot(x, y)
  plt.title("Distributions after %s trials, true rate = %.1f, mean = %.2f" % (trial, ctr, mean))
  plt.show()
Example 12
def plot_beta(name, a, b, ret=None, n=None):
    print(a, b)
    theta = linspace(0, 1, 300)
    pdf = beta.pdf(theta, a, b)

    ax = axes()
    ax.plot(theta, pdf / max(pdf))
    if n is not None:
        ax.text(0.025, 0.9, 'TRADE IDEA %d' % n)
    if ret is not None:
        ax.text(0.025, 0.85, 'RETURN %s 0' % ('>' if ret else '<'))
    ax.set_title('P(hit rate | ideas so far)')
    ax.yaxis.set_ticks([])
    ax.grid()
    ax.legend()
    ax.xaxis.set_label_text('Hit Rate')
    ax.xaxis.set_ticks(linspace(0, 1, 11))
    s, e = (beta.ppf(0.025, a, b), beta.ppf(0.975, a, b))
    ax.fill([s, s, e, e, s], [0, 1, 1, 0, 0], color='0.9')

    gcf().set_size_inches(10, 6)
    savefig(name, bbox_inches='tight')

    plt.close()
    return
	def setBetaDistribution(self):
		"""
		Weight the choice of primitive object with a beta pdf that
		sharpens toward the higher option as self.primCount grows,
		then normalize the weights to sum to one.
		"""
		choosePrimObj = [.25, .75]
		choosePrimObj = beta.pdf(choosePrimObj, 1.3 + self.primCount, 1)
		choosePrimObj /= choosePrimObj.sum()
		self.choosePrimObj = choosePrimObj
Example 14
def bernoulli_posterior(data, prior):
    n_1 = sum(data)
    n_2 = len(data) - n_1
    x = np.arange(0, 1.01, step=.01)
    # The posterior of a Beta(a, b) prior after n_1 successes and
    # n_2 failures is Beta(a + n_1, b + n_2).
    y = beta.pdf(x, prior[0] + n_1, prior[1] + n_2)
    plt.plot(x, y)
    plt.show()
Example 15
def plot_beta(a, b, fill=False):
    xs = np.linspace(0, 1, num=150)
    y = beta.pdf(xs, a, b)
    fills = xs <= 0.5

    sio = io.BytesIO()
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    if fill:
        ax1.fill_between(xs, y, 0, where=fills, color='0.8')
    ax1.plot(xs, y)
    fig.savefig(sio, format='png')

    encoded = base64.b64encode(sio.getvalue()).decode('utf-8')
    plt.close("all")
    prob = beta.cdf(0.5, a, b)
    return encoded, prob
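The return pair is evidently meant for web embedding; a hedged usage sketch with the function as defined above:

png_b64, prob = plot_beta(3, 5, fill=True)
html = '<img src="data:image/png;base64,%s"/><p>P(x <= 0.5) = %.3f</p>' % (png_b64, prob)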
def prob_noisy(given, bayes, noise_type, noise):
    """Prob of saying given when rational bayesian would say bayes
    Args: given: in [0,1], what respondent said
          bayes: in [0, 1]
          model_params: dict, incl keys with noise info
    Returns: prob in [0, 1]
    Different types of noise models: beta, binomial, noiseless.
    """
    if noise_type == "noiseless":
        tolerance = 1e-04
        if jmutils.fuzzy_equals(given, bayes, tolerance):
            return 1
        else:
            return 0
    elif noise_type == "binomial":
        num_trials = noise
        num_successes = int(given * num_trials)
        #return binom.pmf(num_successes, num_trials, bayes)
        return my_binom_pmf(num_successes, num_trials, bayes)
    elif noise_type == "beta":
        print("Optimize the beta stuff almost certainly!!!")
        alpha_beta = jmutils.beta_shape(bayes, noise)
        return beta.pdf(given, *alpha_beta)
    elif noise_type == "truncnorm":
        scale = noise
        #return truncnorm.pdf(given, -bayes / scale, (1 - bayes) / scale, 
        #                     loc=bayes, scale=scale)
        return mytruncpdf_01bounds(given, bayes, noise)
    else:
        print("Error: meta noise_type not specified correctly")
        sys.exit()
def BernBeta(priorBetaAB, Data):
    '''
    priorBetaAB: tuple of beta(a, b) prior parameters
    Data: sequence of 0/1 Bernoulli outcomes
    '''
    # For notational convenience, rename components of priorBetaAB:
    a = priorBetaAB[0]
    b = priorBetaAB[1]
    z = sum(Data)   # number of 1's in Data
    N = len(Data)   # number of trials
    fig, ax = plt.subplots(3, 1, figsize=(16, 16))
    Theta = np.arange(0.001, 1, 0.001)  # points for plotting
    ax[0].fill_between(Theta, beta.pdf(Theta, a, b))
    ax[0].set(ylabel='p(theta)', title='Prior (beta)')
    ax[1].fill_between(Theta, Theta**z * (1 - Theta)**(N - z))
    ax[1].set(ylabel='p(D|theta)', title='Likelihood (Bernoulli)')
    ax[2].fill_between(Theta, beta.pdf(Theta, a + z, b + N - z))
    ax[2].set(xlabel='theta', ylabel='p(theta|D)', title='Posterior (beta)')
def plot(bandits, trial):
    x = np.linspace(0, 1, 200)
    for b in bandits:
        y = beta.pdf(x, b.a, b.b)
        plt.plot(x, y, label="real p: %.4f" % b.p)
    plt.title("Bandit distributions after %s trials" % trial)
    plt.legend()
    plt.show()
def wat(x, s_n, ar):
	f = 1
	for i in range(len(ar)):
		if i == s_n:
			f = f * beta.pdf(x, ar[i][0] + 1, ar[i][1] - ar[i][0] + 1)	# f(Sa|Dt); Dt is information available before reward
		else:
			f = f * beta.cdf(x, ar[i][0] + 1, ar[i][1] - ar[i][0] + 1)	# F(S < Sa|Dt)
	return f
Example 20
def one_vote(N, threshold=0.5, ab=False, forecasts=False, normal=False, p=False, diagnostic=False):
	import numpy as np
	if sum(map(bool,[ab, forecasts, normal, p])) != 1:
		raise ValueError("Please specify one and only one of the 'ab', 'forecasts', 'normal', or 'p' options.")

	if ab:
		a, b = ab
	elif forecasts:
		from scipy.stats import beta
		a, b, _, _ = beta.fit(forecasts, floc=0, fscale=1)
	elif normal:
		from functions import fit_beta_to_normal
		m, s = normal
		a, b = fit_beta_to_normal(m,s)
	else:
		pass

	if p:
		from functions import mp_binom
		victory_pr = mp_binom(np.ceil(N*threshold),N,p)
		if (N*threshold).is_integer():
			# tie probability in case N*threshold is whole:
			tie_pr = mp_binom(N*(1-threshold)+1,N,p)
		elif not (N*threshold).is_integer() and threshold != 0.5:
			# tie probability in case N*threshold is not whole:
			tie_pr = mp_binom(N*(1-threshold),N,p)
		else:
			tie_pr = 0
	elif a and b:
		from functions import beta_binomial
		victory_pr = beta_binomial(np.ceil(N*threshold),N,a,b,multi_precission=True)
		if (N*threshold).is_integer():
			# tie probability in case N*threshold is whole:
			tie_pr = beta_binomial(N*(1-threshold)+1,N,a,b,multi_precission=True)
		elif not (N*threshold).is_integer() and threshold != 0.5:
			# tie probability in case N*threshold is not whole:
			tie_pr = beta_binomial(N*(1-threshold),N,a,b,multi_precission=True)
		else:
			tie_pr = 0
	else:
		pass

	if diagnostic:
		import matplotlib.pyplot as plt
		x = np.linspace(0,1,1000)
		try:
			plt.style.use('http://chymera.eu/matplotlib/styles/chymeric-gnome.mplstyle')
		except ValueError:
			plt.style.use('ggplot')
		plt.axvline(x=threshold, color="#fbb4b9", linewidth=1)
		plt.legend(['percentage\n threshold'], loc='upper right')
		plt.plot(x, beta.pdf(x,a,b))
		plt.xlabel('Reference Candidate Vote Share')
		plt.ylabel('PDF')
		plt.show()

	total_pr = victory_pr+tie_pr
	return total_pr, victory_pr, tie_pr
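A hedged example call (not runnable standalone, since beta_binomial and mp_binom live in the author's unshown functions module): the chance that one extra ballot decides a simple-majority race among N voters whose vote share follows Beta(a, b).

total_pr, victory_pr, tie_pr = one_vote(10001, threshold=0.5, ab=(52, 48))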
def main():
    """ Two beta distributions with the same mean but differing variance.

    The distributions reflect kicking averages of a player who has been very
    consistent through his career versus another who has had a range of better
    and worse years. On the x-axis, we see the expected score ratio over the
    season - the y-axis indicating the likelihood of that average

    The prime graphs (lighter colouring) show the change in belief for the
    players' expected season average after 4 kicks (trials) where only 1 was
    successful. The more consistent player's season expectation changes very
    little relative to the more varied player.

    Adapted from this explanation:
    http://varianceexplained.org/statistics/beta_distribution_and_baseball/

    """
    a_1 = 61
    b_1 = 20
    a_2 = 610
    b_2 = 200

    this_season_scores = 1
    this_season_misses = 3

    print(mean(a_1, b_1))
    print(mean(a_2, b_2))

    x = np.linspace(0, 1, 1000)
    y_1 = beta.pdf(x, a_1, b_1)
    y_2 = beta.pdf(x, a_2, b_2)
    y_1_prime = beta.pdf(x, a_1 + this_season_scores, b_1 + this_season_misses)
    y_2_prime = beta.pdf(x, a_2 + this_season_scores, b_2 + this_season_misses)

    plt.plot(x, y_1,       'r', lw=2, alpha=0.8, label='a =  61, b =  20')
    plt.plot(x, y_1_prime, 'r', lw=2, alpha=0.2, label='a =  62, b =  23')
    plt.plot(x, y_2,       'b', lw=2, alpha=0.8, label='a = 610, b = 200')
    plt.plot(x, y_2_prime, 'b', lw=2, alpha=0.2, label='a = 611, b = 203')

    plt.xlim(0.3, 1.0)
    plt.xlabel('p(scoring)')
    plt.ylabel('probability density')
    plt.legend(loc='upper left')
    plt.show()
def betadist(betaparams,B,pcF):
    #defining beta distribution parameters
    a=float(betaparams['a'].value)
    b=float(betaparams['b'].value)
    loc=float(betaparams['loc'].value)
    scale=float(betaparams['scale'].value)
    #creating fitted data
    model_pcF=beta.pdf(B,a,b,loc=loc,scale=scale)         
    #returning residual
    return (model_pcF-pcF)
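betadist returns a residual vector, which matches the objective signature lmfit's minimize expects; the .value attribute access above suggests betaparams is an lmfit Parameters object. A hedged fitting sketch with made-up data:

import numpy as np
from scipy.stats import beta
from lmfit import Parameters, minimize

B = np.linspace(0.01, 0.99, 50)   # hypothetical abscissa
pcF = beta.pdf(B, 2.5, 4.0)       # hypothetical target curve
betaparams = Parameters()
betaparams.add('a', value=2.0, min=0)
betaparams.add('b', value=2.0, min=0)
betaparams.add('loc', value=0.0, vary=False)
betaparams.add('scale', value=1.0, vary=False)
result = minimize(betadist, betaparams, args=(B, pcF))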
Example 23
def plotBayesian(aprior, bprior, apost, bpost):
	x = np.linspace(0.2, 0.8, 1001)
	prior = beta.pdf(x, aprior, bprior)
	post = beta.pdf(x, apost, bpost)
	plt.plot(x, prior, color='m', linewidth = 3)
	plt.plot(x, post, color='g', linewidth = 3)
	priormax = (aprior - 1) / (aprior + bprior - 2)  # mode of the prior beta
	postmax = (apost - 1) / (apost + bpost - 2)      # mode of the posterior beta
	plt.plot([priormax, priormax], [0, 6], color='r', linewidth=3)
	plt.plot([postmax, postmax], [0, 6], color='r', linewidth=3)
	plt.title('Prior and Posterior Probability Distribution for Florida', fontsize=36)
	plt.xlabel('Probability that Obama wins', fontsize=28)
	plt.ylabel('Probability Density', fontsize=28)
	plt.tick_params(labelsize=20)
	magenta_line = mpatches.Patch(color='magenta', label='Prior Distribution')
	green_line = mpatches.Patch(color='green', label='Posterior Distribution')
	red_line = mpatches.Patch(color='red', label='Most Likely Probability')
	plt.legend(handles=[magenta_line, green_line, red_line], prop={'size': 24})
	plt.show()
Example 24
def pEmission(self, z, x):
    """
    Returns a number proportional to the probability of x given z
    """
    # TODO: each poll should be chosen according to a multinomial distribution rather than a dirichlet distribution
    res = 1
    for i in range(self.num_polls):
        alpha = self.b[i] * z
        res *= Beta.pdf(x[i, 0], alpha[0], alpha[1])
    return res
Example 25
def main():
    a =  81
    b = 219
    stats = beta.stats(a, b, moments='mvsk')
    print('Mean: %.3f, Variance: %.3f, Skew: %.3f, Kurtosis: %.3f' % stats)
    x = np.linspace(0, 1, 1000)
    y = beta.pdf(x, a, b)
    plt.plot(x, y, 'r-', lw=2, alpha=0.6, label='beta pdf')
    plt.legend()
    plt.show()
Example 26
def plot_stats():
    data = request.form
    x = np.linspace(0.1,0.6,200)
    y_a = beta.pdf(x, 1 + int(data['conversion_a']), 1 + int(data['allocation_a']) - int(data['conversion_a']))
    y_b = beta.pdf(x, 1 + int(data['conversion_b']), 1 + int(data['allocation_b']) - int(data['conversion_b']))

    plt.plot(x, y_a, label='A')
    plt.plot(x, y_b, label='B')

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)

    image_data = base64.b64encode(buf.read())

    response = Response(response=image_data, status=200)
    buf.close()
    plt.close()

    return response
Example 27
def _nll(param, data):
    """
    Negative log likelihood function for beta distribution
    """
    from scipy.stats import beta
    a, b = param
    pdf = beta.pdf(data, a, b)
    lg = np.log(pdf)
    mask = np.isfinite(lg)
    nll = -lg[mask].sum()

    return nll
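_nll has the (param, data) shape scipy.optimize.minimize expects via args; a minimal fitting sketch on simulated data (Nelder-Mead avoids the need for gradients):

import numpy as np
from scipy.stats import beta
from scipy.optimize import minimize

data = beta.rvs(2.0, 5.0, size=2000, random_state=0)
res = minimize(_nll, x0=[1.0, 1.0], args=(data,), method='Nelder-Mead')
print(res.x)  # should land near (2, 5)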
	def inferPosterior(self, state, action, prior='uniform'):
		"""
			Uses inference engine to compute posterior probability from the 
			likelihood and prior (beta distribution).
		"""

		if prior == 'beta':
			# Beta Distribution
			self.prior = np.linspace(.01,1.0,101)
			self.prior = beta.pdf(self.prior,1.4,1.4)
			self.prior /= self.prior.sum()

		elif prior == 'shiftExponential':
			# Shifted Exponential
			self.prior = np.zeros(101)
			for i in range(50):
				self.prior[i + 50] = i * .02
			self.prior[100] = 1.0
			self.prior = expon.pdf(self.prior)
			self.prior[0:51] = 0
			self.prior *= self.prior
			self.prior /= self.prior.sum()

		elif prior == 'shiftBeta':
			# Shifted Beta
			self.prior = np.linspace(.01,1.0,101)
			self.prior = beta.pdf(self.prior,1.2,1.2)
			self.prior /= self.prior.sum()
			self.prior[0:51] = 0

		elif prior == 'uniform':
			# Uniform
			self.prior = np.zeros(len(self.sims))	
			self.prior = uniform.pdf(self.prior)
			self.prior /= self.prior.sum()


		self.posterior = self.likelihood * self.prior
		self.posterior /= self.posterior.sum()
Example 29
    def set_prior(self, theta_min, theta_max, num_theta, dist_name):
        self.theta_val = np.linspace(theta_min, theta_max, num=num_theta)
        self.num_theta = num_theta
        if dist_name == 'uniform':
            # the density is uniform
            self.theta_density = np.ones(num_theta) / num_theta

        elif dist_name == 'beta':
            # centered beta
            # rescale to move away from the boundary
            self.theta_density = beta.pdf((self.theta_val - theta_min) / (theta_max - theta_min + 0.1), 2, 2)
            # renormalize
            self.theta_density = self.theta_density / sum(self.theta_density)
        else:
            raise Exception('Unknown prior distribution.')
Example 30
def betaNLL(param,*args):
    '''Negative log likelihood function for beta
    <param>: list for parameters to be fitted.
    <args>: 1-element array containing the sample data.

    Return <nll>: negative log-likelihood to be minimized.
    '''

    a,b=param
    data=args[0]
    pdf=beta.pdf(data,a,b,loc=0,scale=1)
    lg=np.log(pdf)
    #-----Replace -inf with 0s------
    lg=np.where(lg==-np.inf,0,lg)
    nll=-1*np.sum(lg)
    return nll
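Because betaNLL takes (param, *args), it drops straight into scipy.optimize.fmin; a minimal sketch on simulated data:

import numpy as np
from scipy.stats import beta
from scipy.optimize import fmin

data = beta.rvs(2.0, 5.0, size=2000)
a_hat, b_hat = fmin(betaNLL, [1.0, 1.0], args=(data,))
print(a_hat, b_hat)  # should land near (2, 5)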
 def get_pdf(self, x):
     return beta.pdf(x, self.alpha, self.beta)
Example 32
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
from scipy.stats import beta


fig, ax = plt.subplots(1, 1, figsize=(3.5, 4.5))
plt.tick_params(axis='both', which='major', labelsize=14)

a, b = .5, 1.5
x = np.linspace(beta.ppf(0.00, a, b), beta.ppf(1, a, b), 100)
#     ax.plot(x, beta.pdf(x, a, b),'k.', lw=2, alpha=1, label='beta pdf')

a, b = 4, 12
x = np.linspace(beta.ppf(0., a, b), beta.ppf(1, a, b), 100)
ax.plot(x, beta.pdf(x, a, b),'k:', lw=2, alpha=1, 
        label='$(4, 12)$')


a, b = 1, 3
x = np.linspace(beta.ppf(0.05, a, b), beta.ppf(1, a, b), 100)
ax.plot(x, beta.pdf(x, a, b),'k--', lw=2, alpha=1, label='$(1, 3)$')

a, b = 2, 6
x = np.linspace(beta.ppf(0.00, a, b), beta.ppf(1, a, b), 1000)
ax.plot(x, beta.pdf(x, a, b),'k-', lw=2, alpha=1, label='$(2, 6)$')

a, b = 0.25, .75
x = np.linspace(beta.ppf(0.3, a, b), beta.ppf(.99, a, b), 200)
ax.plot(x, beta.pdf(x, a, b),'k-.', lw=2, alpha=1, label='$(0.25, 0.75)$')
Example 33
 def z_prob(z):
     return ([
         beta.pdf(xv, a1, b1) * beta.pdf(xv / z, a2, b2)
         for xv in np.linspace(0.1, 1, 100)
     ])
mpl.rc("font", family="serif")


def likelihood(theta, n, k):
    return binom(n, k) * theta**k * (1 - theta)**(n - k)


n = 11
k = 8
a = 2
b = 2

X = np.linspace(0, 1, num=1000)
t = likelihood(
    X, n, k) * gamma(n + 2) / (gamma(k + 1) * gamma((n - k) + 1) * binom(n, k))
prior = beta.pdf(X, a, b)
posterior = beta.pdf(X, a + k, b + (n - k))

fig, ax = plt.subplots(figsize=(7, 7 / 1.4))
y_max = 4

turq = mpl.colors.to_rgb("turquoise")
mag = mpl.colors.to_rgb("magenta")
mix = [(turq[i] + mag[i]) / 2 for i in range(3)]
ax.plot(X, prior, color=turq, label="Prior", zorder=2)

ax.plot(X, t, color=mag, label="Likelihood (normalized)", zorder=2)

ax.plot(X, posterior, color=mix, label="Posterior", zorder=2)
theta_map = (a + k - 1) / (a + b + n - 2)
posterior_max = beta.pdf(theta_map, a + k, b + (n - k))
ax.legend(loc='best', frameon=False)
plt.show()

# beta continuous distribution (adapted from the scipy.stats docs)
from scipy.stats import beta
import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)
#Calculate a few first moments:
a, b = 2.31, 0.627
mean, var, skew, kurt = beta.stats(a, b, moments='mvsk')
#Display the probability density function (pdf):
x = np.linspace(beta.ppf(0.01, a, b), beta.ppf(0.99, a, b), 100)
ax.plot(x, beta.pdf(x, a, b), 'r-', lw=5, alpha=0.6, label='beta pdf')
#Alternatively, the distribution object can be called (as a function) to fix the shape, location and scale parameters. This returns a “frozen” RV object holding the given parameters fixed.
#Freeze the distribution and display the frozen pdf:
rv = beta(a, b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
#Check accuracy of cdf and ppf:
vals = beta.ppf([0.001, 0.5, 0.999], a, b)
np.allclose([0.001, 0.5, 0.999], beta.cdf(vals, a, b))  # True
#Generate random numbers:
r = beta.rvs(a, b, size=1000)
#And compare the histogram:
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
Example 36
    if '-Nexp' in sys.argv:
        p = sys.argv.index('-Nexp')
        M = int(sys.argv[p+1])
        if M > 0:
            M_exp = M
        else:
            print('Enter positive Nexp arg.')
 


    #the probability distribution from which to sample --> beta function with parameters a, b
    a,b=7,3
    
    x = np.arange (0.01, 1, 0.01)
    y = scipybeta.pdf(x,a,b)
    plt.figure()
    plt.plot(x,y,label = r'$\alpha =$' + str(a) +', '+ r'$\beta =$' + str(b))
    plt.xlabel('x',fontsize=16)
    plt.ylabel(r'Beta($\alpha, \beta$)',fontsize=16)
    plt.legend(fontsize=10)
    plt.title('Non-Normalized, Continuous Prob Distribution',fontsize=15)
    plt.show()


    average_list = []
    for i in range(0, M_exp):
        vals = []
        for j in range(0, N_samples):
            vals.append(npbeta(a, b))
        vals = np.asarray(vals)
Example 37
#Q1
#Two lists alpha1 and alpha2 contains 5 pairs of values with (a1,a2) pair as (alpha1[i],alpha2[i]) for i from 0 to 4
alpha1 = [2, 1, 3, 4, 7]
alpha2 = [3, 9, 5, 4, 5]

#Q2
#x_star list containing x* values for each pair
x_star = []
for i in range(5):
    x_star.append((alpha1[i] - 1) / (alpha1[i] + alpha2[i] - 2))

#Q3
#f list contains f(x*) values for respective x* values
f = []
for i in range(5):
    c = beta.pdf(x_star[i], alpha1[i], alpha2[i])
    f.append(c)
    # print(c)
print(x_star)
print(f)
#Q4 and Q5
#acceptance rejection method and histogram plots
bin = [
    0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65,
    0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1
]
for i in range(5):
    c = f[i]
    U1 = []
    a = nm.random.random_sample()
    f_a = beta.pdf(a, alpha1[i], alpha2[i])
Example 38
    'scale': 10,
    'mixture': 0.8
}

# Proposed values are a Gaussian peturbation away from the previous values.
# This is controlled by the sigma of the gaussian, which is defined for each variable
proposal_sigma = {
    'missing': 0.025,
    'shape': 0.05,
    'scale': 2,
    'mixture': 0.025,
}

# PRIORS
priors = (lambda x: {
    'missing': beta.pdf(x['missing'], a=3, b=15),
    'mixture': beta.pdf(x['mixture'], a=1.1, b=1.1),
    'shape': gma.pdf(x['shape'], a=10, scale=1 / 5),
    'scale': gma.pdf(x['scale'], a=6, scale=50)
})


def test_mcmc():
    folder = os.path.dirname(os.path.abspath(__file__))
    file = "/mcmc_test_chain"

    chain = mcmc.run_MCMC(data=am_data,
                          initial_parameters=initial_parameters,
                          proposal_sigma=proposal_sigma,
                          priors=priors,
                          thin=1,
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta, norm

T = 501
true_ctr = 0.5
a, b = 1, 1
plot_indices = (10, 20, 30, 50, 100, 200, 500)
data = np.empty(T)

for i in range(T):
    x = 1 if np.random.random() < true_ctr else 0
    data[i] = x

    a += x
    b += 1 - x

    if i in plot_indices:
        p = data[:i].mean()
        n = i + 1
        std = np.sqrt(p * (1 - p) / n)

        x = np.linspace(0, 1, 200)
        g = norm.pdf(x, loc=p, scale=std)
        plt.plot(x, g, label="Gaussian Approximation")

        posterior = beta.pdf(x, a=a, b=b)
        plt.plot(x, posterior, label="Beta Posterior")
        plt.legend()
        plt.title(f"N = {n}")
        plt.show()
Example 40
    def get_arrays_for_plotting(self):

        to_plot = np.linspace(0, 1, 100)
        y_plot = beta_dist.pdf(to_plot, self.alpha, self.beta)

        return to_plot, y_plot
                        label="true pdf")
        ax[0, col].plot(xgrid,
                        pdf_est * n_samples * (xgrid[1] - xgrid[0]),
                        'r',
                        label="estimated pdf")
        if i == 0:
            ax[0, i].legend()
        ax[0, col].set_title(
            ("After %d iterations\n" +
             "($\\mathrm{E}_q[\\tau]$=%.3f, $\\mathrm{E}_q[\\theta]$=%.3f)") %
            (i + 1, tau_est, theta_est))
        ax[0, col].set_xlabel("$x$")

        # plot marginal distribution of tau
        tau = np.linspace(0, 1.0, 1000)
        q_tau = beta.pdf(tau, N2 + alpha0, N1 + alpha0)
        ax[1, col].plot(tau, q_tau)
        ax[1, col].set_xlabel("$\\tau$")

        # plot marginal distribution of theta
        theta = np.linspace(-4.0, 8.0, 1000)
        q_theta = norm.pdf(theta, m2, np.sqrt(1 / beta2))
        ax[2, col].plot(theta, q_theta)
        ax[2, col].set_xlabel("$\\theta$")
        col = col + 1

# finalize the plot
ax[1, 0].set_ylabel("$q(\\tau)$")
ax[2, 0].set_ylabel("$q(\\theta)$")
plt.tight_layout()
plt.show()
Example 42
        plt.legend(loc='best')
        plt.show()

a_1 = np.linspace(0,10,100)
a_2 = np.linspace(0,10,100)
b_1 = np.linspace(0,10,100)
b_2 = np.linspace(0,10,100)
pi = np.linspace(0,1,10)
input_space = np.linspace(0,1,1000)
for i in range(5):
    pi_rvs = randint.rvs(0, 10)
    a_1_rvs = randint.rvs(0, 100)
    a_2_rvs = randint.rvs(0, 100)
    b_1_rvs = randint.rvs(0, 100)
    b_2_rvs = randint.rvs(0, 100)
    bibeta_example_pdf = pi[pi_rvs]*beta.pdf(input_space, a_1[a_1_rvs], b_1[b_1_rvs]) + (1-pi[pi_rvs])*beta.pdf(input_space, a_2[a_2_rvs], b_2[b_2_rvs])
    bibeta_example_cdf = pi[pi_rvs]*beta.cdf(input_space, a_1[a_1_rvs], b_1[b_1_rvs]) + (1-pi[pi_rvs])*beta.cdf(input_space, a_2[a_2_rvs], b_2[b_2_rvs])
    ax = plt.subplot(111)
    ax.plot(input_space, pi[pi_rvs]*beta.pdf(input_space, a_1[a_1_rvs], b_1[b_1_rvs]), label="1 comp pdf")
    ax.plot(input_space, (1-pi[pi_rvs])*beta.pdf(input_space, a_2[a_2_rvs], b_2[b_2_rvs]), label="2 comp pdf")
    ax.plot(input_space, bibeta_example_pdf, label="Mix pdf")
    plt.legend(loc='best')
    plt.show()
    ax = plt.subplot(111)
    ax.plot(input_space, pi[pi_rvs]*beta.cdf(input_space, a_1[a_1_rvs], b_1[b_1_rvs]), label="1 comp cdf")
    ax.plot(input_space, (1-pi[pi_rvs])*beta.cdf(input_space, a_2[a_2_rvs], b_2[b_2_rvs]), label="2 comp cdf")
    ax.plot(input_space, bibeta_example_cdf, label="Mix cdf")
    plt.legend(loc='best')
    plt.show()

bibeta_example_pdf = pi[pi_rvs]*beta.pdf(input_space, a_1[a_1_rvs], b_1[b_1_rvs]) + (1-pi[pi_rvs])*beta.pdf(input_space, a_2[a_2_rvs], b_2[b_2_rvs])
Example 43
 def p_z(z, include_gt):
     return np.sum([
         beta.pdf(xv, a1, b1) * beta.pdf(xv / z, a2, b2)
         for xv in np.linspace(0.1, 1, 100)
     ]) + (gt_prob * beta.pdf(z, 60, 3) if include_gt else 0)
Example 44
def get_gene_info(
    *,
    annotated_vcf,
    variant_col,
    af_col,
    alt_col='Alt',
    del_col,
    output_dir,
    genes_col,
    maf_threshold=0.01,
    beta_param,
    weight_func='beta'
):
    """
    Create temporary files with variant information for each gene, plus the weights calculated.

    Parameters
    ----------
    annotated_vcf : str
        a file containing the variant, AF, ALT, Gene, and deleterious score.
    variant_col : str
        the name of the variant column.
    af_col : str
        the name of the Allele Frequency column.
    alt_col : str
        the name of the alternate allele column.
    del_col : str
        the name of functional annotation column.
    output_dir : str
        directory to save in temporary files.
    genes_col : str
        the name of genes column.
    maf_threshold : float
        between [0.0-1.0]. the minor allele frequency threshold, default is 0.01.
    beta_param : tuple
        the parameters of the beta function, if chosen for weighting.
    weight_func : str
        the weighting function, beta or log10.

    Returns
    -------
        output directory with all the temporary files.

    """
    skip = 0
    if annotated_vcf.endswith('.gz'):
        with gzip.open(annotated_vcf, 'r') as fin:
            for line in fin:
                if line.decode('utf-8').startswith('##'):
                    skip += 1
    else:
        with open(annotated_vcf, 'r') as file:
            for line in file:
                if line.startswith('##'):
                    skip += 1
    df = pd.read_csv(annotated_vcf, usecols=[variant_col, alt_col, 'INFO'], skiprows=skip, sep=r'\s+', index_col=False)
    info = df['INFO'].str.split(pat=';', expand=True)
    missing_info = info[info.isnull().any(axis=1)].index
    df.drop(missing_info, inplace=True)
    df.reset_index(drop=True, inplace=True)
    info.drop(missing_info, inplace=True)
    info.reset_index(drop=True, inplace=True)
    for col in info.columns:
        val = info[col][0].split('=')
        if len(val) == 1:
            continue
        info.rename(columns={col: val[0]}, inplace=True)
        info[val[0]] = info[val[0]].str.replace(val[0] + "=", "")
    df = pd.concat([df, info], axis=1)
    df = df[df[af_col].values.astype(float) < maf_threshold]
    df.replace('.', 0.0, inplace=True)
    if weight_func == 'beta':
        df[weight_func] = beta.pdf(df[af_col].values.astype(float), beta_param[0], beta_param[1])
    elif weight_func == 'log10':
        df[weight_func] = -np.log10(df[af_col].values.astype(float))
        df[weight_func].replace([np.inf, -np.inf, np.nan], 0.0, inplace=True)
    df['score'] = df[weight_func].values.astype(float) * df[del_col].values.astype(float)
    genes = list(set(df[genes_col]))
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    gene_file = output_dir + '.genes'
    with open(gene_file, 'w') as f:
        f.writelines("%s\n" % gene for gene in genes)
    [df[df[genes_col] == gene][[variant_col, alt_col, 'score', genes_col]].to_csv(os.path.join(output_dir, (
        str(gene) + '.w')), index=False, sep='\t') for gene in tqdm(genes, desc="writing w gene files")]
    [df[df[genes_col] == gene][[variant_col, alt_col]].to_csv(os.path.join(output_dir, (str(gene) + '.v')),
                                                              index=False, sep='\t') for gene in
     tqdm(genes, desc="writing v gene files")]
    return output_dir
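A hedged example invocation (all file and column names below are hypothetical):

out_dir = get_gene_info(
    annotated_vcf='annotated.vcf.gz',
    variant_col='ID',
    af_col='AF',
    del_col='CADD',
    output_dir='gene_tmp',
    genes_col='Gene',
    maf_threshold=0.01,
    beta_param=(1, 25),   # a common MAF-weighting choice in burden tests
    weight_func='beta',
)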
Example 45
    ## into a single number.  This is not completely trivial, as you
    ## need to combine the negative and positive Z into it, but I
    ## think you can all work it out.

    P_dependent *= P_D_positive * P_D_negative
    P_independent *= P_D

    print(
        "Now calculate a posterior distribution for the relevant Bernoulli parameter. Focus on just one value of y for simplicity"
    )

    # First plot the joint distribution
    prior_alpha = 1
    prior_beta = 1
    xplot = np.linspace(0, 1, 200)
    pdf_p = beta.pdf(xplot, prior_alpha + positive_alpha,
                     prior_beta + positive_beta)
    pdf_n = beta.pdf(xplot, prior_alpha + negative_alpha,
                     prior_beta + negative_beta)
    pdf_m = beta.pdf(xplot, prior_alpha + positive_alpha + negative_alpha,
                     prior_beta + positive_beta + negative_beta)
    n_figures += 1
    plt.figure(n_figures)
    plt.clf()
    plt.plot(xplot, pdf_p)
    plt.plot(xplot, pdf_n)
    plt.plot(xplot, pdf_m)
    plt.legend(["z=1", "z=-1", "marginal"])
    plt.title("y=" + str(y))

print("Probability of independence: ",
      P_independent / (P_independent + P_dependent))
Example 46
import numpy as np
from scipy.stats import beta
import matplotlib.pylab as plt

x = np.linspace(0, 1)
unif = beta.pdf(x, 1, 1)
cent = beta.pdf(x, 2.3, 2.3)
cent2 = beta.pdf(x, 12, 12)
skewed = beta.pdf(x, 3, 1)

plt.figure(figsize=(13, 4))
plt.subplot(1, 5, 1)
plt.plot(x, unif)
plt.ylim((0, 4))
plt.title('(A)')
plt.ylabel('distribution of $p$')
plt.xlabel('p')
plt.subplot(1, 5, 2)
plt.plot(x, cent)
plt.title('(B1)')
plt.xlabel('p')
plt.ylim((0, 4))
plt.subplot(1, 5, 3)
plt.plot(x, cent2)
plt.title('(B2)')
plt.xlabel('p')
plt.ylim((0, 4))
plt.subplot(1, 5, 4)
plt.plot(x, skewed)
plt.ylim((0, 4))
plt.xlabel('p')
Example 47
In this program, a stands for alpha and b for beta.
A given probability density function has a mean and a standard deviation.
From a beta distribution's alpha and beta, its mu and sigma follow directly,
and from mu and sigma the alpha and beta can be recovered as well.
'''
import numpy as np
from scipy.stats import beta
import matplotlib.pyplot as plt
import seaborn as sns

if __name__ == "__main__":
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})
    x = np.linspace(0, 1, 100)
    params = [(0.5, 0.5), (1.0, 1.0), (4.0, 3.0), (2.0, 5.0), (6.0, 6.0)]
    for p in params:
        y = beta.pdf(x, p[0], p[1])
        a = p[0]  #mid alpha
        b = p[1]  #mid beta
        u = a / (a + b)  #mid mu
        s = ((a * b) / ((a + b)**2 * (a + b + 1)))**0.5  #mid sigma
        plt.plot(
            x,
            y,
            label="$\\alpha=%.4f$, $\\beta=%.4f$, $\\mu=%.4f$, $\\sigma=%.4f$"
            % (a, b, u, s))
    plt.xlabel("$\\theta$, Fairness")
    plt.ylabel("Density")
    plt.legend(title="Parameters")
    plt.show()
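The note above says the conversion also runs in reverse, but the loop only derives (mu, sigma) from (alpha, beta). A sketch of the inverse, valid whenever sigma**2 < mu * (1 - mu):

def beta_params_from_moments(mu, sigma):
    # Invert mu = a/(a+b) and sigma^2 = ab/((a+b)^2 (a+b+1)).
    nu = mu * (1 - mu) / sigma**2 - 1  # nu = a + b
    return mu * nu, (1 - mu) * nu

print(beta_params_from_moments(0.5, 0.125**0.5))  # ~ (0.5, 0.5)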
Example 48
def sigma_lnprior(sigma, alpha_value, beta_value):
    return np.log(beta.pdf(abs(sigma), alpha_value, beta_value))
Example 49
 def f_beta(thetal, mu, nu):
     return beta.pdf(t(thetal), mu, nu)*2./np.pi
def coin_flip_posterior(h, n, r):
    # Posterior over the heads-probability h after r heads in n flips,
    # assuming a uniform Beta(1, 1) prior: Beta(r + 1, n - r + 1).
    return beta.pdf(h, r + 1, n - r + 1)
 def pdf(self, grades):
     assert self.a is not None and self.b is not None, 'Params have not been set. First run .fit()'
     x = np.linspace(0, 1, 101)
     return x, beta.pdf(x, self.a, self.b)
Example 52
def plot_beta_distribution(a, b):
    x = np.linspace(0, 1, 1000)
    y = beta.pdf(x, a=a, b=b)
    plt.plot(x, y, lw=3, alpha=0.7, label='a=%s, b=%s' % (a, b))
Example 53
    20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 18, 18, 17, 20, 20, 20,
    20, 19, 19, 18, 18, 25, 24, 23, 20, 20, 20, 20, 20, 20, 10, 49, 19,
    46, 27, 17, 49, 47, 20, 20, 13, 48, 50, 20, 20, 20, 20, 20, 20, 20,
    48, 19, 19, 19, 22, 46, 49, 20, 20, 23, 19, 22, 20, 20, 20, 52, 46,
    47, 24, 14
])
M = len(y)


# plot the separate and pooled models
plt.figure(figsize=(8,10))
x = np.linspace(0, 1, 250)

# separate
plt.subplot(2, 1, 1)
lines = plt.plot(x, beta.pdf(x[:,None], y[:-1] + 1, n[:-1] - y[:-1] + 1),
                 linewidth=1)
# highlight the last line
line1, = plt.plot(x, beta.pdf(x, y[-1] + 1, n[-1] - y[-1] + 1), 'r')
plt.legend((lines[0], line1),
           (r'Posterior of $\theta_j$', r'Posterior of $\theta_{71}$'))
plt.yticks(())
plt.title('separate model')

# pooled
plt.subplot(2, 1, 2)
plt.plot(x, beta.pdf(x, y.sum() + 1, n.sum() - y.sum() + 1),
         linewidth=2, label=(r'Posterior of common $\theta$'))
plt.legend()
plt.yticks(())
plt.xlabel(r'$\theta$', fontsize=20)
Example 54
beta_params_a = [1] * latent_dim
beta_params_a[0] = alpha
beta_params_b = [1] * latent_dim

#############################################################
# BUILD DATA SET

if data_set == 1:
    # X ~ MP = Unif <-- Observed, thinned.
    # Y ~ P  = Beta <-- Target, unthinned.
    # Weights = 1/M = P/(MP) = Beta/Unif = Beta
    latent = np.random.uniform(0, 1, size=(data_num, latent_dim))
    latent_unthinned = np.random.beta(beta_params_a, beta_params_b,
                                      (data_num, latent_dim))
    weights = vert(beta.pdf(latent[:, 0], alpha, 1.))
    weights_unthinned = vert(beta.pdf(latent_unthinned[:, 0], alpha, 1.))

    fixed_transform = np.random.normal(0, 1, size=(latent_dim, data_dim))
    data = np.dot(latent, fixed_transform)
    data_unthinned = np.dot(latent_unthinned, fixed_transform)

elif data_set == 2:
    # X ~ MP = Beta <-- Observed, thinned.
    # Y ~ P  = Unif <-- Target, unthinned.
    # Weights = 1/M = P/(MP) = Unif/Beta = 1/Beta
    latent = np.random.beta(beta_params_a, beta_params_b, (data_num, latent_dim))
    latent_unthinned = np.random.uniform(0, 1, size=(data_num, latent_dim))
    weights = vert(1. / beta.pdf(latent[:, 0], alpha, 1.))
    weights_unthinned = vert(1. / beta.pdf(latent_unthinned[:, 0], alpha, 1.))
 def beta_pdf(x): return beta_rv.pdf(x, a=alpha_stat, b=beta_stat)
 x_marginal_pdfs = [beta_pdf]*num_vars
Example 56
    def test_multi_action_distribution(self):
        """Tests the MultiActionDistribution (across all frameworks)."""
        batch_size = 1000
        input_space = Tuple([
            Box(-10.0, 10.0, shape=(batch_size, 4)),
            Box(-2.0, 2.0, shape=(
                batch_size,
                6,
            )),
            Dict({"a": Box(-1.0, 1.0, shape=(batch_size, 4))}),
        ])
        std_space = Box(-0.05, 0.05, shape=(
            batch_size,
            3,
        ))

        low, high = -1.0, 1.0
        value_space = Tuple([
            Box(0, 3, shape=(batch_size, ), dtype=np.int32),
            Box(-2.0, 2.0, shape=(batch_size, 3), dtype=np.float32),
            Dict({"a": Box(0.0, 1.0, shape=(batch_size, 2), dtype=np.float32)})
        ])

        for fw, sess in framework_iterator(session=True):
            if fw == "torch":
                cls = TorchMultiActionDistribution
                child_distr_cls = [
                    TorchCategorical, TorchDiagGaussian,
                    partial(TorchBeta, low=low, high=high)
                ]
            else:
                cls = MultiActionDistribution
                child_distr_cls = [
                    Categorical,
                    DiagGaussian,
                    partial(Beta, low=low, high=high),
                ]

            inputs = list(input_space.sample())
            distr = cls(np.concatenate([inputs[0], inputs[1], inputs[2]["a"]],
                                       axis=1),
                        model={},
                        action_space=value_space,
                        child_distributions=child_distr_cls,
                        input_lens=[4, 6, 4])

            # Adjust inputs for the Beta distr just as Beta itself does.
            inputs[2]["a"] = np.clip(inputs[2]["a"], np.log(SMALL_NUMBER),
                                     -np.log(SMALL_NUMBER))
            inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0
            # Sample deterministically.
            expected_det = [
                np.argmax(inputs[0], axis=-1),
                inputs[1][:, :3],  # [:3]=Mean values.
                # Mean for a Beta distribution:
                # 1 / [1 + (beta/alpha)] * range + low
                (1.0 /
                 (1.0 + inputs[2]["a"][:, 2:] / inputs[2]["a"][:, 0:2])) *
                (high - low) + low,
            ]
            out = distr.deterministic_sample()
            if sess:
                out = sess.run(out)
            check(out[0], expected_det[0])
            check(out[1], expected_det[1])
            check(out[2]["a"], expected_det[2])

            # Stochastic sampling -> expect roughly the mean.
            inputs = list(input_space.sample())
            # Fix categorical inputs (not needed for distribution itself, but
            # for our expectation calculations).
            inputs[0] = softmax(inputs[0], -1)
            # Fix std inputs (shouldn't be too large for this test).
            inputs[1][:, 3:] = std_space.sample()
            # Adjust inputs for the Beta distr just as Beta itself does.
            inputs[2]["a"] = np.clip(inputs[2]["a"], np.log(SMALL_NUMBER),
                                     -np.log(SMALL_NUMBER))
            inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0
            distr = cls(np.concatenate([inputs[0], inputs[1], inputs[2]["a"]],
                                       axis=1),
                        model={},
                        action_space=value_space,
                        child_distributions=child_distr_cls,
                        input_lens=[4, 6, 4])
            expected_mean = [
                np.mean(np.sum(inputs[0] * np.array([0, 1, 2, 3]), -1)),
                inputs[1][:, :3],  # [:3]=Mean values.
                # Mean for a Beta distribution:
                # 1 / [1 + (beta/alpha)] * range + low
                (1.0 / (1.0 + inputs[2]["a"][:, 2:] / inputs[2]["a"][:, :2])) *
                (high - low) + low,
            ]
            out = distr.sample()
            if sess:
                out = sess.run(out)
            out = list(out)
            if fw == "torch":
                out[0] = out[0].numpy()
                out[1] = out[1].numpy()
                out[2]["a"] = out[2]["a"].numpy()
            check(np.mean(out[0]), expected_mean[0], decimals=1)
            check(np.mean(out[1], 0), np.mean(expected_mean[1], 0), decimals=1)
            check(np.mean(out[2]["a"], 0),
                  np.mean(expected_mean[2], 0),
                  decimals=1)

            # Test log-likelihood outputs.
            # Make sure beta-values are within 0.0 and 1.0 for the numpy
            # calculation (which doesn't have scaling).
            inputs = list(input_space.sample())
            # Adjust inputs for the Beta distr just as Beta itself does.
            inputs[2]["a"] = np.clip(inputs[2]["a"], np.log(SMALL_NUMBER),
                                     -np.log(SMALL_NUMBER))
            inputs[2]["a"] = np.log(np.exp(inputs[2]["a"]) + 1.0) + 1.0
            distr = cls(np.concatenate([inputs[0], inputs[1], inputs[2]["a"]],
                                       axis=1),
                        model={},
                        action_space=value_space,
                        child_distributions=child_distr_cls,
                        input_lens=[4, 6, 4])
            inputs[0] = softmax(inputs[0], -1)
            values = list(value_space.sample())
            log_prob_beta = np.log(
                beta.pdf(values[2]["a"], inputs[2]["a"][:, :2],
                         inputs[2]["a"][:, 2:]))
            # Now do the up-scaling for [2] (beta values) to be between
            # low/high.
            values[2]["a"] = values[2]["a"] * (high - low) + low
            inputs[1][:, 3:] = np.exp(inputs[1][:, 3:])
            expected_log_llh = np.sum(
                np.concatenate([
                    np.expand_dims(
                        np.log(
                            [i[values[0][j]]
                             for j, i in enumerate(inputs[0])]), -1),
                    np.log(
                        norm.pdf(values[1], inputs[1][:, :3],
                                 inputs[1][:, 3:])), log_prob_beta
                ], -1), -1)

            values[0] = np.expand_dims(values[0], -1)
            if fw == "torch":
                values = tree.map_structure(lambda s: torch.Tensor(s), values)
            # Test all flattened input.
            concat = np.concatenate(tree.flatten(values),
                                    -1).astype(np.float32)
            out = distr.logp(concat)
            if sess:
                out = sess.run(out)
            check(out, expected_log_llh, atol=15)
            # Test structured input.
            out = distr.logp(values)
            if sess:
                out = sess.run(out)
            check(out, expected_log_llh, atol=15)
            # Test flattened input.
            out = distr.logp(tree.flatten(values))
            if sess:
                out = sess.run(out)
            check(out, expected_log_llh, atol=15)
Example 57
from scipy.stats import beta
import numpy as np
import matplotlib.pyplot as plt
import os
import pyprobml_utils as pml

x = np.linspace(0, 1, 100)
aa = [0.1, 1., 2., 8.]
bb = [0.1, 1., 3., 4.]
props = ['b-', 'r:', 'b-.', 'g--']
for a, b, p in zip(aa, bb, props):
    y = beta.pdf(x, a, b)
    plt.plot(x, y, p, lw=3, label='a=%.1f,b=%.1f' % (a, b))
plt.legend(loc='upper left')
pml.savefig('betaPlotDemo.png')
plt.show()
Example 58
import numpy as np
import matplotlib.pylab as pl
import scipy
from scipy.stats import beta

alphas = [2, 2, 1, 1]
betas = [2, 2, 1, 1]
Ns = [4, 40, 4, 40]
ks = [1, 10, 1, 10]
plots = ['betaPostInfSmallSample', 'betaPostInfLargeSample',
         'betaPostUninfSmallSample', 'betaPostUninfLargeSample']

x = np.linspace(0.001, 0.999, 50)
for i in range(len(plots)):
  alpha_prior = alphas[i]
  beta_prior = betas[i]
  N = Ns[i]
  k = ks[i]
  alpha_post = alpha_prior + N - k
  beta_post = beta_prior + k
  alpha_lik = N - k + 1
  beta_lik = k + 1

  pl.plot(x, beta.pdf(x, alpha_prior, beta_prior), 'r-', 
          label='prior Be(%2.1f, %2.1f)' % (alpha_prior, beta_prior))
  pl.plot(x, beta.pdf(x, alpha_lik, beta_lik), 'k:', 
          label='lik Be(%2.1f, %2.1f)' % (alpha_lik, beta_lik))
  pl.plot(x, beta.pdf(x, alpha_post, beta_post), 'b-', 
          label='post Be(%2.1f, %2.1f)' % (alpha_post, beta_post))
  pl.legend(loc='upper left')
  pl.savefig(plots[i] + '.png')
  pl.show()
Example 59
from scipy.stats import beta
print(beta.pdf(2, 3, 3))  # prints 0.0: x=2 lies outside the beta's [0, 1] support
Example 60
def dpbmm(data, num_iter, param=None):
    # Beta value Dirichlet process mixture model, with no gap algorithm
    s_data = np.shape(data)
    G = copy.copy(s_data[0])  ## number of genes
    C = copy.copy(s_data[1])  ## number of samples
    count_sum = 100

    # parameters for G0, Gamma(a, b)
    a = 6
    b = 5

    if param is None:
        param = params(mu_a=3.2,
                       mu_b=2.2,
                       sigma2_a=5,
                       sigma2_b=2,
                       k=C,
                       m_s=np.ones(C),
                       s=np.arange(C),
                       tau=5)
        param.param_val(C, G)

    for i in range(num_iter):
        new_param = copy.deepcopy(param)
        ## Gibbs sampling
        ## step 1: resample s

        s = copy.copy(new_param.s)
        k = copy.copy(new_param.k)
        m_s = copy.copy(new_param.m_s)
        alpha_val = copy.copy(new_param.alpha_val)
        beta_val = copy.copy(new_param.beta_val)
        tau = copy.copy(new_param.tau)
        mu_a = copy.copy(new_param.mu_a)
        mu_b = copy.copy(new_param.mu_b)
        sigma2a = copy.copy(new_param.sigma2_a)
        sigma2b = copy.copy(new_param.sigma2_b)

        for j in range(C):
            s, k = rearrange_s(s)
            m_s, k = calculate_m_s(s)
            if m_s[s[j]] == 1:
                u = np.random.rand()
                if u < (k - 1) / k:
                    continue
                ind = np.where(s == (k - 1))[0]
                tmp = copy.copy(s[j])
                s[ind] = copy.copy(tmp)
                s[j] = k - 1

                tmp_alpha_val = copy.copy(alpha_val[:, tmp])
                alpha_val[:, tmp] = copy.copy(alpha_val[:, k - 1])
                alpha_val[:, k - 1] = copy.copy(tmp_alpha_val)
                tmp_beta_val = copy.copy(beta_val[:, tmp])
                beta_val[:, tmp] = copy.copy(beta_val[:, k - 1])
                beta_val[:, k - 1] = copy.copy(tmp_beta_val)

                m_s, k = calculate_m_s(s, c=j)
                p_x = np.zeros(k)

                for l in range(k):
                    L_alpha_val, L_beta_val = exp_trans(
                        alpha_val[:, l], beta_val[:, l])
                    p_x[l] = np.prod(
                        beta.pdf(data[:, j], L_alpha_val, L_beta_val))

                w = m_s / (tau + j) * p_x

                if k < C:
                    L_alpha_val, L_beta_val = exp_trans(
                        alpha_val[:, k], beta_val[:, k])
                    # w[k+1] = tau/(tau+j-1) * np.prod(beta(data[:, j], L_alpha_val, L_beta_val))
                    temp1 = np.prod(
                        beta.pdf(data[:, j], L_alpha_val, L_beta_val))
                    p_x = np.append(p_x, temp1)
                    w = np.append(w, tau / (tau + j) * temp1)
                    population = np.arange(k + 1)

                else:
                    population = np.arange(k)
                w = w / np.sum(w)
                s[j] = np.random.choice(population, size=1, p=w)
            else:
                # del m_s
                # del p_x
                # del w
                m_s, k = calculate_m_s(s, c=j)
                p_x = np.zeros(k)

                for l in range(k):
                    L_alpha_val, L_beta_val = exp_trans(
                        alpha_val[:, l], beta_val[:, l])
                    p_x[l] = np.prod(
                        beta.pdf(data[:, j], L_alpha_val, L_beta_val))

                w = m_s / (tau + j) * p_x

                if k < C:
                    L_alpha_val, L_beta_val = exp_trans(
                        alpha_val[:, k], beta_val[:, k])
                    p_x = np.append(
                        p_x,
                        np.prod(beta.pdf(data[:, j], L_alpha_val, L_beta_val)))
                    w = np.append(w, p_x[k] * tau / (tau + j))
                    population = np.arange(k + 1)
                else:
                    population = np.arange(k)
                w = w / np.sum(w)
                s[j] = np.random.choice(population, size=1, p=w)
        s, k = rearrange_s(s)
        m_s, k = calculate_m_s(s, c=0)

        # new_alpha_val = np.zeros([G, k])
        # new_beta_val = np.zeros([G, k])
        for g in range(G):

            for j in np.arange(k, C):
                alpha_val[g, j] = np.random.normal(loc=mu_a,
                                                   scale=sigma2a,
                                                   size=1)
                beta_val[g, j] = np.random.normal(loc=mu_b,
                                                  scale=sigma2b,
                                                  size=1)

            V_a = sigma2a * np.ones([G, G])
            V_b = sigma2b * np.ones([G, G])

            for j in range(k):
                new_alpha_val = np.random.multivariate_normal(
                    mean=alpha_val[:, j], cov=V_a)
                new_beta_val = np.random.multivariate_normal(mean=beta_val[:,
                                                                           j],
                                                             cov=V_b)

                count = 0
                accept = 0
                c_1 = 0
                ind = np.where(s == j)[0]

                while accept == 0:
                    for count_ind in range(count_sum):
                        p_xi = np.zeros(len(ind))
                        p_xi_t = np.zeros(len(ind))
                        new_alpha_val[g] = np.random.normal(loc=alpha_val[g,
                                                                          j],
                                                            scale=sigma2a,
                                                            size=1)
                        # new_beta_val[g, i] = np.random.normal(loc=beta_val[g, i], scale=sigma2b, size=1)
                        # ind = np.where(s == j)
                        L_alpha_val, L_beta_val = exp_trans(
                            new_alpha_val, beta_val[:, j])
                        L_alpha_val_t, L_beta_val_t = exp_trans(
                            alpha_val[:, j], beta_val[:, j])

                        for m in range(len(ind)):
                            p_xi[m] = np.sum(
                                np.log(
                                    beta.pdf(data[:, ind[m]], L_alpha_val,
                                             L_beta_val)))
                            p_xi_t[m] = np.sum(
                                np.log(
                                    beta.pdf(data[:, ind[m]], L_alpha_val_t,
                                             L_beta_val_t)))

                        sum_p_xi = np.sum(p_xi)
                        sum_p_xi_t = np.sum(p_xi_t)

                        fx = np.dot(
                            norm.pdf(alpha_val[:, j], np.zeros(G), sigma2a),
                            norm.pdf(beta_val[:, j], np.zeros(G), sigma2b))
                        fx_t = np.dot(
                            norm.pdf(new_alpha_val, np.zeros(G), sigma2a),
                            norm.pdf(beta_val[:, j], np.zeros(G), sigma2b))

                        # Metropolis Hastings sampling
                        if sum_p_xi == sum_p_xi_t and sum_p_xi == -np.inf:
                            if fx == fx_t and fx == 0:
                                tmp = 0
                            else:
                                tmp = np.exp(np.log(fx) - np.log(fx_t))
                        else:
                            if fx == fx_t and fx == 0:
                                tmp = np.exp(sum_p_xi - sum_p_xi_t)
                            else:
                                if fx == 0 and sum_p_xi_t == -np.inf:
                                    tmp = np.exp(sum_p_xi - np.log(fx_t))
                                elif fx_t == 0 and sum_p_xi == -np.inf:
                                    tmp = np.exp(np.log(fx) - sum_p_xi_t)
                                else:
                                    tmp = np.exp(
                                        np.log(fx) + sum_p_xi - np.log(fx_t) -
                                        sum_p_xi_t)

                        u = np.random.rand()
                        # count_ind += 1
                        if u < tmp:
                            alpha_val[g, j] = new_alpha_val[g]
                            count += 1

                    c_1 += 1

                    if count >= 3 or c_1 > 4:
                        accept = 1
                    else:
                        sigma2a *= 0.98
                        if sigma2a < 0.01:
                            sigma2a = 0.01
                        if count > 20:
                            sigma2a /= 0.98

                accept = 0
                count = 0
                c_2 = 0
                while accept == 0:
                    for count_ind in range(count_sum):
                        new_beta_val[g] = np.random.normal(loc=beta_val[g, j],
                                                           scale=sigma2b,
                                                           size=1)

                        L_alpha_val, L_beta_val = exp_trans(
                            alpha_val[:, j], new_beta_val)
                        L_alpha_val_t, L_beta_val_t = exp_trans(
                            alpha_val[:, j], beta_val[:, j])

                        p_xi = np.zeros(len(ind))
                        p_xi_t = np.zeros(len(ind))
                        for m in range(len(ind)):
                            p_xi[m] = np.sum(
                                np.log(
                                    beta.pdf(data[:, ind[m]], L_alpha_val,
                                             L_beta_val)))
                            p_xi_t[m] = np.sum(
                                np.log(
                                    beta.pdf(data[:, ind[m]], L_alpha_val_t,
                                             L_beta_val_t)))

                        sum_p_xi = np.sum(p_xi)
                        sum_p_xi_t = np.sum(p_xi_t)

                        fx = np.dot(
                            norm.pdf(alpha_val[:, j], np.zeros(G), sigma2a),
                            norm.pdf(beta_val[:, j], np.zeros(G), sigma2b))
                        fx_t = np.dot(
                            norm.pdf(new_alpha_val, np.zeros(G), sigma2a),
                            norm.pdf(new_beta_val, np.zeros(G), sigma2b))

                        # Metropolis Hastings sampling
                        if sum_p_xi == sum_p_xi_t and sum_p_xi == -np.inf:
                            if fx == fx_t and fx == 0:
                                tmp = 0
                            else:
                                tmp = np.exp(np.log(fx) - np.log(fx_t))
                        else:
                            if fx == fx_t and fx == 0:
                                tmp = np.exp(sum_p_xi - sum_p_xi_t)
                            else:
                                if fx == 0 and sum_p_xi_t == -np.inf:
                                    tmp = np.exp(sum_p_xi - np.log(fx_t))
                                elif fx_t == 0 and sum_p_xi == -np.inf:
                                    tmp = np.exp(np.log(fx) - sum_p_xi_t)
                                else:
                                    tmp = np.exp(
                                        np.log(fx) + sum_p_xi - np.log(fx_t) -
                                        sum_p_xi_t)

                        u = np.random.rand()
                        if u < tmp:
                            beta_val[g, j] = new_beta_val[g]
                            count += 1

                    c_2 += 1

                    if count >= 3 or c_2 > 4:
                        accept = 1
                    else:
                        sigma2b *= 0.98
                        if sigma2b < 0.01:
                            sigma2b = 0.01
                        if count > 20:
                            sigma2b /= 0.98

        # step 3: resampling mixture weights pi
        if k == 1:
            continue
        m_s, k = calculate_m_s(s)
        pi = dirichletrnd(m_s + tau / k)

        # step 4: resampling concentration parameter tau
        r = np.random.beta(a=tau + 1, b=C, size=1)
        eta_r = 1 / (C * (b - np.log(r)) / (a + k - 1) + 1)
        tmp = np.random.rand()
        if tmp < eta_r:
            tau_new = np.random.gamma(shape=a + k, scale=b - np.log(r), size=1)
        else:
            tau_new = np.random.gamma(shape=a + k - 1,
                                      scale=b - np.log(r),
                                      size=1)
        tau = tau_new

        # step 5: update parameters for the usage in the new iteration
        new_param.s = s
        new_param.k = k
        new_param.m_s = m_s
        new_param.alpha_val = alpha_val
        new_param.beta_val = beta_val
        new_param.tau = tau
        new_param.pi = pi
        new_param.sigma2_a = sigma2a
        new_param.sigma2_b = sigma2b

        param = copy.deepcopy(new_param)

        print(str(i) + " iterations done")

    print(param.s)
    return param