from scipy.special import beta as beta_func


def probability_b_is_better_than_a(a_a, b_a, a_b, b_b):
    """Closed-form Pr(p_B > p_A) for p_A ~ Beta(a_a, b_a), p_B ~ Beta(a_b, b_b).

    Requires integer a_b; the closed-form sum runs over i = 0 .. a_b - 1.
    """
    total = 0.0
    for i in range(a_b):  # range(a_b - 1) would drop the last term of the sum
        numerator = beta_func(a_a + i, b_a + b_b)
        denominator = (b_b + i) * beta_func(1 + i, b_b) * beta_func(a_a, b_a)
        total += numerator / denominator
    return total
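# A quick way to sanity-check the closed form above is a Monte Carlo estimate.
# This is an illustrative check (parameter values are arbitrary), assuming
# probability_b_is_better_than_a is in scope:
import numpy as np

rng = np.random.default_rng(0)
a_a, b_a, a_b, b_b = 20, 30, 28, 22  # illustrative posterior counts
exact = probability_b_is_better_than_a(a_a, b_a, a_b, b_b)
approx = (rng.beta(a_b, b_b, 200_000) > rng.beta(a_a, b_a, 200_000)).mean()
print(exact, approx)  # should agree to roughly three decimal places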
# requires: import numpy as np; from functools import reduce;
#           from scipy.signal import fftconvolve; from scipy.special import beta as beta_func
def fit(self, D, pN):
    # get number of trials
    T = int(D.sum())  # sum over all elements in D
    self.T = T  # save this
    Nmax = len(pN) - 1  # maximum possible N value in a well
    Ntotalmax = T * Nmax  # maximum total possible N's summed up
    self.Ntotalmax = Ntotalmax  # save this
    # preallocate array for (N, N1) possibilities:
    # 2D array with (Ntotalmax x Ntotalmax) possibilities
    Ns = np.zeros((Ntotalmax + 1, Ntotalmax + 1))
    # precompute updaters
    updaters = np.zeros((2, 2, Nmax + 1, Nmax + 1))
    for i in range(2):
        for j in range(2):
            updaters[i, j] = self.get_updater((i, j), pN)
    tmp_Ns = []
    # iterate over data matrix
    it = np.nditer(D, flags=['multi_index'])
    while not it.finished:
        dval = it[0]
        dindex = it.multi_index
        # need to perform convolution on 2D Ns for each recorded trial
        if dval != 0:
            tmp_Ns.append(self.convpower2D(updaters[dindex], dval))
        '''
        # earlier per-trial version:
        for i in range(dval):
            Ns = self.convolute2D(Ns, updaters[dindex])
            Ns = Ns / Ns.sum()  # just to maintain normalization
        '''
        it.iternext()
    Ns = reduce(fftconvolve, tmp_Ns)
    Ns = Ns * (Ns > 0)  # clip tiny negative values left by the FFT
    Ns = Ns / Ns.sum()
    # save result
    self.Ns = Ns
    # produce proper weights for beta distributions
    bweights = np.zeros((Ntotalmax + 1, Ntotalmax + 1))
    for i in range(Ntotalmax + 1):
        for j in range(i + 1):  # N1 can be at most N
            # with fftconvolve this was overweighting entries whose beta_func
            # values are O(0.1); a threshold of 1e-15 works well for the FFT output
            if Ns[i, j] > 1e-15:
                bweights[i, j] = Ns[i, j] * beta_func(j + self.alpha, i - j + self.beta)
    # sometimes the beta weights are so small that everything underflows to 0,
    # especially (I suspect) if N is off
    if bweights.sum() > 0:
        bweights = bweights / bweights.sum()
    else:
        bweights = Ns
    self.bweights = bweights
    return Ns
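# The reduce(fftconvolve, ...) step above combines many per-trial count
# distributions in one pass. Minimal self-contained sketch of that pattern,
# including the clipping of FFT round-off (values illustrative):
from functools import reduce

import numpy as np
from scipy.signal import fftconvolve

dists = [np.array([0.5, 0.5]),
         np.array([0.25, 0.5, 0.25]),
         np.array([0.1, 0.9])]
total = reduce(fftconvolve, dists)  # distribution of the summed count
total = total * (total > 0)         # FFT round-off can leave tiny negatives
total = total / total.sum()         # renormalize after clipping
print(total, total.sum())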
# requires: import numpy as np; from scipy.special import beta as beta_func
def log_prior(self, value):
    """
    Evaluates and returns the log of this prior when the variable is value.

    value: numerical value of the variable
    """
    if (value <= 0) or (value >= 1):
        return -np.inf
    return (self._alpha_min_one * np.log(value)) +\
           (self._beta_min_one * np.log(1. - value)) -\
           np.log(beta_func(self.alpha, self.beta))
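# Quick check of the log density above against scipy: with alpha and beta as
# the shape parameters (illustrative values), the closed form should match
# scipy.stats.beta.logpdf.
import numpy as np
from scipy.special import beta as beta_func
from scipy.stats import beta as beta_dist

alpha, beta, value = 2.0, 5.0, 0.3
log_p = ((alpha - 1) * np.log(value) + (beta - 1) * np.log(1. - value)
         - np.log(beta_func(alpha, beta)))
print(np.isclose(log_p, beta_dist.logpdf(value, alpha, beta)))  # True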
# requires: import numpy as np; from scipy.special import comb; from scipy.special import beta as beta_func
def fit(self, D, N):
    # get number of trials
    T = int(D.sum())  # sum over all elements in D
    self.T = T  # save this
    Ntotal = T * N
    self.Ntotal = Ntotal  # save this
    # preallocate array for N1 possibilities
    N1s = np.zeros(Ntotal + 1)
    N1s[0] = 1.0
    w = 2 ** N - 2  # to weight combination trials
    # iterate over data matrix
    it = np.nditer(D, flags=['multi_index'])
    while not it.finished:
        dval = it[0]
        dindex = it.multi_index
        # need to perform convolution on N1s for each recorded trial
        for i in range(dval):
            updater = np.zeros(N + 1)  # holds probability weights to convolve by
            if dindex == (1, 0):
                updater[N] += 1
            elif dindex == (0, 1):
                updater[0] += 1
            elif dindex == (1, 1):
                for j in range(1, N):
                    updater[j] += comb(N, j) / w
            else:
                # ignore (0, 0)
                pass
            N1s = self.convolute(N1s, updater)
            N1s = N1s / sum(N1s)  # just to maintain normalization
        it.iternext()
    # save result
    self.N1s = N1s
    # produce proper weights for beta distributions
    bweights = np.zeros(Ntotal + 1)
    for i in range(len(N1s)):
        if N1s[i] > 0:
            bweights[i] = N1s[i] * beta_func(i + self.alpha, Ntotal - i + self.beta)
    # sometimes the beta weights are so small that everything underflows to 0,
    # especially (I suspect) if N is off
    if sum(bweights) > 0:
        bweights = bweights / sum(bweights)
    else:
        bweights = N1s
    self.bweights = bweights
    return N1s
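# The core trick in both fit methods above: the distribution of a sum of
# independent discrete counts is the convolution of the per-trial
# distributions. Minimal self-contained sketch (values illustrative):
import numpy as np

single_trial = np.array([0.2, 0.5, 0.3])  # P(count = 0, 1, 2) in one trial

total = np.array([1.0])  # point mass at 0 before any trials
for _ in range(3):       # three independent trials
    total = np.convolve(total, single_trial)

print(total)        # probabilities for totals 0..6
print(total.sum())  # 1.0 up to round-off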
# requires: import numpy as np; from typing import Tuple; from scipy.special import betaln as beta_func_ln
def _find_beta_distribution_parameters(self, X, N_l: int) -> Tuple[float, float]:
    """
    Function implementing gradient ascent to find the parameters of the beta
    distribution for the given class. It maximizes the following log-likelihood:

    .. math::

        l_l (\\alpha, \\beta) = - N_l \\, log \\, B (\\alpha, \\beta) + \\sum_{i: c_i = l} log \\, B(k_i + \\alpha, n_i - k_i + \\beta), l = 0, 1

    Arguments:

        X: design matrix of shape [number of examples x number of features], where the
           number of features is 2 (the first feature is the number of disease-associated
           sequences and the second is the total number of sequences per example)

        N_l: number of examples in the given class

    Returns:

        estimated values of alpha and beta for the given class
    """
    k_is, n_is = X[:, 0], X[:, 1]
    alpha, beta = self._initialize_beta_distribution_parameters(k_is, n_is)
    k_is, n_is = self._perform_laplace_smoothing(k_is, n_is)

    for iteration in range(self.max_iterations):
        # the first term uses log B(alpha, beta), matching the formula above
        log_likelihood = -N_l * beta_func_ln(alpha, beta) + np.sum(
            beta_func_ln(k_is + alpha, n_is - k_is + beta))

        if np.isnan(log_likelihood):
            raise RuntimeError(
                f"ProbabilisticBinaryClassifier: while estimating beta distribution parameters, "
                f"log_likelihood became nan in iteration {iteration}. \nalpha: {alpha}, beta: {beta}")
        elif log_likelihood > self.likelihood_threshold:
            break

        grad_alpha, grad_beta = self._compute_alpha_beta_gradients(N_l, alpha, beta, k_is, n_is)

        alpha = max(alpha + self.update_rate * grad_alpha,
                    ProbabilisticBinaryClassifier.SMALL_POSITIVE_NUMBER)
        beta = max(beta + self.update_rate * grad_beta,
                   ProbabilisticBinaryClassifier.SMALL_POSITIVE_NUMBER)

    return alpha, beta
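# The gradients used in the update step above follow from differentiating the
# log-likelihood: with psi the digamma function,
#   dl/dalpha = -N_l * (psi(alpha) - psi(alpha + beta))
#               + sum_i (psi(k_i + alpha) - psi(n_i + alpha + beta)),
# and symmetrically for beta. A minimal sketch of what
# _compute_alpha_beta_gradients could look like under that derivation (an
# illustration, not necessarily the project's exact implementation):
import numpy as np
from scipy.special import digamma


def compute_alpha_beta_gradients(N_l, alpha, beta, k_is, n_is):
    # derivative of -N_l * log B(alpha, beta)
    grad_alpha = -N_l * (digamma(alpha) - digamma(alpha + beta))
    grad_beta = -N_l * (digamma(beta) - digamma(alpha + beta))
    # derivative of sum_i log B(k_i + alpha, n_i - k_i + beta)
    grad_alpha += np.sum(digamma(k_is + alpha) - digamma(n_is + alpha + beta))
    grad_beta += np.sum(digamma(n_is - k_is + beta) - digamma(n_is + alpha + beta))
    return grad_alpha, grad_beta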
def beta_prior(self, param_name, param_value):
    '''
    Check if the given param_value is valid according to the prior distribution.
    Returns the log prior probability, or np.inf if the param_value is invalid.
    '''
    prior_dict = self.prior
    if prior_dict is None:
        raise ValueError('No prior found')
    alpha = prior_dict[param_name][1]
    beta = prior_dict[param_name][2]
    from scipy.special import beta as beta_func  # "import scipy.special.beta as ..." is invalid syntax
    prob = (param_value**(alpha - 1) * (1 - param_value)**(beta - 1)) / beta_func(alpha, beta)
    if prob < 0:
        warnings.warn(
            'Probability less than 0 while checking Beta prior! Current parameter name and value: {0}:{1}.'
            .format(param_name, param_value))
        return np.inf
    else:
        return np.log(prob)
def make_beta(alpha, beta):
    if alpha <= -1 or beta <= -1:
        raise ValueError("Parameters must be greater than -1", alpha, beta)
    beta_01 = partial(random.betavariate, beta + 1, alpha + 1)  # random uses switched notation...

    def generator():
        return beta_01() * 2 - 1  # scaling from [0,1] to [-1,1]

    return Distribution(
        "Beta",
        lambda x: (((1 - x)**alpha) * ((1 + x)**beta) / (2**(alpha + beta + 1)) /
                   beta_func(alpha + 1, beta + 1) if -1. < x < 1. else 0.),
        (-1, 1),
        generator,
        # switched notation in stats, so a=beta+1 is on purpose
        inverse_distribution=partial(stats_beta.ppf, a=beta + 1, b=alpha + 1, loc=-1, scale=2),
        show_name="Beta({}, {})".format(alpha, beta),
        parameters=(alpha, beta))
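# Usage sketch for the density above (parameter values illustrative): it is a
# Jacobi-style weight on (-1, 1), normalized by 2**(alpha+beta+1) * B(alpha+1, beta+1).
import random
from functools import partial

from scipy.integrate import quad
from scipy.special import beta as beta_func

alpha, beta = 0.5, 1.5
pdf = lambda x: ((1 - x)**alpha * (1 + x)**beta
                 / 2**(alpha + beta + 1) / beta_func(alpha + 1, beta + 1))
mass, _ = quad(pdf, -1, 1)
print(mass)  # ~1.0: the density integrates to one

# sampling route used by the generator: betavariate on (0, 1), rescaled to (-1, 1)
draw = partial(random.betavariate, beta + 1, alpha + 1)
print(all(-1 < draw() * 2 - 1 < 1 for _ in range(5)))  # True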
def beta_binomial(n, alpha, beta):
    return np.array([comb(n - 1, k) * beta_func(k + alpha, n - 1 - k + beta) /
                     beta_func(alpha, beta) for k in range(n)])
def beta_binomial(alpha, beta, n=100):
    return np.matrix([comb(n - 1, k) * beta_func(k + alpha, n - 1 - k + beta) /
                      beta_func(alpha, beta) for k in range(n)])
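# Both beta_binomial variants above tabulate the beta-binomial pmf for n - 1
# trials at k = 0 .. n - 1, so the entries should sum to one. Illustrative check:
import numpy as np
from scipy.special import beta as beta_func, comb

n, alpha, beta = 10, 2.0, 3.0
pmf = np.array([comb(n - 1, k) * beta_func(k + alpha, n - 1 - k + beta)
                / beta_func(alpha, beta) for k in range(n)])
print(pmf.sum())  # 1.0 up to round-off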
# requires: import numpy as np; from scipy.special import beta as beta_func
def __init__(self, a, b):
    self.a = a
    self.b = b
    self.normalization = 1. / beta_func(self.a, self.b)
    self.log_normalization = np.log(self.normalization)
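# Note on the constructor above: for large a and b, beta_func underflows and
# np.log of its reciprocal overflows. scipy.special.betaln evaluates
# log B(a, b) directly, so log_normalization = -betaln(a, b) stays finite.
# Illustrative comparison:
from scipy.special import beta as beta_func, betaln

a, b = 500.0, 800.0
print(beta_func(a, b))   # 0.0: underflows in double precision
print(-betaln(a, b))     # finite, usable log normalization constant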
def bern_beta(prior_shape, data_vec, cred_mass=0.95):
    """Bayesian updating for Bernoulli likelihood and beta prior.

    Input arguments:
        prior_shape  vector of parameter values for the prior beta distribution.
        data_vec     vector of 1's and 0's.
        cred_mass    the probability mass of the HDI.
    Output:
        post_shape   vector of parameter values for the posterior beta distribution.
    Graphics:
        Creates a three-panel graph of prior, likelihood, and posterior with the
        highest posterior density interval.
    Example of use:
        post_shape = bern_beta(prior_shape=[1, 1], data_vec=[1, 0, 0, 1, 1])"""
    import sys
    import numpy as np
    from scipy.stats import beta
    from scipy.special import beta as beta_func
    import matplotlib.pyplot as plt
    from HDIofICDF import HDIofICDF

    # Check for errors in input arguments:
    if len(prior_shape) != 2:
        sys.exit('prior_shape must have two components.')
    if any([i < 0 for i in prior_shape]):
        sys.exit('prior_shape components must be positive.')
    if any([i != 0 and i != 1 for i in data_vec]):
        sys.exit('data_vec must be a vector of 1s and 0s.')
    if cred_mass <= 0 or cred_mass >= 1.0:
        sys.exit('cred_mass must be between 0 and 1.')
    # Rename the prior shape parameters, for convenience:
    a = prior_shape[0]
    b = prior_shape[1]
    # Create summary values of the data:
    data_vec = np.asarray(data_vec)   # so the boolean indexing below also works on lists
    z = sum(data_vec[data_vec == 1])  # number of 1's in data_vec
    N = len(data_vec)                 # number of flips in data_vec
    # Compute the posterior shape parameters:
    post_shape = [a + z, b + N - z]
    # Compute the evidence, p(D):
    p_data = beta_func(z + a, N - z + b) / beta_func(a, b)
    # Construct grid of theta values, used for graphing.
    bin_width = 0.005  # Arbitrary small value for comb on theta.
    theta = np.arange(bin_width / 2, 1 - (bin_width / 2) + bin_width, bin_width)
    # Compute the prior at each value of theta.
    p_theta = beta.pdf(theta, a, b)
    # Compute the likelihood of the data at each value of theta.
    p_data_given_theta = theta**z * (1 - theta)**(N - z)
    # Compute the posterior at each value of theta.
    post_a = a + z
    post_b = b + N - z
    p_theta_given_data = beta.pdf(theta, a + z, b + N - z)
    # Determine the limits of the highest density interval.
    intervals = HDIofICDF(beta, cred_mass, a=post_shape[0], b=post_shape[1])
    # Plot the results.
    plt.figure(figsize=(12, 12))
    plt.subplots_adjust(hspace=0.7)
    # Plot the prior.
    locx = 0.05
    plt.subplot(3, 1, 1)
    plt.plot(theta, p_theta)
    plt.xlim(0, 1)
    plt.ylim(0, np.max(p_theta) * 1.2)
    plt.xlabel(r'$\theta$')
    plt.ylabel(r'$P(\theta)$')
    plt.title('Prior')
    plt.text(locx, np.max(p_theta) / 2, r'beta($\theta$;%s,%s)' % (a, b))
    # Plot the likelihood:
    plt.subplot(3, 1, 2)
    plt.plot(theta, p_data_given_theta)
    plt.xlim(0, 1)
    plt.ylim(0, np.max(p_data_given_theta) * 1.2)
    plt.xlabel(r'$\theta$')
    plt.ylabel(r'$P(D|\theta)$')
    plt.title('Likelihood')
    plt.text(locx, np.max(p_data_given_theta) / 2, 'Data: z=%s, N=%s' % (z, N))
    # Plot the posterior:
    plt.subplot(3, 1, 3)
    plt.plot(theta, p_theta_given_data)
    plt.xlim(0, 1)
    plt.ylim(0, np.max(p_theta_given_data) * 1.2)
    plt.xlabel(r'$\theta$')
    plt.ylabel(r'$P(\theta|D)$')
    plt.title('Posterior')
    locy = np.linspace(0, np.max(p_theta_given_data), 5)
    plt.text(locx, locy[1], r'beta($\theta$;%s,%s)' % (post_a, post_b))
    plt.text(locx, locy[2], 'P(D) = %g' % p_data)
    # Plot the HDI.
    plt.text(locx, locy[3], 'Intervals = %.3f - %.3f' % (intervals[0], intervals[1]))
    plt.fill_between(theta, 0, p_theta_given_data,
                     where=np.logical_and(theta > intervals[0], theta < intervals[1]),
                     color='blue', alpha=0.3)
    return intervals
import sys

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta
from scipy.special import beta as beta_func

from HDIofICDF import HDIofICDF


def BernBeta(priorShape, dataVec, credMass=0.95, saveGraph=False):
    """Bayesian updating for Bernoulli likelihood and beta prior.

    Input arguments:
        priorShape  vector of parameter values for the prior beta distribution
        dataVec     vector of 1's and 0's
        credMass    the probability mass of the equal-tailed credible interval
    Output:
        postShape   vector of parameter values for the posterior beta distribution
    Graphics:
        Creates a graph of the posterior with the highest posterior density interval
    Example of use:
        post_shape = BernBeta(priorShape=[1, 1], dataVec=[1, 0, 0, 1, 1])"""
    # Check for errors in the input arguments:
    if len(priorShape) != 2:
        sys.exit("priorShape must have two components")
    if any([i < 0 for i in priorShape]):
        sys.exit("priorShape components must be positive")
    if any([i != 0 and i != 1 for i in dataVec]):
        sys.exit("dataVec must only contain 0 or 1")
    if credMass <= 0 or credMass >= 1:
        sys.exit("credMass must be between 0 and 1")
    # Rename the prior shape parameters for convenience:
    a = priorShape[0]
    b = priorShape[1]
    # Create summary values of the data:
    z = sum(dataVec)   # number of 1's in dataVec
    N = len(dataVec)   # number of flips in dataVec
    # Compute the posterior shape parameters:
    postShape = [a + z, b + N - z]
    # Compute the evidence, p(D):
    pData = beta_func(z + a, N - z + b) / beta_func(a, b)
    # Now plot everything.
    # Construct a grid of theta values, used for graphing.
    bin_width = 0.005
    theta = np.arange(bin_width / 2, 1 - (bin_width / 2) + bin_width, bin_width)
    # Compute the prior at each value of theta.
    p_theta = beta.pdf(theta, a, b)
    # Compute the likelihood of the data at each value of theta.
    p_data_given_theta = theta**z * (1 - theta)**(N - z)
    # Compute the posterior at each value of theta.
    post_a = a + z
    post_b = b + N - z
    p_theta_given_data = beta.pdf(theta, a + z, b + N - z)
    # Determine the limits of the highest density interval.
    intervals = HDIofICDF(beta, credMass, a=postShape[0], b=postShape[1])
    # Plot the results.
    plt.figure(figsize=(12, 12))
    plt.subplots_adjust(hspace=0.7)
    locx = 0.05
    plt.subplot(3, 1, 1)
    plt.plot(theta, p_theta_given_data)  # was p_theta, which drew the prior under a 'Posterior' title
    plt.xlim(0, 1)
    plt.ylim(0, np.max(p_theta_given_data) * 1.2)
    plt.xlabel(r"$\theta$")
    plt.ylabel(r"$P(\theta|D)$")
    plt.title("Posterior")
    locy = np.linspace(0, np.max(p_theta_given_data), 5)
    plt.text(locx, locy[1], r"beta($\theta$, {}, {})".format(post_a, post_b))
    plt.text(locx, locy[2], "P(D) = %g" % pData)
    plt.text(locx, locy[3], "Intervals = %.3f - %.3f" % (intervals[0], intervals[1]))
    plt.fill_between(
        theta, 0, p_theta_given_data,
        where=np.logical_and(theta > intervals[0], theta < intervals[1]),
        color="blue", alpha=0.3,
    )
    return intervals
def betafn(x, a, b):
    # non-regularized incomplete beta function B(x; a, b): scipy's betainc is
    # regularized, so multiply back by the complete beta B(a, b)
    return betainc(a, b, x) * beta_func(a, b)
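# scipy.special.betainc is the *regularized* incomplete beta, so betafn above
# recovers the non-regularized B(x; a, b). Illustrative check against direct
# quadrature of the integrand t**(a-1) * (1-t)**(b-1):
import numpy as np
from scipy.integrate import quad
from scipy.special import beta as beta_func, betainc

a, b, x = 2.5, 3.5, 0.4
by_quadrature, _ = quad(lambda t: t**(a - 1) * (1 - t)**(b - 1), 0, x)
print(np.isclose(betainc(a, b, x) * beta_func(a, b), by_quadrature))  # True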
# requires module context: np, beta_func, mp, IterableQueue, DocumentIterator,
# worker, fit_psi, get_corpus_stats, STOPWORDS, DEFAULT_NUM_TOPICS, NUM_PROCS
def fit(dictionary=None, files=[], dirs=[], match='', skip='$.^',
        batch_size=1000, num_topics=DEFAULT_NUM_TOPICS, time_range=None,
        alpha=None, beta=0.1, num_procs=NUM_PROCS, read=None, num_docs=None,
        min_frequency=5, num_epochs=100):

    # If we don't have the number of documents or a dictionary, then
    # run over the full dataset once to accumulate that information.
    if dictionary is None or num_docs is None or time_range is None:
        dictionary, num_docs, found_time_range = get_corpus_stats(
            files=files, dirs=dirs, match=match, skip=skip,
            batch_size=batch_size, num_procs=num_procs, read=read,
            stopwords=STOPWORDS, min_frequency=min_frequency)
        if time_range is None:
            time_range = found_time_range

    if alpha is None:
        alpha = 1.

    total_docs = sum(num_docs)
    proc_doc_indices = [sum(num_docs[:i]) for i in range(len(num_docs) + 1)]
    m = np.ones((total_docs, num_topics))
    n = np.ones((len(dictionary), num_topics))
    psi = np.ones((num_topics, 2))

    # TODO: move worker creation outside of the epoch -- keep the same worker
    # pool between epochs. Workers can receive updates about m and n etc. over
    # the queue.
    for epoch in range(num_epochs):

        # Show progress.
        print(float(epoch) / num_epochs * 100)

        # Pre-calculate the denominator in the sum of the probability dist.
        n_denom = (n + beta).sum(axis=0) - 1
        B = np.array([beta_func(*psi_vals) for psi_vals in psi])
        denom = n_denom * B

        # The workers should calculate probabilities and then sample,
        # producing updates to m and n.
        updates_queue = IterableQueue()
        ctx = mp.get_context("spawn")
        for proc_num in range(num_procs):

            # Advance the randomness so children don't all get the same seed.
            np.random.random()

            doc_iterator = DocumentIterator(
                read=read, files=files, dirs=dirs, match=match, skip=skip,
                batch_size=batch_size,
                fold='%s/%s' % (proc_num, num_procs),
            )
            m_slice = m[proc_doc_indices[proc_num]:proc_doc_indices[proc_num + 1]]
            p = ctx.Process(target=worker, args=(
                proc_num, doc_iterator, dictionary, num_topics, time_range,
                alpha, beta, psi, n, m_slice, denom,
                updates_queue.get_producer()))
            p.start()

        updates_consumer = updates_queue.get_consumer()
        updates_queue.close()

        # Update m, n, and psi.
        n = np.zeros((len(dictionary), num_topics))
        m = np.zeros((total_docs, num_topics))
        psi_updates = [[] for i in range(num_topics)]
        for proc_num, m_update, n_update, psi_update in updates_consumer:
            n += n_update
            start_idx = proc_doc_indices[proc_num]
            stop_idx = proc_doc_indices[proc_num + 1]
            m[start_idx:stop_idx] = m_update
            for i in range(num_topics):
                psi_updates[i].extend(psi_update[i])

        # Update psi.
        for i in range(num_topics):
            psi[i] = fit_psi(psi_updates[i])

    return m, n, psi, dictionary