def get_raw_probs(self, k, n, x, a, Stat):
    if (n, x, a) not in self.rawProbs:
        # for B1, polymorphism is 1, substitution is 0
        if Stat == "B1":
            # sanity check
            assert set(k) == set([0, 1])
            b = self._get_b(x, a)
            # P(substitution) = P(derived allele fixed, k = n);
            # P(polymorphism) = 1 - P(fixed) - P(absent)
            probs = np.where(k == 0,
                             betabinom(n, a, b).pmf(n),
                             1. - betabinom(n, a, b).pmf(n)
                             - betabinom(n, a, b).pmf(0))
            self.rawProbs[(n, x, a)] = probs
        elif Stat == "B2" or Stat == 'B0':
            self.rawProbs[(n, x, a)] = self._get_betabinom_probs(k, n, x, a)
        elif Stat == "B2maf" or Stat == 'B0maf':
            probs = (self._get_betabinom_probs(k, n, x, a)
                     + self._get_betabinom_probs(n - k, n, x, a))
            # n/2 is double counted if n is even
            if n % 2 == 0:
                probs = np.where(k == int(n / 2), probs / 2, probs)
            self.rawProbs[(n, x, a)] = probs
    return self.rawProbs[(n, x, a)]
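# A quick standalone check of the B1 branch above, assuming illustrative
# parameter values: the polymorphism probability 1 - pmf(n) - pmf(0) should
# equal the beta-binomial mass on the intermediate frequencies 1..n-1.
import numpy as np
from scipy.stats import betabinom

n, a, b = 50, 2.0, 3.0
rv = betabinom(n, a, b)
p_poly = 1.0 - rv.pmf(n) - rv.pmf(0)
print(np.isclose(p_poly, rv.pmf(np.arange(1, n)).sum()))  # True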
def fit(self):
    """Fits the null and alternative models."""
    if not self.binomial:
        # attempt Beta-Binomial parameter estimation
        data = np.hstack([self.a, self.d - self.a])
        self.alpha1, self.niter1 = fit_polya(data=data)
        if np.isinf(self.alpha1).all() or (self.alpha1 == 0).all():
            self.binomial = True
            msg = 'Overdispersion estimate out of bounds.'
            msg += ' Reverting to Binomial LRT.'
            print(msg)
    # fit alternative model / estimate negative log-likelihoods
    if not self.binomial:
        # fit overdispersion parameter
        m0 = np.asarray([self.m0, 1 - self.m0])
        s0, self.niter0 = fit_polya_precision(data, m=m0)
        self.alpha0 = reparameterize_polya_ms(m0, s0)
        self.nll0 = -betabinom(n=self.d, a=self.alpha0[0],
                               b=self.alpha0[1]).logpmf(self.a).sum()
        self.nll1 = -betabinom(n=self.d, a=self.alpha1[0],
                               b=self.alpha1[1]).logpmf(self.a).sum()
    else:
        # no overdispersion estimate desired / possible, estimate mean
        self.m1 = self.a.sum() / self.d.sum()
        self.nll0 = -binom(n=self.d, p=self.m0).logpmf(self.a).sum()
        self.nll1 = -binom(n=self.d, p=self.m1).logpmf(self.a).sum()
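# A minimal sketch of how the fitted negative log-likelihoods feed a
# likelihood-ratio test. The class name `Model` and its constructor are
# assumptions; only nll0/nll1 come from fit() above. With one constrained
# parameter (the null fixes the mean at m0), 2 * (nll0 - nll1) is compared
# against a chi-squared(1) reference.
from scipy.stats import chi2

model = Model(a=a, d=d, m0=0.5)  # hypothetical constructor
model.fit()
lrt_stat = 2.0 * (model.nll0 - model.nll1)
p_value = chi2(df=1).sf(lrt_stat)  # survival function = upper tail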
def plot_beta_binomial_fit(data, fit_results, title=None, x_label=None,
                           x_range=None, y_range=None, fig_size=(6, 5),
                           bin_width=1, filename=None):
    """
    :param data: (numpy.array) observations
    :param fit_results: dictionary with keys "a", "b", "n", and "loc"
    :param title: title of the figure
    :param x_label: label to show on the x-axis of the histogram
    :param x_range: (tuple) x range
    :param y_range: (tuple) y range (the histogram shows the probability
        density, so the upper value of y_range should be 1)
    :param fig_size: (tuple) figure size
    :param bin_width: bin width
    :param filename: filename to save the figure as
    """
    plot_fit_discrete(data=data,
                      dist=stat.betabinom(n=fit_results['n'],
                                          a=fit_results['a'],
                                          b=fit_results['b'],
                                          loc=fit_results['loc']),
                      label='Beta-Binomial',
                      bin_width=bin_width,
                      title=title,
                      x_label=x_label,
                      x_range=x_range,
                      y_range=y_range,
                      fig_size=fig_size,
                      filename=filename)
def compute_bb_nll(a, d, mu, theta):
    """Computes negative log-likelihood for Beta-Binomial model.

    Covers limit cases theta = 0 (Binomial) and theta = inf (Bernoulli).

    Args:
        a: Vector of successes.
        d: Vector of trials.
        mu: Mean of the distribution. Has to be the same shape as a and d.
        theta: Dispersion parameter.

    Returns:
        Negative log-likelihood.
    """
    a = atleast_2d_column(a)
    d = atleast_2d_column(d)
    mu = atleast_2d_column(mu)
    if (mu > 1).any() or (mu < 0).any():
        raise ValueError('mu has to be between 0 and 1.')
    if a.size != d.size or a.size != mu.size:
        raise ValueError('a, d and mu have to be of the same size.')
    if theta < 0:
        raise ValueError('theta has to be non-negative.')
    if theta == 0:
        nll = -binom(n=d, p=mu).logpmf(a).sum()
    elif np.isinf(theta):
        # Bernoulli limit: only presence/absence of successes is informative
        nll = -binom(n=d > 0, p=mu).logpmf(a > 0).sum()
    else:
        alpha = reparameterize_polya_ms(np.hstack([mu, 1 - mu]), 1 / theta)
        nll = -betabinom(n=d, a=alpha[:, 0, np.newaxis],
                         b=alpha[:, 1, np.newaxis]).logpmf(a).sum()
    return nll
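# A minimal usage sketch of compute_bb_nll; the toy arrays below are
# assumptions, and the helpers atleast_2d_column / reparameterize_polya_ms
# are expected to come from the surrounding module.
import numpy as np

a = np.array([3, 1, 4])      # successes per site
d = np.array([10, 8, 12])    # trials per site
mu = np.full(3, 0.3)         # shared mean
for theta in (0.0, 0.1, np.inf):
    print(theta, compute_bb_nll(a, d, mu, theta))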
def __call__(self, batch_size, n_possible_points):
    if self.range_indcs is not None:
        n_possible_points = self.range_indcs[1] - self.range_indcs[0]

    if np.random.uniform(size=1) < self.proba_uniform:
        # sample from a uniform distribution instead of using a and b
        n_indcs = random.randint(0, n_possible_points)
    else:
        if self.is_beta_binomial:
            rv = betabinom(n_possible_points, self.a, self.b)
            n_indcs = rv.rvs()
        else:
            a = ratio_to_int(self.a, n_possible_points)
            b = ratio_to_int(self.b, n_possible_points)
            n_indcs = random.randint(a, b)

    if self.is_ensure_one and n_indcs < 1:
        n_indcs = 1

    if self.is_batch_share:
        indcs = torch.randperm(n_possible_points)[:n_indcs]
        indcs = indcs.unsqueeze(0).expand(batch_size, n_indcs)
    else:
        indcs = (np.arange(n_possible_points)
                 .reshape(1, n_possible_points)
                 .repeat(batch_size, axis=0))
        indep_shuffle_(indcs, -1)
        indcs = torch.from_numpy(indcs[:, :n_indcs])

    if self.range_indcs is not None:
        # adding is the same as shifting
        indcs += self.range_indcs[0]

    return indcs
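# A standalone sketch of the beta-binomial branch above: draw how many
# indices to keep out of n_possible_points. The parameter values are
# assumptions chosen for illustration.
from scipy.stats import betabinom

n_possible_points = 128
n_indcs = betabinom(n_possible_points, a=2.0, b=5.0).rvs()
print(f'keeping {n_indcs} of {n_possible_points} points')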
def test_betabinom(self):
    from scipy.stats import betabinom
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(1, 1)
    n, a, b = 5, 2.3, 0.63
    mean, var, skew, kurt = betabinom.stats(n, a, b, moments='mvsk')
    x = np.arange(betabinom.ppf(0.01, n, a, b),
                  betabinom.ppf(0.99, n, a, b))
    ax.plot(x, betabinom.pmf(x, n, a, b), 'bo', ms=8, label='betabinom pmf')
    ax.vlines(x, 0, betabinom.pmf(x, n, a, b), colors='b', lw=5, alpha=0.5)
    rv = betabinom(n, a, b)
    ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
              label='frozen pmf')
    ax.legend(loc='best', frameon=False)
    # plt.show()
    self.assertEqual("AxesSubplot(0.125,0.11;0.775x0.77)", str(ax))
def test_betabinom_a_and_b_unity():
    # test limiting case that betabinom(n, 1, 1) is a discrete uniform
    # distribution from 0 to n
    n = 20
    k = np.arange(n + 1)
    p = betabinom(n, 1, 1).pmf(k)
    expected = np.repeat(1 / (n + 1), n + 1)
    assert_almost_equal(p, expected)
def test_betabinom_bernoulli():
    # test limiting case that betabinom(1, a, b) = bernoulli(a / (a + b))
    a = 2.3
    b = 0.63
    k = np.arange(2)
    p = betabinom(1, a, b).pmf(k)
    expected = bernoulli(a / (a + b)).pmf(k)
    assert_almost_equal(p, expected)
def beta_binomial_prior_distribution(phoneme_count, mel_count,
                                     scaling_factor=1.0):
    x = np.arange(0, phoneme_count)
    mel_text_probs = []
    for i in range(1, mel_count + 1):
        a, b = scaling_factor * i, scaling_factor * (mel_count + 1 - i)
        mel_i_prob = betabinom(phoneme_count, a, b).pmf(x)
        mel_text_probs.append(mel_i_prob)
    return np.array(mel_text_probs)
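# A minimal usage sketch: the result is a (mel_count, phoneme_count) matrix
# whose row i peaks near phoneme_count * i / (mel_count + 1), i.e. a soft
# diagonal alignment prior (note x stops at phoneme_count - 1, so the last
# support point k = phoneme_count is dropped). Sizes are assumptions.
prior = beta_binomial_prior_distribution(phoneme_count=40, mel_count=100)
print(prior.shape)                            # (100, 40)
print(prior[0].argmax(), prior[-1].argmax())  # peaks move left to right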
def beta_binomial_prior_distribution(phoneme_count, mel_count):
    P, M = phoneme_count, mel_count
    x = np.arange(0, P)
    mel_text_probs = []
    for i in range(1, M + 1):
        a, b = i, M + 1 - i
        rv = betabinom(P, a, b)
        mel_i_prob = rv.pmf(x)
        mel_text_probs.append(mel_i_prob)
    return torch.tensor(np.array(mel_text_probs))
def design_prior_filter(a, b, n):
    from scipy.stats import betabinom
    beta = betabinom(a=a, b=b, n=n)
    taps = beta.pmf([i for i in range(n, -1, -1)])
    filter_length = 2 * (len(taps) - 1) + 1
    filter_coef = np.zeros((filter_length, 1, 1))
    filter_coef[:(n + 1), 0, 0] = taps
    print(f'prior filter : {filter_coef.flatten()}')
    return filter_coef
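# A minimal usage sketch: the returned array has shape (2n + 1, 1, 1), with
# the reversed beta-binomial pmf in the first n + 1 taps and zero padding
# after, i.e. a one-channel convolution kernel. Parameters are assumptions.
coef = design_prior_filter(a=2.0, b=5.0, n=8)
print(coef.shape)  # (17, 1, 1)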
def plot(self, **kwargs):
    x = np.arange(
        betabinom.ppf(0.01, **self.parameters),
        betabinom.ppf(0.99, **self.parameters),
    )
    plt.vlines(
        x,
        0,
        betabinom(**self.parameters).pmf(x),
        label='pmf',
        **kwargs,
    )
def _reset_distribution(self):
    self._distribution: rv_discrete = betabinom(
        n=self._n, a=self._alpha, b=self._beta
    )
def _get_betabinom_probs(self, k, n, x, a):
    b = self._get_b(x, a)
    if (n, x, a) not in self.betabinomprobs:
        self.betabinomprobs[(n, x, a)] = betabinom(n, a, b)
    return self.betabinomprobs[(n, x, a)].pmf(k)
msa = trim_terminals(
    read_fasta(f'../../ortho_MSA/realign_hmmer1/out/{OGid}.mfa'))

# Create emission sequence
emits = []
for j in range(len(msa[0][1])):
    col = [1 if msa[i][1][j] in ['-', '.'] else 0 for i in range(len(msa))]
    emits.append(sum(col))

# Instantiate model
e_dists_rv = {}
start_e_dists_rv = {}
for state in ['1A', '3']:
    a, b = params['e_dists'][state]
    e_dists_rv[state] = betabinom_frozen(len(msa) - 1, a, b)
    start_e_dists_rv[state] = stats.betabinom(len(msa) - 1, a, b)
for state in ['1B', '2']:
    a0, b0, a1, b1 = params['e_dists'][state]
    e_dists_rv[state] = ar1_betabinom_frozen(len(msa) - 1, a0, b0, a1, b1)
    start_e_dists_rv[state] = get_stationary_dist(len(msa) - 1, a0, b0, a1, b1)
model = hmm.ARHMM(params['t_dists'], e_dists_rv,
                  params['start_t_dist'], start_e_dists_rv)

# Decode states and plot
fbs = model.forward_backward(emits)
draw.plot_msa_lines([seq.upper() for _, seq in msa],
                    [fbs['1A'], fbs['2'], fbs['3'], fbs['1B']],
                    figsize=(15, 6))
plt.savefig(f'out/{OGid}_wide.png', bbox_inches='tight')
plt.close()
def __init__(self, n, a, b):
    self.n = n
    self.a = a
    self.b = b
    self.dist = stats.betabinom(n, a, b)
def fit_betamix(X_N, X, num_betas, debug=False):
    # Thetas
    N = X_N.max()
    thetas = np.random.randint(low=2, high=10, size=(num_betas, 2))
    # Mixing coefficients
    alphas = np.random.rand(num_betas)
    alphas /= alphas.sum()
    ROUNDS = 200 if not debug else 1
    QUIET = True
    lls = []
    for r in range(ROUNDS):
        dists = [betabinom(X_N, *theta) for theta in thetas]
        # E step: posterior responsibilities of each component
        memberships = np.stack([a * d.pmf(X) for a, d in zip(alphas, dists)])
        memberships = memberships / memberships.sum(axis=0)
        # M step: refit component parameters, retrying from random guesses
        fit_args = {
            'samples_N': X_N,
            'samples': X,
            'weights': memberships,
            'guess': thetas,
        }
        for i in range(MAX_RETRIES):
            new_thetas = fit_betabinom(**fit_args)
            fit_args['guess'] = np.random.randint(low=2, high=10,
                                                  size=(num_betas, 2))
            success = new_thetas is not None
            if success:
                break
            print('RETRYING...')
        if not success:
            if len(lls) > 1:
                den = np.max([lls[-1], lls[-2]])
                rel_improv = np.abs(lls[-1] - lls[-2]) / den
            else:
                rel_improv = float('inf')
            if rel_improv < 0:  # threshold check disabled (was REL_IMPROVE_THRESH)
                # if we have just hit convergence early, keep the old thetas
                new_thetas = thetas
            else:
                # dump the failing state for post-mortem and abort
                uid = uuid4()
                fn = f'err_{uid}_{num_betas}.pt'
                p = f'/data/theory/robustopt/engstrom/store/bayes/{fn}'
                fit_args.update({'round': r, 'lls': np.array(lls)})
                ch.save(fit_args, p)
                raise ValueError('Opt did not converge!')
        thetas = new_thetas
        likelihood = fit_betabinom(X_N, X, memberships, guess=thetas,
                                   optimize=False)
        lls.append(likelihood)
        alphas = memberships.sum(axis=1) / memberships.sum()
        with np.printoptions(precision=2, suppress=True):
            print('-----')
            print(f"Round {r} done")
            print(f"Theta: \n {np.array(thetas)}")
            print(f"Alpha: {alphas}")
            print(f"Likelihood: {likelihood}")
    ll = likelihood
    return BBinomialMixture(N, thetas, alphas), BMixture(thetas, alphas), ll
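# A self-contained sketch of the E step above for a two-component
# beta-binomial mixture, using only scipy; the data and parameters are
# assumptions for illustration. memberships[j, i] is the posterior
# responsibility of component j for observation i.
import numpy as np
from scipy.stats import betabinom

X_N = 20                                   # trials per observation
X = np.array([2, 3, 15, 18, 10])           # observed successes
thetas = np.array([[2.0, 8.0], [8.0, 2.0]])
alphas = np.array([0.5, 0.5])

dists = [betabinom(X_N, a, b) for a, b in thetas]
memberships = np.stack([w * d.pmf(X) for w, d in zip(alphas, dists)])
memberships /= memberships.sum(axis=0)
print(memberships.round(2))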
def _reset_distribution(self):
    self._distribution = betabinom(self._n, self._alpha, self._beta)
def _f(params):
    a, b = params
    pmf_val = betabinom(samples_N, a, b).logpmf(samples)
    assert pmf_val.shape == w.shape
    return -(pmf_val * w).mean()
def pmf(self, samples):
    total_pmf = 0.0
    for (a, b), alpha in zip(self.thetas, self.alphas):
        dist = betabinom(self.N, a, b)
        total_pmf += alpha * dist.pmf(samples)
    return total_pmf
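# A quick standalone check that a mixture pmf built this way is properly
# normalized: each component pmf sums to 1 over 0..N and the mixing weights
# sum to 1, so the weighted sum does too. Values are assumptions.
import numpy as np
from scipy.stats import betabinom

N = 10
thetas = [(2.0, 5.0), (5.0, 2.0)]
alphas = [0.3, 0.7]
k = np.arange(N + 1)
total = sum(w * betabinom(N, a, b).pmf(k) for (a, b), w in zip(thetas, alphas))
print(total.sum())  # ~1.0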