def sample_cfdna_allele(p, tot_counts, bbdisp):
    """Sample the number of A-allele reads observed at a site.

    The read count is drawn from a beta-binomial distribution whose expected
    A-allele probability equals ``p``, the proportion of the A allele in the
    maternal-fetal mixture, with dispersion parameter ``bbdisp``. When
    ``bbdisp`` is ``np.inf``, or when ``p`` is exactly 0 or 1, the draw is
    taken from a plain binomial instead.

    Args:
        p (float): proportion of A allele in maternal-fetal mix, in [0, 1].
        tot_counts (int): the number of reads at the site (depth).
        bbdisp (float): beta-binomial dispersion parameter in allele counts;
            must be >= 1. ``np.inf`` collapses the beta-binomial to the
            simple binomial.

    Returns:
        The number of A allele reads.

    Raises:
        AssertionError: If p < 0 or p > 1.
        AssertionError: If bbdisp < 1.
    """
    # Raise explicitly instead of using ``assert`` so the validation still
    # runs under ``python -O``. AssertionError is kept because it is the
    # documented exception type. The ``not (...)`` form also rejects NaN.
    if not (bbdisp >= 1):
        raise AssertionError("Value of bbdisp must be >= 1")
    if not (0 <= p <= 1):
        raise AssertionError("Value of p must be >= 0 and <= 1")

    # At probs 0 or 1 the beta parameters are undefined, and an infinite
    # dispersion means no over-dispersion at all: in both cases sample from
    # a binomial distribution directly.
    if p == 0 or p == 1 or bbdisp == np.inf:
        return np.random.binomial(tot_counts, p)

    # Always apply the sampling variance from the most abundant allele: when
    # A is the minor allele, sample the other allele and subtract from depth.
    flip = p < 0.5
    major_p = 1 - p if flip else p

    # Parameterised so that a / (a + b) == major_p.
    a = bbdisp
    b = (bbdisp / major_p) - bbdisp

    major_reads = pymc.rbetabin(a, b, tot_counts)
    if flip:
        return tot_counts - major_reads
    return major_reads
def pred(pi=pi, alpha=alpha, beta=beta):
    """Return the result of ``mc.rbetabin(alpha, beta, n)``.

    ``n`` is not a parameter; it is resolved from the surrounding scope.
    ``pi`` is accepted but never read in the body.

    NOTE(review): presumably ``mc.rbetabin`` produces a random beta-binomial
    draw and ``pi`` is kept in the signature so that a wrapper outside this
    view can register it as a dependency -- confirm against the caller.
    """
    draw = mc.rbetabin(alpha, beta, n)
    return draw
def p_pred(pi=pi, delta=delta, n=n_nonzero):
    """Return ``mc.rbetabin`` draws divided by their sample sizes.

    Only the entries where ``i_zero`` (read from the surrounding scope) is
    false are used. For those entries the call is
    ``mc.rbetabin(alpha=pi * delta * 50, beta=(1 - pi) * delta * 50, n=n)``
    and the result is divided by ``n + 1e-9`` as floats.

    The denominator uses the same masked ``n`` as the draw. Dividing by the
    unmasked ``n`` gives mismatched shapes whenever ``i_zero`` has a true
    entry.

    NOTE(review): assumes ``pi``, ``delta``, ``n`` and ``i_zero`` are arrays
    of the same shape, with ``i_zero`` boolean -- confirm against the
    caller. The meaning of the factor 50 is not visible from this block.
    """
    keep = ~i_zero
    pi_kept = pi[keep]
    delta_kept = delta[keep]
    n_kept = n[keep]

    draws = mc.rbetabin(
        alpha=pi_kept * delta_kept * 50,
        beta=(1 - pi_kept) * delta_kept * 50,
        n=n_kept,
    )
    # The tiny offset keeps the division finite if a retained n is zero.
    return draws / pl.array(n_kept + 1.e-9, dtype=float)