Example #1
    def test_betabinom(self):
        from scipy.stats import betabinom
        import matplotlib.pyplot as plt
        import numpy as np
        fig, ax = plt.subplots(1, 1)

        n, a, b = 5, 2.3, 0.63
        mean, var, skew, kurt = betabinom.stats(n, a, b, moments='mvsk')

        x = np.arange(betabinom.ppf(0.01, n, a, b),
                      betabinom.ppf(0.99, n, a, b))
        ax.plot(x,
                betabinom.pmf(x, n, a, b),
                'bo',
                ms=8,
                label='betabinom pmf')
        ax.vlines(x, 0, betabinom.pmf(x, n, a, b), colors='b', lw=5, alpha=0.5)

        rv = betabinom(n, a, b)
        ax.vlines(x,
                  0,
                  rv.pmf(x),
                  colors='k',
                  linestyles='-',
                  lw=1,
                  label='frozen pmf')
        ax.legend(loc='best', frameon=False)
        # plt.show()
        self.assertEqual("AxesSubplot(0.125,0.11;0.775x0.77)", str(ax))
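As a quick cross-check on the stats call above, the beta-binomial mean has the closed form n * a / (a + b); a minimal sketch:

import numpy as np
from scipy.stats import betabinom

n, a, b = 5, 2.3, 0.63
mean = betabinom.stats(n, a, b, moments='m')
assert np.isclose(mean, n * a / (a + b))  # 5 * 2.3 / 2.93 ~= 3.925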
Example #2
    def __init__(self, query_dim, attn_dim, static_channels=8, static_kernel_size=21,
                 dynamic_channels=8, dynamic_kernel_size=21, prior_length=11,
                 alpha=0.1, beta=0.9, score_mask_value=-float("inf")):
        super(DynamicConvolutionAttention, self).__init__(query_dim, attn_dim, score_mask_value)
        self.prior_length = prior_length
        self.dynamic_channels = dynamic_channels
        self.dynamic_kernel_size = dynamic_kernel_size

        P = betabinom.pmf(np.arange(prior_length), prior_length - 1, alpha, beta)

        self.register_buffer("P", torch.FloatTensor(P).flip(0))
        self.W = nn.Linear(query_dim, attn_dim)
        self.V = nn.Linear(
            attn_dim, dynamic_channels * dynamic_kernel_size, bias=False
        )
        self.F = nn.Conv1d(
            1,
            static_channels,
            static_kernel_size,
            padding=(static_kernel_size - 1) // 2,
            bias=False,
        )
        self.U = nn.Linear(static_channels, attn_dim, bias=False)
        self.T = nn.Linear(dynamic_channels, attn_dim)
        self.v = nn.Linear(attn_dim, 1, bias=False)
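For context, a hedged sketch of how such a flipped beta-binomial prior is typically applied in Dynamic Convolution Attention: the previous alignment is convolved with the prior filter and the log of the result is added to the attention energies. Names, shapes, and the epsilon below are illustrative, not taken from this class:

import numpy as np
import torch
import torch.nn.functional as F
from scipy.stats import betabinom

prior_length, alpha, beta = 11, 0.1, 0.9
P = betabinom.pmf(np.arange(prior_length), prior_length - 1, alpha, beta)
prior = torch.FloatTensor(P).flip(0)              # causal orientation

prev_attn = torch.zeros(1, 1, 40)                 # (batch, 1, encoder_steps)
prev_attn[0, 0, 0] = 1.0                          # alignment starts at step 0
padded = F.pad(prev_attn, (prior_length - 1, 0))  # left-pad: look backwards only
prior_energy = torch.log(
    F.conv1d(padded, prior.view(1, 1, -1)).clamp_min(1e-8))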
Example #3
def add_likelihood_info_bayes(nline, a, b):
    '''
    Adds additional fields to a VCF INFO line describing the likelihood of
    producing the site and the predicted frequency, with and without testing
    correction. Uses the Bayesian beta posterior mean estimator.
    '''
    chro, pos, rid, ref, alt, qual, rfil, info = nline.strip().split()
    altset = alt.split(',')
    # Pull the alt count and depth out of the INFO field. Note split('=') is
    # used rather than strip("DP="), which strips characters, not a prefix.
    iv = info.split(';')
    dp = int(iv[0].split('=')[1])                                # "DP=<depth>"
    arats = [float(v) for v in iv[1].split('=')[1].split(',')]   # "AF=<f1>,..."
    if len(altset) != len(arats):
        print('Mismatch', nline)

    ninfo = ''
    for i, ar in enumerate(arats):
        alt_count = round(ar * dp)
        # The product should be nearly integral, but floating-point math and
        # rounding can occasionally introduce small errors.
        # The new information is structured as follows: for each alt there is
        # a comma-delimited entry holding the most likely frequency and the
        # overall likelihood of the site. These values are uncorrected.
        # For a single A mutation, for example, this adds ";A=f:.003,l:.000001".
        likelihood = betabinom.pmf(n=dp, a=a, b=b, k=alt_count)
        #calculate the most likely frequency.
        #using the simple bayesian beta posterior mean equation
        #phat = (x + a) / (n + a + b)
        lf = (alt_count + a) / (dp + a + b)
        #add information to the line.
        ninfo += ';' + altset[i] + '=f:' + str(lf) + ',l:' + str(likelihood)
    return '\t'.join([chro, pos, rid, ref, alt, qual, rfil, info + ninfo])
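A worked instance of the estimator used above: with a Beta(a, b) prior and alt_count alt reads out of dp total, the posterior mean shrinks the raw ratio toward the prior mean a / (a + b) (prior values below are illustrative):

a, b = 1.0, 1.0                        # illustrative flat prior
alt_count, dp = 3, 100
lf = (alt_count + a) / (dp + a + b)    # (3 + 1) / 102 ~= 0.0392 vs raw 0.03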
Example #4
def add_likelihood_info_mle(nline, a, b):
    '''
    Adds additional fields to a VCF INFO line describing the likelihood of
    producing the site, with and without testing correction. Uses an MLE
    approach.
    '''
    chro, pos, rid, ref, alt, qual, rfil, info = nline.strip().split()
    altset = alt.split(',')
    # Pull the alt count and depth out of the INFO field. Note split('=') is
    # used rather than strip("DP="), which strips characters, not a prefix.
    iv = info.split(';')
    dp = int(iv[0].split('=')[1])                                # "DP=<depth>"
    arats = [float(v) for v in iv[1].split('=')[1].split(',')]   # "AF=<f1>,..."
    if len(altset) != len(arats):
        print('Mismatch', nline)

    ninfo = ''
    for i, ar in enumerate(arats):
        alt_count = round(ar * dp)  # should always be integral up to rounding
        # The new information is structured as follows: for each alt there is
        # a comma-delimited entry holding the most likely frequency and the
        # overall likelihood of the site. These values are uncorrected.
        # For a single A mutation, for example, this adds ";A=f:.003,l:.000001".
        likelihood = betabinom.pmf(n=dp, a=a, b=b, k=alt_count)
        # Calculate the most likely frequency, memoizing results in ldf
        # to save runtime.
        if (alt_count, dp) in ldf:
            lf = ldf[(alt_count, dp)]
        else:
            lf = fmin(func=get_fp,
                      x0=.01,
                      args=(a, b, dp, alt_count),
                      disp=False)
            ldf[(alt_count, dp)] = lf[0]
        #add information to the line.
        ninfo += ';' + altset[i] + '=f:' + str(lf) + ',l:' + str(likelihood)
    return '\t'.join([chro, pos, rid, ref, alt, qual, rfil, info + ninfo])
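`ldf` and `get_fp` (and the `fmin` import) live elsewhere in the source file. A hedged sketch of what they plausibly look like, assuming `get_fp` returns the negative log posterior of a candidate frequency under a Beta(a, b) prior with a binomial read-count likelihood; the actual objective may differ:

from scipy.optimize import fmin
from scipy.stats import beta as beta_dist, binom

ldf = {}  # memoization cache: (alt_count, depth) -> fitted frequency

def get_fp(f, a, b, n, k):
    # Negative log posterior of frequency f; fmin minimizes this.
    return -(binom.logpmf(k, n, f) + beta_dist.logpdf(f, a, b))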
Example #5
    def computeBetaWeights(self, k=.6):
        strikeList = self.optionChain.getStrikeList()
        n = len(strikeList)
        x = np.arange(0, n)

        weights = betabinom.pmf(x, n - 1, k, k)
        weights = pd.Series(weights, index=strikeList)

        return weights
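Because the same k is passed for both shape parameters, the pmf is symmetric, so the weights peak at the middle strike and taper toward the wings (smaller k flattens the profile). A quick illustration:

import numpy as np
from scipy.stats import betabinom

w = betabinom.pmf(np.arange(5), 4, 0.6, 0.6)
assert np.isclose(w[0], w[4]) and np.isclose(w[1], w[3])  # symmetric about the middle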
Example #6
    def _compute_posterior_class_probability(self, k, n) -> Tuple[float, float]:
        """
        For given parameters of beta distributions for both classes, computes the posterior class probabilities:

        .. math::

            p(c' = x \\mid n', k') = \\binom{n'}{k'} \\frac{B(k' + \\alpha_x, n' - k' + \\beta_x)}{B(\\alpha_x, \\beta_x)} \\frac{N_x + 1}{N + 2}, \\quad x = 0, 1

        Arguments:

            k: number of disease-associated sequences
            n: total number of sequences

        Returns:

            a tuple of probabilities for negative class and positive class for given example, normalized to sum to 1

        """
        predicted_probability_0 = beta_binomial.pmf(
            k, n, self.alpha_0,
            self.beta_0) * (self.N_0 + 1) / (self.N_0 + self.N_1 + 2)
        predicted_probability_1 = beta_binomial.pmf(
            k, n, self.alpha_1,
            self.beta_1) * (self.N_1 + 1) / (self.N_0 + self.N_1 + 2)

        normalization_const = predicted_probability_0 + predicted_probability_1

        if np.isnan(normalization_const):
            raise ValueError(
                f"{ProbabilisticBinaryClassifier.__name__}: encountered nan in predicted posterior class probabilities."
                f"\nprobability of class 0: {predicted_probability_0}\nprobability of class 1: {predicted_probability_1}\n"
                f"alpha 0: {self.alpha_0}, beta 0: {self.beta_0}\nalpha 1: {self.alpha_1}, beta 1: {self.beta_1}\n"
                f"positive example count: {self.N_1}, negative example count: {self.N_0}"
            )
        elif normalization_const == 0:
            warnings.warn(
                f"{ProbabilisticBinaryClassifier.__name__}: posterior class probabilities for both classes are 0 (k={k}, n={n}). Returning "
                f"normalized values to indicate that the example could not be classified, by setting both probabilities to 0.5.",
                RuntimeWarning)
            return 0.5, 0.5

        return predicted_probability_0 / normalization_const, predicted_probability_1 / normalization_const
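A hedged numeric illustration of the docstring formula, with `beta_binomial` standing in for scipy.stats.betabinom (the import used by this class is not shown) and all parameter values invented for the example:

from scipy.stats import betabinom as beta_binomial

k, n = 12, 100
alpha_0, beta_0, N_0 = 2.0, 8.0, 40   # illustrative class-0 parameters
alpha_1, beta_1, N_1 = 5.0, 5.0, 60   # illustrative class-1 parameters
p0 = beta_binomial.pmf(k, n, alpha_0, beta_0) * (N_0 + 1) / (N_0 + N_1 + 2)
p1 = beta_binomial.pmf(k, n, alpha_1, beta_1) * (N_1 + 1) / (N_0 + N_1 + 2)
norm = p0 + p1
print(p0 / norm, p1 / norm)  # posterior class probabilities, sum to 1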
Example #7
def iter_threshold(obs_mismatches,
                   obs_positions,
                   unobs_positions,
                   alpha,
                   beta,
                   d_half,
                   threshold_fcn=soft_species_probability):
    for mm in range(unobs_positions + 1):
        p_mm = betabinom.pmf(mm, unobs_positions, alpha, beta)
        d = pctdiff(obs_mismatches, obs_positions, mm, unobs_positions)
        p_species = threshold_fcn(d, d_half)
        if p_species < 1e-10:
            break
        yield (mm, p_mm, d, p_species)
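A hedged usage sketch; `pctdiff` and `soft_species_probability` are defined elsewhere in the source, and the numbers below are purely illustrative. The generator yields until the soft threshold drives the species probability below 1e-10, so it can be consumed to accumulate a total probability:

p_total = sum(
    p_mm * p_species
    for mm, p_mm, d, p_species in iter_threshold(
        obs_mismatches=2, obs_positions=900, unobs_positions=600,
        alpha=1.0, beta=50.0, d_half=0.02))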
Example #8
    def __init__(
        self,
        query_dim,
        embedding_dim,  # pylint: disable=unused-argument
        attention_dim,
        static_filter_dim,
        static_kernel_size,
        dynamic_filter_dim,
        dynamic_kernel_size,
        prior_filter_len=11,
        alpha=0.1,
        beta=0.9,
    ):
        super().__init__()
        self._mask_value = 1e-8
        self.dynamic_filter_dim = dynamic_filter_dim
        self.dynamic_kernel_size = dynamic_kernel_size
        self.prior_filter_len = prior_filter_len
        self.attention_weights = None
        # setup key and query layers
        self.query_layer = nn.Linear(query_dim, attention_dim)
        self.key_layer = nn.Linear(attention_dim,
                                   dynamic_filter_dim * dynamic_kernel_size,
                                   bias=False)
        self.static_filter_conv = nn.Conv1d(
            1,
            static_filter_dim,
            static_kernel_size,
            padding=(static_kernel_size - 1) // 2,
            bias=False,
        )
        self.static_filter_layer = nn.Linear(static_filter_dim,
                                             attention_dim,
                                             bias=False)
        self.dynamic_filter_layer = nn.Linear(dynamic_filter_dim,
                                              attention_dim)
        self.v = nn.Linear(attention_dim, 1, bias=False)

        prior = betabinom.pmf(range(prior_filter_len), prior_filter_len - 1,
                              alpha, beta)
        self.register_buffer("prior", torch.FloatTensor(prior).flip(0))
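Worth noting about the defaults here: over prior_filter_len - 1 = 10 positions, alpha=0.1 and beta=0.9 give the prior a mean of 10 * 0.1 / (0.1 + 0.9) = 1, i.e., it encodes an expectation of advancing roughly one encoder step per decoder step:

import numpy as np
from scipy.stats import betabinom

prior = betabinom.pmf(np.arange(11), 10, 0.1, 0.9)
print(float(prior @ np.arange(11)))  # mean of the prior: 1.0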
Example #9
    def __init__(
        self,
        attn_rnn_size,
        hidden_size,
        static_channels,
        static_kernel_size,
        dynamic_channels,
        dynamic_kernel_size,
        prior_length,
        alpha,
        beta,
    ):
        super(DynamicConvolutionAttention, self).__init__()

        self.prior_length = prior_length
        self.dynamic_channels = dynamic_channels
        self.dynamic_kernel_size = dynamic_kernel_size

        P = betabinom.pmf(np.arange(prior_length), prior_length - 1, alpha,
                          beta)

        self.register_buffer("P", torch.FloatTensor(P).flip(0))
        self.W = nn.Linear(attn_rnn_size, hidden_size)
        self.V = nn.Linear(hidden_size,
                           dynamic_channels * dynamic_kernel_size,
                           bias=False)
        self.F = nn.Conv1d(
            1,
            static_channels,
            static_kernel_size,
            padding=(static_kernel_size - 1) // 2,
            bias=False,
        )
        self.U = nn.Linear(static_channels, hidden_size, bias=False)
        self.T = nn.Linear(dynamic_channels, hidden_size)
        self.v = nn.Linear(hidden_size, 1, bias=False)
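One detail shared by all three of these modules: PyTorch's Conv1d computes cross-correlation, so registering the pmf with flip(0) turns the operation into a true convolution, putting the k = 0 mass on the most recent alignment position. A quick check (parameter values illustrative):

import numpy as np
import torch
from scipy.stats import betabinom

P = betabinom.pmf(np.arange(11), 10, 0.1, 0.9)
buf = torch.FloatTensor(P).flip(0)
assert abs(buf[-1].item() - P[0]) < 1e-6  # last filter tap carries the k = 0 mass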
Example #10
def ppr(k, N0, a, b, t, kt):
    # Ratio of the marginal beta-binomial pmf of k successes in N0 trials to
    # the posterior-predictive pmf of the remaining k - kt successes after
    # observing kt successes in t trials. The errstate suppresses the
    # divide/overflow warnings raised when the denominator underflows.
    assert kt <= t
    with np.errstate(divide='ignore', over='ignore'):
        val = (betabinom.pmf(k, N0, a, b)
               / betabinom.pmf(k - kt, N0 - t, a + kt, b + t - kt))
    return val
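A hedged usage sketch: when the denominator pmf underflows to zero, the suppressed errstate lets the ratio come back as inf or nan, so callers may want to guard the result (values illustrative):

val = ppr(k=3, N0=50, a=1.0, b=9.0, t=10, kt=1)
if not np.isfinite(val):
    val = 0.0  # fallback; choose whatever suits the caller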
Example #11
    def pdf(self, x: float) -> float:
        # Discrete distribution: evaluate the pmf at the integer part of x.
        k = int(x)
        return float(betabinom.pmf(k, self.n, self.alpha, self.beta))
Example #12
# Assumed context from earlier in this Streamlit script (not shown here):
# numpy (np), streamlit (st), matplotlib, and scipy.stats' beta / betabinom
# are imported; a, b, n, size are set; fig = plt.figure() and
# ax1 = fig.add_subplot(211) create the figure and the first axes.
ax2 = fig.add_subplot(212)

# Beta distribution plot
x_beta = np.linspace(beta.ppf(0, a, b), beta.ppf(1, a, b), size)
# For discrete distributions, scipy's ppf(0) is the lower support bound minus
# 1, so betabinom.ppf(0, ...) == -1 while ppf(1, ...) == n; the "+ 1" shifts
# np.arange(-1, n) to cover the full support 0..n.
x_betabinom = np.arange(betabinom.ppf(0, n, a, b),
                        betabinom.ppf(1, n, a, b)) + 1
ax1.plot(x_beta,
         beta.pdf(x_beta, a, b),
         color="blue",
         label=f"Beta : a = {a}, b = {b}")
ax1.legend(loc='upper center', fontsize=30)
ax1.tick_params(labelsize=24)

# Beta-binomial distribution plot
ax2.plot(x_betabinom,
         betabinom.pmf(x_betabinom, n, a, b),
         color="red",
         alpha=0.5,
         label=f"Beta Binomial : a = {a}, b = {b}")
ax2.vlines(x_betabinom,
           0,
           betabinom.pmf(x_betabinom, n, a, b),
           lw=16,
           alpha=0.55,
           color="red")
ax2.legend(loc="upper center", fontsize=30)
ax2.tick_params(labelsize=24)
fig.tight_layout()

# Visualization
st.subheader("Visualization of the probability functions")
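The two panels are directly related: a beta-binomial draw is a binomial draw whose success probability is itself Beta(a, b)-distributed. A small Monte Carlo check of that identity (illustrative parameters, independent of the app's widgets):

import numpy as np
from scipy.stats import beta, betabinom, binom

rng = np.random.default_rng(0)
n, a, b = 10, 2.0, 3.0
p = beta.rvs(a, b, size=200_000, random_state=rng)
draws = binom.rvs(n, p, random_state=rng)
empirical = np.bincount(draws, minlength=n + 1) / draws.size
assert np.allclose(empirical, betabinom.pmf(np.arange(n + 1), n, a, b), atol=5e-3)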