Beispiel #1
0
def test_issue_7406():
    """Check ``binom.ppf`` when the number of trials is zero."""
    np.random.seed(0)
    n_trials, p_success = 0, 0.5

    # Ten random quantiles must all map to 0.
    random_quantiles = np.random.rand(10)
    assert_equal(binom.ppf(random_quantiles, n_trials, p_success), 0)

    # Also check the endpoints: q=0 is expected to give -1, q=1 to give 0.
    for q, expected in ((0, -1), (1, 0)):
        assert_equal(binom.ppf(q, n_trials, p_success), expected)
Beispiel #2
0
def plot_acuity(logmar,
                accuracy,
                yerror,
                n_validation,
                name,
                conditions,
                condition_name,
                plot_directory,
                unit_label="logMAR"):
    """Plot classification accuracy per condition and save the figure as PNG.

    One error-bar curve is drawn per entry of ``conditions``; ``accuracy`` and
    ``yerror`` are indexed with the position of the condition. A dashed black
    reference line marks the 0.99 quantile of Binomial(n_validation, 0.5)
    divided by ``n_validation``.

    :param logmar: x values shared by all curves
    :param accuracy: indexable; ``accuracy[i]`` holds the y values of curve i
    :param yerror: indexable; ``yerror[i]`` holds the error bars of curve i
    :param n_validation: number of trials used for the binomial reference line
    :param name: used in the title and in the output file name
    :param conditions: sequence of condition values, used as legend labels
    :param condition_name: used in title/file name; ``None`` selects the
        "binning technique" title and the ``{name}_acuity.png`` file name
    :param plot_directory: directory the PNG is written to
    :param unit_label: x-axis label; ``"logMAR"`` also fixes ticks and limits
    """
    print(f"plotting {name} {condition_name} classification accuracy.")
    sig1 = np.repeat(
        binom.ppf(0.99, n_validation, 0.5) / n_validation, len(logmar))

    fig, ax = plt.subplots()
    for index, condition in enumerate(conditions):
        # isinstance also matches float subclasses such as numpy.float64, so
        # those get the two-decimal label as well.
        if isinstance(condition, float):
            label = f'{condition:.2f}'
        else:
            label = f'{condition}'

        ax.errorbar(logmar,
                    accuracy[index],
                    marker='o',
                    markersize=4,
                    capsize=4,
                    yerr=yerror[index],
                    label=label)
    ax.plot(logmar, sig1, 'k--')
    ax.set_ylabel("Accuracy")
    ax.set_xlabel(unit_label)
    if unit_label == "logMAR":
        ax.set_xticks(np.arange(1.6, 3.2, 0.2))
        ax.set_xlim(1.5, 3.25)

    ax.grid(which='both')
    ax.set_ylim(0.35, 1.05)
    ax.legend(loc=(1, 0.1))

    if condition_name is None:
        title = f'{name} classification by binning technique'
        filename = f"{name}_acuity.png"
    else:
        title = f'{name} classification by {condition_name}'
        filename = f"{name}-{condition_name}_acuity.png"
    ax.set_title(title)
    fig.tight_layout()
    fig.savefig(os.path.join(plot_directory, filename))
Beispiel #3
0
def get_m_n_from_bernoulli(N):
    """Return the ratio m/n for every trial count n in ``1 .. N-1``.

    For each n the binomial survival function (success probability 0.05) is
    evaluated on the integer grid running from ``binom.ppf(0, n, p)`` up to,
    but excluding, ``binom.ppf(1, n, p)``. ``find_m`` is then called with
    those probabilities and the level ``P_B = 0.05``.
    NOTE(review): ``find_m`` is defined elsewhere; presumably it returns the
    threshold m for that level -- confirm against its definition.
    """
    p, P_B = 0.05, 0.05
    trial_counts = np.arange(1, N)
    ratios = np.full(trial_counts.shape, np.nan)
    for n in trial_counts:
        grid_start = binom.ppf(0.00, n, p)
        grid_stop = binom.ppf(1.00, n, p)
        tail_probabilities = binom.sf(np.arange(grid_start, grid_stop), n, p)
        ratios[n - 1] = find_m(tail_probabilities, P_B) * 1. / n
    return ratios
def occurrence_error(n_planets, rate):
    """Return ``(lower_error, upper_error)`` for an occurrence rate.

    The number of trials is taken as ``n_planets / rate``. The 0.159 and
    0.841 binomial quantiles for that many trials (success probability
    ``rate``) are used to scale ``rate * n_planets`` into an upper and a
    lower value; the distances of those values from ``rate`` are returned.

    :param n_planets: number of detections
    :param rate: occurrence rate, used as the binomial success probability
    :return: tuple ``(rate - low, high - rate)``, or ``(nan, nan)`` when the
        computation raises (for example a zero ``rate``)
    """
    try:
        n = n_planets / rate
        p = rate
        high = rate * n_planets / binom.ppf(0.159, n, p)
        low = rate * n_planets / binom.ppf(0.841, n, p)
        return rate - low, high - rate
    except Exception:
        # Best-effort fallback for bad inputs. Unlike a bare ``except`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return np.nan, np.nan
Beispiel #5
0
    def test_round(self):
        """Check that the mean of many ``round(3.4)`` draws is plausible.

        The accepted range is ``floor(avg)`` plus the 0.001 / 0.999 binomial
        quantiles (n = number of samples, p = fractional part of ``avg``)
        divided by the number of samples.
        NOTE(review): presumably ``RandomState.round`` rounds up with
        probability equal to the fractional part -- confirm.
        """
        rng = RandomState()
        target = 3.4
        n_draws = 1000

        draws = [rng.round(target) for _ in range(n_draws)]
        observed_mean = np.mean(draws)

        base = np.floor(target)
        fraction = target % 1
        lower_bound = base + binom.ppf(0.001, n=n_draws, p=fraction) / n_draws
        upper_bound = base + binom.ppf(0.999, n=n_draws, p=fraction) / n_draws
        self.assertGreater(observed_mean, lower_bound)
        self.assertLess(observed_mean, upper_bound)
Beispiel #6
0
def lc_plot(n, k, p, titlename, outname=None):
    """Plot the binomial probability mass function for ``n`` trials.

    The x range runs from the 0.01 quantile up to (excluding) the 0.99
    quantile. The pmf is drawn as gray markers with thick translucent stems,
    overlaid with thin black stems from the frozen distribution.

    :param n: number of trials
    :param k: not used by the plot; kept so existing callers keep working
    :param p: success probability
    :param titlename: figure title
    :param outname: optional path; when truthy the figure is saved there
    """
    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(1, 1, 1)

    x = np.arange(binom.ppf(0.01, n, p), binom.ppf(0.99, n, p))
    pmf_values = binom.pmf(x, n, p)
    # Marker-only format string: the color comes from the keyword alone, so
    # matplotlib no longer sees two competing color specifications.
    ax.plot(x,
            pmf_values,
            'o',
            color='gray',
            ms=5,
            label='binomial probability mass function')
    ax.vlines(x, 0, pmf_values, colors='gray', lw=5, alpha=0.5)

    rv = binom(n, p)
    ax.vlines(x,
              0,
              rv.pmf(x),
              colors='k',
              linestyles='-',
              lw=1,
              label='frozen probability mass function')

    # Legend anchored just outside the right edge of the axes.
    ax.legend(bbox_to_anchor=(1.1, 1), loc=1, borderaxespad=0.5)
    plt.title(titlename)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_linewidth(2)
    ax.spines['left'].set_linewidth(2)

    if outname:
        plt.savefig(outname)

    plt.show()
Beispiel #7
0
    def test_merge_by_weight(self):
        """Check that merging two segments keeps each one in proportion to its weight.

        Two segments with weights 1/3 and 2/3 share one bin with a target
        count of 1, so every round must merge them into a single survivor.
        Over ``nrounds`` rounds the number of times segment 0 survives must
        lie inside the two-sided binomial interval for p = 1/3.
        """
        selected_counts = {0: 0, 1: 0}
        alpha = 0.01
        nrounds = 1000
        from scipy.stats import binom

        # lower and upper bounds of the (1 - alpha) = 99% interval for
        # selecting the segment with weight 1/3
        lb = binom.ppf(alpha / 2.0, nrounds, 1.0 / 3.0)
        ub = binom.ppf(1.0 - alpha / 2.0, nrounds, 1.0 / 3.0)

        system = WESTSystem()
        system.bin_mapper = RectilinearBinMapper([[0.0, 1.0]])
        system.bin_target_counts = np.array([1])
        system.pcoord_len = 2
        self.we_driver = WEDriver(system=system)
        self.system = system
        self._seg_id = 0

        segments = [
            Segment(n_iter=1,
                    seg_id=0,
                    pcoord=np.array([[0], [0.25]], dtype=np.float32),
                    weight=1.0 / 3.0),
            Segment(n_iter=1,
                    seg_id=1,
                    pcoord=np.array([[0], [0.75]], dtype=np.float32),
                    weight=2.0 / 3.0),
        ]

        for _iround in range(nrounds):
            for segment in segments:
                segment.endpoint_type = Segment.SEG_ENDPOINT_UNSET

            self.we_driver.new_iteration()
            self.we_driver.assign(segments)
            self.we_driver.construct_next()

            assert len(self.we_driver.next_iter_binning[0]) == 1
            newseg = self.we_driver.next_iter_binning[0].pop()

            survivor = segments[newseg.parent_id]
            # ~0 == -1 and ~1 == -2, so in this two-element list the
            # bitwise complement indexes the *other* segment.
            merged_away = segments[~newseg.parent_id]
            assert survivor.endpoint_type == Segment.SEG_ENDPOINT_CONTINUES
            assert merged_away.endpoint_type == Segment.SEG_ENDPOINT_MERGED

            selected_counts[newseg.parent_id] += 1

        print(selected_counts)
        assert lb <= selected_counts[0] <= ub, (
            'Incorrect proportion of histories selected. '
            'this is expected about {:%} of the time; retry test.'.format(
                alpha))
Beispiel #8
0
def calculate_CI(len_samples, confidence_level=0.95, n_points=1001):
    """Binomial confidence band over a grid of probabilities in [0, 1].

    Adapted from
    (https://git.ligo.org/lscsoft/bilby/blob/master/bilby/core/result.py#L1578)

    :param len_samples: number of trials N of the binomial distribution
    :param confidence_level: central probability mass covered by the band
    :param n_points: number of grid points between 0 and 1
    :return: ``(x_values, a, b)`` where ``a`` is the
        ``(1 - confidence_level) / 2`` quantile divided by N and ``b`` is the
        complementary upper-tail quantile divided by N. Both are forced to 0
        at the first grid point.
    """
    x_values = np.linspace(0, 1, n_points)
    N = len_samples
    tail_mass = (1. - confidence_level) / 2.
    high_quantile = binom.ppf(1 - tail_mass, N, x_values) / N
    low_quantile = binom.ppf(tail_mass, N, x_values) / N
    # Pin both curves to zero at the first grid point (p = 0).
    for curve in (high_quantile, low_quantile):
        curve[0] = 0
    return x_values, low_quantile, high_quantile
Beispiel #9
0
    def test_round_midpoint(self):
        """Check ``round_midpoint`` away from and exactly at the midpoint.

        3.4 and 3.6 must give 3 and 4. For 3.5 the mean of many draws must
        lie within ``floor(avg)`` plus the 0.0001 / 0.9999 binomial quantiles
        (n = number of samples, p = fractional part) divided by the number of
        samples.
        """
        rng = RandomState()

        for value, expected in ((3.4, 3), (3.6, 4)):
            self.assertEqual(rng.round_midpoint(value), expected)

        target = 3.5
        n_draws = 2000
        draws = [rng.round_midpoint(target) for _ in range(n_draws)]
        observed_mean = np.mean(draws)

        base = np.floor(target)
        fraction = target % 1
        lower_bound = base + binom.ppf(0.0001, n=n_draws, p=fraction) / n_draws
        upper_bound = base + binom.ppf(0.9999, n=n_draws, p=fraction) / n_draws
        self.assertGreaterEqual(observed_mean, lower_bound)
        self.assertLessEqual(observed_mean, upper_bound)
Beispiel #10
0
def fun_CI_builder(l_subr, pd_order_z, f_delta_k, f_alpha_k, f_epsilon):
    """Look up a confidence interval in ``pd_order_z`` from binomial ranks.

    ``f_delta_k`` is shifted down and up by volume-weighted ``f_epsilon``
    terms. The shifted values serve as success probabilities for the
    ``f_alpha_k / 2`` and ``1 - f_alpha_k / 2`` binomial quantiles with
    ``len(pd_order_z)`` trials. Those quantiles are used as row labels into
    the ``'mean'`` column of ``pd_order_z``.

    :param l_subr: sequence of objects exposing ``f_volume``, ``s_label`` and
        ``b_activate``; the first element supplies the total volume
    :param pd_order_z: table with a ``'mean'`` column, indexed via ``.loc``
    :param f_delta_k: base probability
    :param f_alpha_k: significance level
    :param f_epsilon: scale of the probability shift
    :return: ``[CI_u, CI_l]`` (upper value first)
    """
    def _active_volume(label):
        # Total volume of activated subregions carrying the given label.
        return sum(region.f_volume for region in l_subr
                   if region.s_label == label and region.b_activate is True)

    f_vol_S = l_subr[0].f_volume
    f_vol_C = _active_volume('C')
    f_vol_P = _active_volume('P')
    f_vol_M = _active_volume('M')

    n_rows = len(pd_order_z)
    delta_low = f_delta_k - float(f_vol_P * f_epsilon) / (f_vol_S * f_vol_C)
    delta_high = f_delta_k + float(f_vol_M * f_epsilon) / (f_vol_S * f_vol_C)
    lower_rank = binom.ppf(f_alpha_k / 2, n_rows, delta_low)
    upper_rank = binom.ppf(1 - f_alpha_k / 2, n_rows, delta_high)
    # Only the lower rank gets a NaN fallback (row 0).
    if math.isnan(lower_rank):
        lower_rank = 0
    CI_l = pd_order_z.loc[lower_rank, 'mean']
    CI_u = pd_order_z.loc[upper_rank, 'mean']
    return [CI_u, CI_l]
Beispiel #11
0
def test_issue_5122():
    """Check ``binom.ppf`` for success probability zero and random n."""
    p = 0
    n = np.random.randint(100, size=10)

    interior_quantiles = np.linspace(0.01, 0.99, 10)
    # (quantile, expected ppf): q=0 gives -1, interior q gives 0, q=1 gives n.
    cases = (
        (0, -1),
        (interior_quantiles, 0),
        (1, n),
    )
    for q, expected in cases:
        assert_equal(binom.ppf(q, n, p), expected)
def binom_confidence_interval(alpha, N_discr, p_discr):
    '''
    Two-sided interval of size 1 - p_discr for the observed success
    fraction of a binomial with N_discr trials and success probability
    alpha.

    Equivalently: in a two-sided test with significance level p_discr of
    the hypothesis that the success probability is alpha, the null
    hypothesis is not rejected when the observed fraction beta (beta*N_discr
    successful trials out of N_discr) lies in the interval (lower, upper).

    Returns the tuple (lower, upper), both expressed as fractions of
    N_discr.
    '''
    tail = p_discr / 2
    lower_count = binom.ppf(tail, N_discr, alpha)
    upper_count = binom.ppf(1 - tail, N_discr, alpha)
    return lower_count * 1. / N_discr, upper_count * 1. / N_discr
Beispiel #13
0
def binom_confidence_interval(alpha, N_discr, p_discr):
    '''
    Two-sided interval of size 1 - p_discr for the observed success
    fraction of a binomial with N_discr trials and success probability
    alpha.

    Equivalently: in a two-sided test with significance level p_discr of
    the hypothesis that the success probability is alpha, the null
    hypothesis is not rejected when the observed fraction beta (beta*N_discr
    successful trials out of N_discr) lies in the interval (lower, upper).

    Returns the tuple (lower, upper), both expressed as fractions of
    N_discr.
    '''
    tail = p_discr / 2
    lower_count = binom.ppf(tail, N_discr, alpha)
    upper_count = binom.ppf(1 - tail, N_discr, alpha)
    return lower_count * 1. / N_discr, upper_count * 1. / N_discr
Beispiel #14
0
def err(ci, k, j):
    """Relative deviation of a binomial quantile from the mean ``j * k``.

    :param ci: quantile level passed to ``binom.ppf``
    :param k: number of trials
    :param j: success probability
    :return: ``abs(quantile / (j * k) - 1)``, or ``1e9`` when the quantile
        is zero
    """
    quantile = binom.ppf(ci, k, j)
    # A zero quantile is an edge case, so a big error is reported instead.
    return 1e9 if quantile == 0 else abs(quantile / (j * k) - 1)
def mcnemar_test(cont_table):
    """McNemar's test for paired nominal data in a 2x2 contingency table.

    Tests whether the marginal row and column probabilities are equal, i.e.
    whether the two off-diagonal (discordant) counts b and c are equivalent.
    Found in statsmodels as mcnemar.

    Parameters
    ----------
    cont_table: list or numpy array, 2 x 2
        A 2x2 contingency table

    Return
    ------
    chi_squared: float
        For b + c > 25 the continuity-corrected statistic
        (|b - c| - 1)^2 / (b + c); otherwise min(b, c)
    p: float
        For b + c > 25 the upper tail of the chi-square distribution with
        one degree of freedom; otherwise a value built from the binomial
        distribution with b + c trials and p = 0.5.
        NOTE(review): the small-sample branch subtracts the pmf evaluated at
        the 0.99 quantile -- confirm this is the intended correction.

    Raises
    ------
    AttributeError
        If the checked table is not 2x2
    """
    table = _check_table(cont_table, True)
    if table.shape != (2, 2):
        raise AttributeError(
            "McNemar's Test is meant for a 2x2 contingency table")
    b = table[0, 1]
    c = table[1, 0]
    discordant = b + c

    if discordant > 25:
        statistic = pow(abs(b - c) - 1, 2) / discordant
        return statistic, 1 - chi2.cdf(statistic, 1)

    statistic = min(b, c)
    two_sided = 2 * binom.cdf(statistic, discordant, 0.5)
    correction = binom.pmf(binom.ppf(0.99, discordant, 0.5), discordant, 0.5)
    return statistic, two_sided - correction
Beispiel #16
0
def binomInvCDF(prob, n, p, loc):
    """Inverse CDF (quantile function) of a shifted binomial distribution.

    :param prob: cumulative probability (or array of probabilities)
    :param n: number of trials; must be a positive integer
    :param p: success probability; must lie strictly between 0 and 1
    :param loc: location shift added to the quantile
    :return: the quantile(s) returned by ``binom.ppf``
    :raises ValueError: if ``p`` or ``n`` is outside its valid range
    """
    import numbers

    # The original checks were inverted: valid input returned None or raised.
    if not (0 < p < 1):
        raise ValueError("Error: p must be strictly between 0 and 1")
    if not (isinstance(n, numbers.Integral) and n > 0):
        raise ValueError("Error: n must be a positive integer")
    # scipy's keyword for the shift is ``loc``; ``x`` is not accepted.
    return binom.ppf(prob, n, p, loc=loc)
Beispiel #17
0
def qbinom(q, size=1, prob=0.5, lowertail=True):
    """
    ============================================================================
                                                                        qbinom()
    ============================================================================
    The quantile function for the binomial distribution.
    You provide a quantile (eg q=0.75) or array of quantiles, and it returns the
    value along the binomial distribution that corresponds to the qth quantile.

    USAGE:
    qbinom(q, size=1, prob=0.5, lowertail=True)

    :param q:         float. or array of floats. The quantile(s), in [0, 1]
    :param size:      int. Number of trials
    :param prob:      float. Probability of a success
    :param lowertail: bool. If True, q is a lower-tail probability
                      P[X <= x]; otherwise an upper-tail probability P[X > x]
    :return:          the value(s) corresponding to the quantiles q. Results
                      are never below 0; values of q outside [0, 1] give nan.
    ============================================================================
    """
    import numpy as np

    if lowertail:
        quantiles = binom.ppf(q=q, n=size, p=prob)
    else:
        quantiles = binom.isf(q=q, n=size, p=prob)
    # scipy reports -1 (one below the support) at the zero-probability end,
    # e.g. ppf(0, 11, 0.3) == -1. Clamp to 0, the smallest possible count.
    # np.maximum propagates nan, so invalid quantiles stay nan.
    return np.maximum(quantiles, 0)
Beispiel #18
0
def err(ci, k, j):
    """Relative deviation of a binomial quantile from the mean ``j * k``.

    :param ci: quantile level passed to ``binom.ppf``
    :param k: number of trials
    :param j: success probability
    :return: ``abs(quantile / (j * k) - 1)``, or ``1e9`` when the quantile
        is zero
    """
    quantile = binom.ppf(ci, k, j)
    # A zero quantile is an edge case, so a big error is reported instead.
    return 1e9 if quantile == 0 else abs(quantile / (j * k) - 1)
Beispiel #19
0
def qbinom(q, size=1, prob=0.5, lowertail=True):
    """
    ============================================================================
                                                                        qbinom()
    ============================================================================
    The quantile function for the binomial distribution.
    You provide a quantile (eg q=0.75) or array of quantiles, and it returns the
    value along the binomial distribution that corresponds to the qth quantile.

    USAGE:
    qbinom(q, size=1, prob=0.5, lowertail=True)

    :param q:         float. or array of floats. The quantile(s), in [0, 1]
    :param size:      int. Number of trials
    :param prob:      float. Probability of a success
    :param lowertail: bool. If True, q is a lower-tail probability
                      P[X <= x]; otherwise an upper-tail probability P[X > x]
    :return:          the value(s) corresponding to the quantiles q. Results
                      are never below 0; values of q outside [0, 1] give nan.
    ============================================================================
    """
    import numpy as np

    if lowertail:
        quantiles = binom.ppf(q=q, n=size, p=prob)
    else:
        quantiles = binom.isf(q=q, n=size, p=prob)
    # scipy reports -1 (one below the support) at the zero-probability end,
    # e.g. ppf(0, 11, 0.3) == -1. Clamp to 0, the smallest possible count.
    # np.maximum propagates nan, so invalid quantiles stay nan.
    return np.maximum(quantiles, 0)
Beispiel #20
0
def get_FDR_cutoff_binom(readlengths, genelength, alpha=0.05, mincut=2):
    '''
    Model peak height with a binomial distribution and return the number of
    reads per position needed to reach the FDR level.

    :param readlengths: list, read lengths observed in a genomic region
    :param genelength: int, length of the genomic region
    :param alpha: float, default = 0.05, FDR alpha value
    :param mincut: int, default 2, minimal peak height (no. reads per
        position); returned whenever the binomial cutoff is smaller, or when
        there are no reads at all
    :return: int, minimal peak height required to reach FDR
    :raises ValueError: if mean read length / genelength exceeds 1
    '''
    number_reads = len(readlengths)
    if number_reads == 0:
        return mincut

    read_length = np.array(readlengths)
    mean_read_length = np.mean(read_length)
    # Per-read probability of covering a given position.
    prob = float(mean_read_length) / float(genelength)
    if prob > 1:
        raise ValueError("probability of >= 1 read per-base > 1")

    try:
        # ppf is the inverse CDF: the read count at the (1 - alpha) quantile.
        k = int(binom.ppf(1 - (alpha), number_reads, prob))
        return max(k, mincut)
    except BaseException:
        # Dump the inputs for debugging, then let the error propagate.
        print(read_length, mean_read_length, genelength, prob, alpha,
              number_reads)
        raise
Beispiel #21
0
    def _compute_bad_bins_all_channels(self):
        """Flag bins whose transit count, summed over all channels, is too high.

        Transits of every TCE that has a transit selected by
        ``self.mask_transits_in_bad_bins`` are dropped first. The remaining
        transits are counted per ``bin_id`` and compared with the
        ``1 - self.probability_threshold_combined`` binomial quantile, using
        the number of remaining TCEs as trials and (median count / number of
        TCEs) as success probability.

        Stores a boolean mask over ``self.transits`` in
        ``self.mask_transits_in_bad_bin_ids`` and returns the index of the
        bins above the threshold.
        """
        transits = self.transits
        flagged_tces = transits[self.mask_transits_in_bad_bins].tce.unique()
        clean_transits = transits[~transits.tce.isin(flagged_tces)]

        # Make a cut across all channels
        counts_per_bin = clean_transits.groupby('bin_id').size()
        n_tces = clean_transits.tce.unique().size
        p_transit = counts_per_bin.median() / n_tces
        count_threshold = binom.ppf(
            1 - self.probability_threshold_combined, int(n_tces), p_transit)

        bins_to_remove = counts_per_bin[
            counts_per_bin > count_threshold].index
        print('Identified {} bad bins for all channels.'.format(
            len(bins_to_remove)))

        suspicious = self.transits['bin_id'].isin(bins_to_remove)
        self.mask_transits_in_bad_bin_ids = suspicious
        print('Flagged {} out of {} transits as suspicious.'.format(
            self.mask_transits_in_bad_bin_ids.sum(),
            len(self.mask_transits_in_bad_bin_ids)))

        return bins_to_remove
Beispiel #22
0
    def _compute_binomial_thresholds(self):
        """Add expected counts and a binomial transit threshold to ``self.rates``.

        For every (skygroup, season) row the reference set is made of the
        rows with the same skygroup, a different season, and a channel not in
        ``OUTLIER_CHANNELS``. From that set the method derives the median
        transits per bin, the median number of TCEs, and the
        ``1 - self.probability_threshold`` binomial quantile, written to the
        columns ``transit_rate_expected``, ``n_tces_expected`` and
        ``transit_rate_threshold``.

        Returns the modified ``rates`` table.
        """
        rates = self.rates
        skygroups = rates.index.get_level_values('skygroup')
        seasons = rates.index.get_level_values('season')
        regular_channel = ~rates.channel.isin(OUTLIER_CHANNELS)

        expected_tces = []
        expected_rates = []
        thresholds = []
        for skygroup, season in rates.index:
            reference = rates[(skygroups == skygroup)
                              & (seasons != season)
                              & regular_channel]

            # Probability for a TCE to produce a transit in a given bin
            n_transits_per_bin = self.binsize * reference.transits_per_day
            n_tces = reference.n_tces
            mean_transit_probability = (n_transits_per_bin / n_tces).mean()

            expected_rates.append(n_transits_per_bin.median())
            expected_tces.append(n_tces.median())
            quantile = binom.ppf(1 - self.probability_threshold,
                                 int(n_tces.median()),
                                 mean_transit_probability)
            thresholds.append(int(quantile))

        rates['n_tces_expected'] = expected_tces
        rates['transit_rate_expected'] = expected_rates
        rates['transit_rate_threshold'] = thresholds
        return rates
    def binomial_dist(self, value, bound_min, bound_max, population):
        """Draw how many of ``population`` individuals fall below ``value``.

        For a positive population a single uniform random number is mapped
        through the inverse binomial CDF, with success probability
        ``(value - bound_min) / (bound_max - bound_min)``. Otherwise one
        uniform draw per individual is counted.

        :return: the binomial quantile for positive populations, else the
            per-individual count
        """
        # FIXME Population should never be inferior to zero !
        if not population > 0:
            # NOTE(review): int(population) is <= 0 on this path, so the
            # range is empty and the count is always 0.
            return sum(
                1 for _ in range(int(population))
                if random.uniform(bound_min, bound_max) < value)

        # Probability of picking a number between bound_min and bound_max
        # that is also smaller than value.
        p = (value - bound_min) / (bound_max - bound_min)
        assert 0 <= p <= 1, "Not a probability !?"

        # Inverse-CDF method: a uniform draw pushed through the binomial
        # quantile function yields a binomially distributed number.
        uniform_draw = np.random.uniform()
        return binom.ppf(uniform_draw, population, p)
def get_binomial_table(p = 0.5, alpha = 0.05, trial_range = 8):
    '''Tabulate, per :math:`\\delta`-neighborhood size, how many of its points must fall
    outside the :math:`\\varepsilon`-neighborhood to reject the Null Hypothesis at
    significance level :math:`\\alpha`.

    Parameters
    ----------
    p : `float`, optional
        Binomial success probability (Default is `p = 0.5`).
    alpha : `float`, optional
        Significance level; the table holds the `1 - alpha` binomial quantile
        (Default is `alpha = 0.05`).
    trial_range : `int`, optional
        Number of consecutive neighborhood sizes to tabulate; must be at
        least 1 (Default is `trial_range = 8`).

    Returns
    -------
    delta_to_epsilon_amount : `dict`
        Maps each number of `delta_points` to the corresponding number of
        `epsilon_points` needed to reject the Null.

    Notes
    -----
    The number of trials of the binomial distribution is the number of points
    in the :math:`\\delta`-neighborhood. Sizes always start at 8, so
    `trial_range = 8` covers neighborhood sizes 8 up to 15.
    '''
    assert trial_range >= 1
    smallest_size = 8
    sizes = range(smallest_size, smallest_size + trial_range)
    return {size: int(binom.ppf(1-alpha, size, p)) for size in sizes}
Beispiel #25
0
 def get_covarying_errors(self):
     """Find low-count nucleotides at covarying sites that look like errors.

     An error rate is estimated from the non-covarying sites as
     |coverage - consensus count| / consensus count. For every site the
     ``1 - self.error_threshold`` binomial quantile (trials = coverage) is
     stored as ``n_error``. At the sites listed in ``self.covarying_sites``,
     every nucleotide count that is positive but not above ``n_error`` is
     reported.

     Stores the result in ``self.covarying_errors`` and returns it (long
     format, sorted by site).
     """
     counts = self.get_nucleotide_counts()
     background = counts.loc[
         ~counts.covarying,
         ['nucleotide_max', 'coverage']
     ].sum()
     total_coverage = background['coverage']
     total_consensus = background['nucleotide_max']
     error_rate = np.abs(total_coverage - total_consensus) / total_consensus

     def error_ceiling(count):
         # Largest count still attributed to sequencing error.
         return binom.ppf(1-self.error_threshold, count, error_rate)

     counts.loc[:, 'n_error'] = counts.loc[:, 'coverage'].apply(error_ceiling)
     counts.loc[:, 'site'] = counts.index
     # Reset the flag, then mark the configured covarying sites.
     counts.loc[:, 'covarying'] = False
     counts.loc[self.covarying_sites, 'covarying'] = True

     long_counts = counts.loc[counts['covarying'], :].melt(
         id_vars=['n_error', 'site'], value_vars=['A', 'C', 'G', 'T']
     )
     observed = long_counts['value']
     ceiling = long_counts['n_error']
     is_error = (observed <= ceiling) & (observed > 0)
     errors = long_counts.loc[is_error, :]
     errors = errors.sort_values(by='site').reset_index(drop=True)
     self.covarying_errors = errors
     return errors
def qbinom(p, size, prob=0.5):
    """
    Quantile function of the binomial distribution.

    Returns the value of ``scipy.stats.binom.ppf`` for cumulative
    probability ``p``, ``size`` trials and success probability ``prob``,
    with no location shift.
    """
    from scipy.stats import binom
    return binom.ppf(q=p, n=size, p=prob, loc=0)
def find_sample_size_for_stopping_prob_r2bravo(stopping_probability, N_w, N_l, alpha, underlying=None, right=None):
    """
    Finds the first round size that achieves the passed stopping_probability
    for an R2 Bravo audit (with no stratification).

    A binary search over round sizes between 1 and N = N_w + N_l is used.

    :param stopping_probability: desired probability of stopping
    :param N_w: number of votes for the winner
    :param N_l: number of votes for the loser
    :param alpha: risk limit the p-value is compared with
    :param underlying: not used by this function
    :param right: not used; the upper search bound is always N
    :return: the smallest round size found by the search; N (with a printed
        notice) when no smaller size qualifies
    :raises ValueError: if the p-value computation yields NaN
    """

    N = N_w + N_l

    left = 1
    right = N

    while True:
        n = math.ceil((left + right) / 2)

        # compute the 1 - stopping_probability quantile of the alt dist
        # kmax where pr[k >= kmax | alt] = stopping_probability
        # floor because we need to ensure at least a stopping_probability prob of stopping
        kmax = math.floor(binom.ppf(1 - stopping_probability, n, N_w / N))

        # compute pvalue for this kmax
        pvalue = r2bravo_pvalue_direct_count(winner_votes=kmax, n=n, popsize=N, alpha=alpha, Vw=N_w, Vl=N_l, null_margin=0)

        # A NaN p-value satisfies neither comparison; fail loudly instead of
        # searching forever.
        if pvalue != pvalue:
            raise ValueError("p-value is NaN for round size {}".format(n))

        # update binary search bounds
        if pvalue > alpha:
            left = n
        else:
            right = n

        # when left and right converge, right is the minimum round size that
        # achieves stopping_probability. Using <= also terminates when left
        # catches up with right (N <= 2), which previously looped forever.
        if right - left <= 1:
            if right == N:
                print("required round size is greater than stratum size")
            return right
Beispiel #28
0
 def monitor(self, data, model_id=0):
     """Score ``data`` against one operation mode and flag excursions.

     Returns a tuple of:
       * the monitored statistic (filtered negative log-likelihood),
       * the threshold of the selected operation mode,
       * a flag telling whether the behaviour is out of the operation mode,
       * the number of samples beyond the threshold,
       * the data points that were beyond the threshold,
       * the id of the operation mode.
     """
     gamma = 0.4  # Filter rate.
     n_samples = data.shape[0]
     op_mode = self.models[model_id]

     # Largest number of out-of-bounds samples still tolerated.
     limit = np.round(
         binom.ppf(op_mode.confidence, n_samples, 1 - op_mode.confidence))

     # Log likelihood per sample; the second return value is not needed.
     logprob, _ = op_mode.model.score_samples(data)
     monitored = -exponential_filter(logprob, gamma)

     idx_out = monitored > op_mode.threshold
     num_out = np.sum(idx_out)
     out = num_out > limit
     data_out = data[idx_out, ]
     return monitored, op_mode.threshold, out, num_out, data_out, model_id
Beispiel #29
0
def compute_unconditional_power(margin, N_wl, pi, alpha):
    '''
    Compute unconditional power of the test.

    The per-draw contributions returned by ``compute`` are evaluated in
    parallel for every sample size between the 0.005 quantile (inclusive)
    and the 0.995 quantile (exclusive) of Binomial(N_wl, pi), then summed.

    margin = vote margin (votes for w / votes for w or l) in the population
    N_wl = the total number of ballots for either the winner or loser in the population,
    pi = the sampling probability,
    alpha = the type I error rate
    '''
    draw_floor = binom.ppf(0.005, N_wl, pi)
    draw_ceiling = binom.ppf(0.995, N_wl, pi)

    runner = Parallel(n_jobs=num_cores)
    tasks = (delayed(compute)(margin, N_wl, pi, alpha, n)
             for n in range(int(draw_floor), int(draw_ceiling)))
    return sum(runner(tasks))
def get_probable_maximum_selected(
        n_total_trials, n_trials, selection_prob, chance=(1.0 / 100.0)):
    """ Get the likely maximum number of items that will be selected from a
        set of n_trials, out of a total set of n_total_trials, when each item
        is selected with probability selection_prob.

        The result is the binomial quantile at 1 - chance / n_total_trials.
    """
    per_trial_chance = chance / float(n_total_trials)
    quantile_level = 1.0 - per_trial_chance
    return binom.ppf(quantile_level, n_trials, selection_prob)
Beispiel #31
0
def get_probable_maximum_selected(
        n_total_trials, n_trials, selection_prob, chance=(1.0 / 100.0)):
    """ Get the likely maximum number of items that will be selected from a
        set of n_trials, out of a total set of n_total_trials, when each item
        is selected with probability selection_prob.

        The result is the binomial quantile at 1 - chance / n_total_trials.
    """
    per_trial_chance = chance / float(n_total_trials)
    quantile_level = 1.0 - per_trial_chance
    return binom.ppf(quantile_level, n_trials, selection_prob)
Beispiel #32
0
    def parallel_forward_binom_step(self, dB: int = 0, num_sims=10000):
        """Advance the stochastic S/I/R/D state by one step.

        New cases are drawn from a binomial distribution, deaths and
        recoveries from Poisson distributions. The new state, the reproduction
        number used, the case counts and the binomial quantiles at
        ``self.CI`` and ``1 - self.CI`` are appended to the history lists on
        ``self``.

        :param dB: not used by this method
        :param num_sims: number of draws requested from each random sampler
        """
        # Work on copies of the latest state
        histories = (self.S, self.I, self.R, self.D, self.N)
        susceptible, infected, recovered, dead, population = (
            history[-1].copy() for history in histories)

        # Infection probability for this step
        Rt = self.Rt0 * susceptible / population
        p_infection = self.gamma * Rt * infected / population
        n_susceptible = susceptible.astype(int)

        new_cases = binom.rvs(n=n_susceptible, p=p_infection, size=num_sims)
        self.upper_CI.append(
            binom.ppf(self.CI, n=n_susceptible, p=p_infection))
        self.lower_CI.append(
            binom.ppf(1 - self.CI, n=n_susceptible, p=p_infection))

        infected += new_cases
        susceptible -= new_cases

        # Deaths, then recoveries, both driven by the updated infected count
        new_deaths = poisson.rvs(self.m * self.gamma * infected,
                                 size=num_sims)
        dead += new_deaths

        new_recoveries = poisson.rvs((1 - self.m) * self.gamma * infected,
                                     size=num_sims)
        recovered += new_recoveries

        infected -= (new_deaths + new_recoveries)

        # Compartments cannot go negative
        susceptible = susceptible.clip(0)
        infected = infected.clip(0)
        dead = dead.clip(0)

        population = susceptible + infected + recovered

        # Record the new state
        self.Rt.append(Rt)
        self.S.append(susceptible)
        self.I.append(infected)
        self.R.append(recovered)
        self.D.append(dead)
        self.N.append(population)
        self.dT.append(new_cases)
        self.total_cases.append(infected + recovered + dead)
Beispiel #33
0
def testBinom():  # {{{
    """
    Binomial distribution (discrete) demo.

    Example of a binomial question: when tossing a coin 10 times, what is the
    probability of getting heads exactly twice?

    Each trial has two outcomes: the event either happens or it does not.
    """

    # Set-up: n (number of Bernoulli trials) and p (probability of the event)
    # X axis: the event occurs k times in n trials
    # Y axis: probability
    n = 100  # for large n (np > 5 and nq > 5), X is approximately N(np, npq)
    p = 0.5
    xs = np.arange(binom.ppf(0.01, n, p), binom.ppf(0.99, n, p))

    # E(X) = np, D(X) = np(1-p)
    mean, var, skew, kurt = binom.stats(n, p, loc=0, moments='mvsk')
    print("mean: %.2f, var: %.2f, skew: %.2f, kurt: %.2f" %
          (mean, var, skew, kurt))

    fig, axs = plt.subplots(1, 3)

    # First panel shows the pmf, second panel the cdf
    panels = (
        (axs[0], binom.pmf, 'binom pmf'),
        (axs[1], binom.cdf, 'binom cdf'),
    )
    for axis, distribution_fn, label in panels:
        axis.plot(xs, distribution_fn(xs, n, p), 'bo', markersize=5,
                  label=label)
        axis.legend()

    # Third panel: empirical pmf of random variates
    data = binom.rvs(n, p, size=1000)
    import sys
    sys.path.append("../../thinkstats")
    import Pmf
    empirical = Pmf.MakePmfFromList(data)
    xs, ys = empirical.Render()
    axs[2].plot(xs, ys, 'bo', markersize=5, label='rvs pmf')
    axs[2].legend()

    plt.show()
Beispiel #34
0
def plot_with_uniform_band(values,
                           ci_level,
                           x_label,
                           n_bins=30,
                           figsize=(10, 4),
                           ylim=[0, 50]):
    '''
    Plots the PIT/HPD histogram together with the confidence band the bin
    counts would have if the PIT/HPD values followed a uniform distribution.

    @param values: a numpy array with PIT/HPD values
    @param ci_level: a float between 0 and 1 indicating the size of the confidence level
    @param x_label: a string, populates the x_label of the plot
    @param n_bins: an integer, the number of bins in the histogram
    @param figsize: a tuple, the plot size (width, height)
    @param ylim: a list of two elements, including the lower and upper limit for the y axis

    @returns The matplotlib figure object with the histogram of the PIT/HPD values and the CI for the uniform distribution
    '''

    # Number of values being histogrammed
    n = values.shape[0]

    # Band limits: binomial quantiles for a bin hit with probability 1/n_bins
    ci_quantity = (1 - ci_level) / 2
    band_quantiles = (ci_quantity, ci_level + ci_quantity)
    low_lim, upp_lim = (
        binom.ppf(q=level, n=n, p=1 / n_bins) for level in band_quantiles)

    # Build the figure
    fig = plt.figure(figsize=figsize)
    plt.hist(values, bins=n_bins)
    for limit in (low_lim, upp_lim):
        plt.axhline(y=limit, color='grey')
    plt.axhline(y=n / n_bins, label='Uniform Average', color='red')
    band_x = np.linspace(0, 1, 100)
    plt.fill_between(x=band_x,
                     y1=np.repeat(low_lim, 100),
                     y2=np.repeat(upp_lim, 100),
                     color='grey',
                     alpha=0.2)
    plt.legend(loc='best', prop={'size': 18})
    plt.xlabel(x_label, size=20)
    plt.ylim(ylim)
    plt.xticks(size=16)
    plt.yticks(size=16)
    # Close the figure in pyplot; the caller still receives the object.
    plt.close()
    return fig
Beispiel #35
0
def npfs(X, y, n_select, base="mim", alpha=.01, n_bootstraps=100):
  """
  Bootstrap-based feature selection: run a FEAST base selector on bootstrap
  samples and keep the features selected more often than the binomial
  ``1 - alpha`` quantile.

  Parameters
  ----------
  X : array-like, shape = (n_samples, n_features_in)
      Sample vectors.
  
  y : array-like, shape = (n_samples,)
      Target vector (class labels).

  n_select : int
      Number of features the base method selects per bootstrap.
  
  base : string
      PyFeast feature selection method. ['mim', 'mrmr', 'jmi']
  
  alpha : double
      Size of the hypothesis test for NPFS 
  
  n_bootstraps : int
      Number of bootstraps

  Returns
  -------
  selections : list
      Indices of the selected features. Length is variable.
  bern_matrix : array, shape = (n_features_in, n_bootstraps)
      1 where a feature was selected in a bootstrap, 0 otherwise.
  delta : float
      Selection-count threshold (binomial quantile).

  Raises
  ------
  AttributeError
      If ``base`` is not a method of FEAST.
  ValueError
      If ``len(y)`` differs from ``X.shape[0]``.
  """
 
  try: 
    fs_method = getattr(feast, base)
  except AttributeError as err:
    # getattr signals a missing method with AttributeError, not ImportError.
    raise AttributeError("Method does not exist in FEAST") from err
  
  n_samp, n_feat = X.shape

  # Validate before binning; the error was previously built but never raised.
  if n_samp != len(y):
    raise ValueError('len(y) and X.shape[0] must be equal.')

  X = bin_data(X, n_bins=np.sqrt(n_samp))

  bern_matrix = np.zeros((n_feat,n_bootstraps))

  for n in range(n_bootstraps):
    # generate a random sample (with replacement)
    idx = np.random.randint(0, n_samp, n_samp)
    sels = fs_method(1.0*X[idx], y[idx], n_select)
    b_sels = np.zeros((n_feat,))
    b_sels[sels] = 1.
    bern_matrix[:, n] = b_sels

  # Count a feature would reach by chance with probability n_select / n_feat.
  delta = binom.ppf(1-alpha, n_bootstraps, 1.*n_select/n_feat)
  z = np.sum(bern_matrix, axis=1)

  selections = [k for k in range(n_feat) if z[k] > delta]
  
  return selections, bern_matrix, delta
def generate_multivariate_binomial(cpu,mem,num_tasks):
    """Draw negatively correlated (cpu, mem, value) triples for ``num_tasks`` tasks.

    Three standard-normal variates with pairwise covariance -0.5 are sampled
    per task. Each is pushed through the normal CDF to obtain a probability,
    which is then mapped to:

    * a Binomial(cpu, 8/cpu) quantile for the CPU demand,
    * a Binomial(mem, 8/mem) quantile for the memory demand,
    * a value scaled linearly onto the interval [1, 100].

    Returns three lists (cpus, mems, values), each of length ``num_tasks``.
    """
    zero_mean = [0, 0, 0]
    neg_cov = [[1, -0.5, -0.5], [-0.5, 1, -0.5], [-0.5, -0.5, 1]]
    samples = np.random.multivariate_normal(zero_mean, neg_cov, num_tasks)
    x, y, z = samples.T

    cpus = [binom.ppf(norm.cdf(ix), cpu, 8/cpu) for ix in x]
    mems = [binom.ppf(norm.cdf(iy), mem, 8/mem) for iy in y]
    values = [norm.cdf(iz)*(100-1)+1 for iz in z]
    return cpus,mems,values
def decision_threshold(overall=0.99, accuracy=0.95, samples=1000):
    """
    Compute the sub-image count threshold from a binomial distribution.

    The threshold is the ``overall`` quantile of
    Binomial(samples, 1 - accuracy), truncated to an integer.

    :param overall: Required certainty that the image is LevelA
    :param accuracy: Accuracy of predicting LevelB as LevelB
    :param samples: Total number of sub images
    :return: integer; if more than this many sub images are LevelA, the image is LevelA
    """
    misclassification_rate = 1 - accuracy
    quantile = binom.ppf(overall, samples, misclassification_rate)
    return int(quantile)
Beispiel #38
0
  def fit(self, data, labels):
    """
    Run the bootstrap feature selection and return the relevant features.

    @data - data in a numpy array. here are some suggestions for formatting
      the data.
      len(data) = n_observations
      len(data.transpose()) = n_features
    @labels - numerical class labels in a numpy array.
      len(labels) = n_observations

    Returns a numpy array with the indices of the features whose selection
    count across the bootstraps exceeds the binomial threshold. The same
    array is stored in self.selected_features, and the per-bootstrap
    selection indicators are stored in self.Bernoulli_matrix.

    Raises AttributeError when self.fs_method is not provided by FEAST and
    ValueError when n_select/n_features + beta exceeds 1.
    """
    data, labels = self.__check_data(data, labels)
    # getattr signals a missing method with AttributeError, not ImportError.
    try:
      fs_method = getattr(feast, self.fs_method)
    except AttributeError:
      raise AttributeError("Method does not exist in FEAST")

    self.n_observations = len(data)
    self.n_features = len(data.transpose())
    self.method = fs_method

    # @Z - contains the observations of the Bernoulli random variables
    #      that are whether the feature were or were not selected
    Z = np.zeros( (self.n_features, self.n_bootstraps) )
    self.data = data
    self.labels = labels

    if self.parallel is None:
      for b in range(self.n_bootstraps):
        sf = self.boot_iteration()
        Z[sf, b] = 1  # mark the features selected with a '1'.
    else:
      pool = Pool(processes = self.parallel)
      try:
        sfs = pool.map(__call__, (self for x in range(self.n_bootstraps)))
      finally:
        # always release the worker processes, even if a bootstrap fails
        pool.close()
        pool.join()
      for b, sf in enumerate(sfs):
        Z[sf, b] = 1

    z = np.sum(Z, axis=1)  # z is a binomial random variable
    # compute the neyman-pearson threshold (include the bias term)
    p = (1.0*self.n_select)/self.n_features + self.beta
    if p > 1.0: # user chose \beta poorly -- not a valid probability
      raise ValueError("p+beta > 1 -> Invalid probability")

    delta = binom.ppf(1 - self.alpha, self.n_bootstraps, p)
    # based on the threshold, determine which features are relevant and return
    # them in a numpy array
    selected_features = [k for k in range(self.n_features) if z[k] > delta]

    self.Bernoulli_matrix = Z
    self.selected_features = np.array(selected_features)
    return self.selected_features
Beispiel #39
0
def qbinom(p, n):
  """
  Quantile function of a binomial distribution with success
  probability 0.5 (counterpart of R's qbinom).

  Gives the smallest k such that Prob(X <= k) >= p.

  :param p: quantile level
  :param n: number of trials
  :return: k
  """
  success_probability = 0.5
  return binom.ppf(p, n, success_probability)
Beispiel #40
0
 def test_merge_by_weight(self):
     """Check that merging picks the surviving segment in proportion to weight.

     Two segments with weights 1/3 and 2/3 share a single bin whose target
     count is 1, so every round must merge them into one. Over ``nrounds``
     rounds the number of times segment 0 survives is compared against the
     central (1 - alpha) interval of Binomial(nrounds, 1/3). The test is
     statistical and can fail by chance about ``alpha`` of the time.
     """
     selected_counts = {0: 0, 1: 0}
     alpha = 0.01
     nrounds = 1000
     from scipy.stats import binom
     # lower and upper bounds of the (1 - alpha) = 99% interval for selecting
     # the segment with weight 1/3
     lb = binom.ppf(alpha/2.0, nrounds, 1.0/3.0)
     ub = binom.ppf(1.0-alpha/2.0, nrounds, 1.0/3.0)

     system = WESTSystem()
     system.bin_mapper = RectilinearBinMapper([[0.0, 1.0]])
     system.bin_target_counts = numpy.array([1])
     system.pcoord_len = 2
     self.we_driver = WEDriver(system=system)
     self.system = system
     self._seg_id = 0

     segments = [Segment(n_iter=1, seg_id=0, pcoord=numpy.array([[0],[0.25]], dtype=numpy.float32),weight=1.0/3.0),
                 Segment(n_iter=1, seg_id=1, pcoord=numpy.array([[0],[0.75]], dtype=numpy.float32),weight=2.0/3.0)]

     # range() works on both Python 2 and 3; 1000 iterations is small.
     for _iround in range(nrounds):
         for segment in segments:
             segment.endpoint_type = Segment.SEG_ENDPOINT_UNSET

         self.we_driver.new_iteration()
         self.we_driver.assign(segments)
         self.we_driver.construct_next()

         assert len(self.we_driver.next_iter_binning[0]) == 1
         newseg = self.we_driver.next_iter_binning[0].pop()

         # ~0 == -1 and ~1 == -2, so with two segments ~parent_id indexes
         # the *other* segment, which must have been merged away.
         assert segments[newseg.parent_id].endpoint_type == Segment.SEG_ENDPOINT_CONTINUES
         assert segments[~newseg.parent_id].endpoint_type == Segment.SEG_ENDPOINT_MERGED

         selected_counts[newseg.parent_id] += 1

     print(selected_counts)
     assert lb <= selected_counts[0] <= ub, ('Incorrect proportion of histories selected. '
                                             'this is expected about {:%} of the time; retry test.'.format(alpha))
Beispiel #41
0
def npfs_chi2(X, y, fpr=0.05, alpha=.01, n_bootstraps=100):
  """
  Neyman-Pearson feature selection using a Chi2 test as the base selector.

  On each of ``n_bootstraps`` bootstrap resamples of the (binned) data, every
  feature whose Chi2 p-value is at most ``fpr`` is marked as selected. A
  feature is kept when its selection count exceeds the ``1 - alpha`` quantile
  of a Binomial(n_bootstraps, fpr) distribution.

  Parameters
  ----------
  X : array-like, shape = (n_samples, n_features_in)
      Sample vectors.

  y : array-like, shape = (n_samples,)
      Target vector (class labels).

  fpr : double
      False positive rate for the Chi2-test feature selection approach

  alpha : double
      Size of the hypothesis test for NPFS

  n_bootstraps : int
      Number of bootstraps

  Returns
  -------
  selections : list
      Indices of the selected features. Length is variable.

  bern_matrix : array, shape = (n_features_in, n_bootstraps)
      Entry (k, b) is 1 when feature k was selected in bootstrap b, else 0.

  delta : float
      Binomial threshold the per-feature counts were compared against.

  Raises
  ------
  ValueError
      If ``len(y)`` differs from ``X.shape[0]``.
  """
  n_samp, n_feat = X.shape

  # Validate up front, before any binning or resampling is attempted.
  if n_samp != len(y):
    raise ValueError('len(y) and X.shape[0] must be the equal.')

  X = bin_data(X, n_bins=np.sqrt(n_samp))

  bern_matrix = np.zeros((n_feat, n_bootstraps))

  for n in range(n_bootstraps):
    # generate a random sample (with replacement)
    idx = np.random.randint(0, n_samp, n_samp)
    _, pval = chi2(1.0*X[idx], y[idx])
    sels = np.where(pval <= fpr)
    b_sels = np.zeros((n_feat,))
    b_sels[sels] = 1.
    bern_matrix[:, n] = b_sels

  # Under the null, a feature passes the Chi2 test with probability fpr.
  delta = binom.ppf(1-alpha, n_bootstraps, fpr)
  z = np.sum(bern_matrix, axis=1)

  selections = [k for k in range(n_feat) if z[k] > delta]

  return selections, bern_matrix, delta
Beispiel #42
0
def get_Binom_cutoff(readlengths,genelength,alpha, mincut=2):
    """Return a binomial read-count cutoff, never smaller than ``mincut``.

    The per-read probability is the mean read length divided by
    ``genelength``; the cutoff is the ``1 - alpha`` quantile of
    Binomial(number of reads, probability), truncated to an integer.
    With no reads at all, ``mincut`` is returned directly.
    """
    read_count = len(readlengths)
    if read_count == 0:
        return mincut

    mean_length = numpy.mean(numpy.array(readlengths))
    hit_probability = float(mean_length) / float(genelength)
    cutoff = int(binom.ppf(1 - alpha, read_count, hit_probability))
    # max() returns its first argument on ties, i.e. the computed cutoff.
    return max(cutoff, mincut)
Beispiel #43
0
def get_FDR_cutoff_binom(readlengths, genelength, alpha, mincut = 2):
    """Return a binomial read-count cutoff, never smaller than ``mincut``.

    The per-read probability is the mean read length divided by
    ``genelength``; the cutoff is the ``1 - alpha`` quantile of
    Binomial(number of reads, probability), truncated to an integer.
    With no reads at all, ``mincut`` is returned directly.

    If the quantile cannot be computed (for example the probability is out
    of range and the result is NaN), the inputs are printed for diagnosis
    and the original exception is re-raised.
    """
    number_reads = len(readlengths)

    if number_reads == 0:
        return mincut

    read_length = numpy.array(readlengths)
    mean_read_length = numpy.mean(read_length)
    prob = float(mean_read_length) / float(genelength)
    try:
        k = int(binom.ppf(1 - (alpha), number_reads, prob))
    except Exception:
        # Single pre-formatted string: prints identically on Python 2 and 3.
        print("%s %s %s %s %s" % (read_length, mean_read_length, prob,
                                  alpha, number_reads))
        raise
    return max(k, mincut)
Beispiel #44
0
def get_FDR_cutoff_binom(readlengths, genelength, alpha, mincut = 2):
    """Return a binomial read-count cutoff, never smaller than ``mincut``.

    The per-read probability is the mean read length divided by
    ``genelength``; the cutoff is the ``1 - alpha`` quantile of
    Binomial(number of reads, probability), truncated to an integer.
    With no reads at all, ``mincut`` is returned directly.

    Raises ValueError when the computed probability exceeds 1. If the
    quantile itself cannot be computed, the inputs are printed for diagnosis
    and the original exception is re-raised.
    """
    number_reads = len(readlengths)

    if number_reads == 0:
        return mincut

    read_length = numpy.array(readlengths)
    mean_read_length = numpy.mean(read_length)
    prob = float(mean_read_length) / float(genelength)
    if prob > 1:
        raise ValueError("probability of >= 1 read per-base > 1")
    try:
        k = int(binom.ppf(1 - (alpha), number_reads, prob))
    except Exception:
        # Single pre-formatted string: prints identically on Python 2 and 3.
        print("%s %s %s %s %s %s" % (read_length, mean_read_length, genelength,
                                     prob, alpha, number_reads))
        raise
    return max(k, mincut)
Beispiel #45
0
def bino_p2da(y, p):
    """For a given vector label, get the decoding accuracy of p-values
    using the binomial law.

    The chance level is taken as 1 / (number of distinct labels in y); the
    result is the ``1 - p`` quantile of Binomial(len(y), chance level),
    expressed as a percentage of len(y).

    Args:
        y: array
            The vector label

        p: int / float / list / array [0 <= p < 1]
            p-value. Ex : p = [0.05, 0.01, 0.001, 0.00001]

    Return:
        da: ndarray
            The decoding accuracy associate to each p-value

    Raises:
        ValueError: if any p-value lies outside [0, 1).
    """
    y = np.ravel(y)
    nbepoch = len(y)
    nbclass = len(np.unique(y))
    p = np.asarray(p)
    # Enforce the documented range on both sides; a negative p would
    # otherwise silently produce NaN from binom.ppf.
    if p.min() < 0 or p.max() >= 1:
        raise ValueError('Consider 0<=p<1')
    # 1.0 / nbclass keeps the chance level a float even under Python 2.
    return binom.ppf(1 - p, nbepoch, 1.0 / nbclass) * 100 / nbepoch
Beispiel #46
0
 def monitor(self, data, model_id=0):
     """Score ``data`` against one operation mode and flag out-of-model behaviour.

     Returns a 6-tuple:
       * monitored statistic (filtered negative log-likelihood),
       * threshold of the selected operation mode,
       * flag telling whether the behaviour is out of the operation mode,
       * number of samples beyond the threshold,
       * the data points that were beyond the threshold,
       * index of the operation mode (``model_id``).
     """
     filter_rate = 0.4
     sample_count = data.shape[0]
     mode = self.models[model_id]

     # Largest number of out-of-bounds samples still considered in-model.
     tail_probability = 1-mode.confidence
     max_outliers = np.round(binom.ppf(mode.confidence, sample_count, tail_probability))

     # Log likelihood per sample; the second return value is not needed.
     logprob, _ = mode.model.score_samples(data)
     smoothed = exponential_filter(logprob, filter_rate)

     beyond_threshold = -smoothed > mode.threshold
     outlier_count = np.sum(beyond_threshold)
     is_out = outlier_count > max_outliers
     outlier_data = data[beyond_threshold,]

     return -smoothed, mode.threshold, is_out, outlier_count, outlier_data, model_id
import numpy as np
from scipy.stats import binom
import matplotlib.pyplot as plt

# Demo: plot the probability mass function of a Binomial(n=5, p=0.4).
fig, ax = plt.subplots(1, 1)
n, p = 5, 0.4
# First four moments: mean, variance, skewness, kurtosis (not used below).
mean, var, skew, kurt = binom.stats(n, p, moments='mvsk')
# Support points from the 1% quantile up to, but excluding, the 99% quantile
# (np.arange uses a half-open interval).
x = np.arange(binom.ppf(0.01, n, p),binom.ppf(0.99, n, p))

# Blue dots mark the pmf values; the translucent vertical lines form stems.
ax.plot(x, binom.pmf(x, n, p), 'bo', ms=8, label='binom pmf')
ax.vlines(x, 0, binom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)
plt.show()
Beispiel #48
0
# Binomial distribution using scipy.stats

from scipy.stats import binom
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Compute the first four moments (mean, variance, skewness, kurtosis):

n, p = 5, 0.4
mean, var, skew, kurt = binom.stats(n, p, moments='mvsk')

# Plot the probability mass function (``pmf``) of the random variable:

# Support points from the 1% quantile up to, but excluding, the 99% quantile.
x = np.arange(binom.ppf(0.01, n, p),
              binom.ppf(0.99, n, p))
ax.plot(x, binom.pmf(x, n, p), 'bo', ms=8, label='pmf binomial')
ax.vlines(x, 0, binom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)
ax.legend(loc='best', frameon=False)


# Check the accuracy of ``cdf`` and ``ppf`` (ppf should invert cdf):

prob = binom.cdf(x, n, p)
# NOTE: the boolean result is discarded here; it is only visible when run
# interactively.
np.allclose(x, binom.ppf(prob, n, p))


# Generate random numbers (1000 draws; not used further in this snippet)

r = binom.rvs(n, p, size=1000)
plt.show()
def sgn_test_threshold( count, p_value=0.05  ):
    """Return a sign-test threshold as a fraction of ``count``.

    Takes the ``p_value`` quantile of Binomial(count, 0.5), mirrors it
    around ``count`` (count - quantile + 1) and divides by ``count``.
    """
    lower_quantile = binom.ppf(p_value, count, 0.5)
    mirrored = count - lower_quantile + 1
    return mirrored/count
Beispiel #50
0
def binostat(y, p):
    """Return the binomial chance-level accuracy (in percent) for p-value(s) ``p``.

    The chance level is 1 / (number of distinct labels in ``y``); the result
    is the ``1 - p`` quantile of Binomial(len(y), chance level), expressed as
    a percentage of len(y).

    y: label vector (flattened before use).
    p: scalar or sequence/array of p-values.
    """
    y = n.ravel(y)
    nbepoch = len(y)
    nbclass = len(n.unique(y))
    # asarray lets plain lists of p-values be used in ``1 - p``.
    p = n.asarray(p)
    # 1.0 / nbclass keeps the chance level a float even under Python 2.
    return binom.ppf(1 - p, nbepoch, 1.0 / nbclass) * 100 / nbepoch
Beispiel #51
0
# Script set-up: a Binomial(n=400, p=1/(1+b)) distribution sampled on an
# integer grid between its 0.1% and 99.9% quantiles.
b = 33

fig, ax = plt.subplots(1, 1)
n = 400
step = 1


p = float(1) / float(1 + b)
mean, var, skew, kurt = binom.stats(n, p, moments='mvsk')
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print(binom.var(n, p))
print(binom.expect(lambda x: x, args=(n, p)))       # E[X]
print(binom.expect(lambda x: x ** 2, args=(n, p)))  # E[X^2]

# Alternative sampling ranges kept for reference:
# x = np.arange(binom.ppf(0.00001, n, p), binom.ppf(0.99999, n, p))
# x = np.arange(binom.ppf(0.01, n, p), binom.ppf(0.99, n, p))
x = np.arange(binom.ppf(0.001, n, p), binom.ppf(0.999, n, p), step)
y = np.array(binom.pmf(x, n, p), dtype=float)

def squarer(pos1=1, pos2=len(x)):
    """Trapezoidal area under the module-level (x, y) samples.

    Sums the trapezoids ending at indices ``pos1`` .. ``pos2 - 1``. A
    ``pos2`` larger than ``len(x)`` is reduced by ``len(x)`` first. The
    default for ``pos2`` is fixed to ``len(x)`` at definition time.
    """
    sample_count = len(x)
    if pos2 > sample_count:
        pos2 -= sample_count
    return sum(
        (float(y[i - 1] + y[i]) / float(2)) * (x[i] - x[i - 1])
        for i in range(pos1, pos2)
    )


# Area of the single trapezoid between x[1] and x[2].
print("Square: ", squarer(2, 3))
# Default bounds (pos1=1, pos2=len(x)): trapezoids over the whole sampled range.
print("Full square: ", squarer())

# Plot the pmf at every sampled x as blue dots.
ax.plot(x, binom.pmf(x, n, p), 'bo', ms=7, label='binom pmf')