Example #1
def test_bootstrap_against_itself_1samp(method, expected):
    # The expected values in this test were generated using bootstrap
    # to check for unintended changes in behavior. The test also makes sure
    # that bootstrap works with multi-sample statistics and that the
    # `axis` argument works as expected / function is vectorized.
    np.random.seed(0)

    n = 100  # size of sample
    n_resamples = 999  # number of bootstrap resamples used to form each CI
    confidence_level = 0.9

    # The true mean is 5
    dist = stats.norm(loc=5, scale=1)
    stat_true = dist.mean()

    # Do the same thing 2000 times. (The code is fully vectorized.)
    n_replications = 2000
    data = dist.rvs(size=(n_replications, n))
    res = bootstrap((data,),
                    statistic=np.mean,
                    confidence_level=confidence_level,
                    n_resamples=n_resamples,
                    batch=50,
                    method=method,
                    axis=-1)
    ci = res.confidence_interval

    # ci contains vectors of lower and upper confidence interval bounds
    ci_contains_true = np.sum((ci[0] < stat_true) & (stat_true < ci[1]))
    assert ci_contains_true == expected

    # ci_contains_true is not inconsistent with confidence_level
    pvalue = stats.binomtest(ci_contains_true, n_replications,
                             confidence_level).pvalue
    assert pvalue > 0.1
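The coverage check at the end of this test is just a binomial test on the number of intervals that cover the true value. A minimal standalone sketch of the same idea, using made-up counts rather than the values produced above:

import scipy.stats as stats

# Hypothetical: 1786 of 2000 intervals covered the true mean at a nominal 90% level.
pvalue = stats.binomtest(1786, 2000, 0.9).pvalue
print(pvalue)  # a p-value above 0.1 indicates the observed coverage is consistent with 0.9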
Example #2
def sign_test(samp, mu0=0):
    """
    Sign test

    Parameters
    ----------
    samp : array_like
        1d array. The sample for which you want to perform the sign test.
    mu0 : float
        See Notes for the definition of the sign test. mu0 is 0 by
        default, but it is common to set it to the median.

    Returns
    -------
    M : float
        The sign test statistic, (N(+) - N(-))/2.
    p-value : float
        The p-value of the two-sided binomial test.

    Notes
    -----
    The sign test returns

    M = (N(+) - N(-))/2

    where N(+) is the number of values above `mu0`, N(-) is the number of
    values below.  Values equal to `mu0` are discarded.

    The p-value for M is calculated using the binomial distribution
    and can be interpreted the same as for a t-test. The test statistic
    min(N(+), N(-)) is distributed Binom(n_trials, 0.5), where n_trials
    equals N(+) + N(-).

    See Also
    --------
    scipy.stats.wilcoxon
    """
    samp = np.asarray(samp)
    pos = np.sum(samp > mu0)
    neg = np.sum(samp < mu0)
    M = (pos - neg) / 2.0
    try:
        p = stats.binomtest(min(pos, neg), pos + neg, 0.5).pvalue
    except AttributeError:
        # Remove after min SciPy >= 1.7
        p = stats.binom_test(min(pos, neg), pos + neg, 0.5)
    return M, p
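A quick usage sketch of sign_test on made-up data (the values below are illustrative only, and the snippet assumes numpy and scipy are available as in the function above):

import numpy as np

# Hypothetical sample; test whether its location differs from mu0 = 5.
samp = np.array([4.8, 5.6, 5.1, 4.3, 6.0, 5.9, 4.9, 5.4])
M, p = sign_test(samp, mu0=5)
print(M, p)  # M = (N(+) - N(-))/2; p from the binomial test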
Example #3
def test_bootstrap_against_itself_2samp(method, expected):
    # The expected values in this test were generated using bootstrap
    # to check for unintended changes in behavior. The test also makes sure
    # that bootstrap works with multi-sample statistics and that the
    # `axis` argument works as expected / function is vectorized.
    np.random.seed(0)

    n1 = 100  # size of sample 1
    n2 = 120  # size of sample 2
    n_resamples = 999  # number of bootstrap resamples used to form each CI
    confidence_level = 0.9

    # The statistic we're interested in is the difference in means
    def my_stat(data1, data2, axis=-1):
        mean1 = np.mean(data1, axis=axis)
        mean2 = np.mean(data2, axis=axis)
        return mean1 - mean2

    # The true difference in the means is -0.1
    dist1 = stats.norm(loc=0, scale=1)
    dist2 = stats.norm(loc=0.1, scale=1)
    stat_true = dist1.mean() - dist2.mean()

    # Do the same thing 1000 times. (The code is fully vectorized.)
    n_replications = 1000
    data1 = dist1.rvs(size=(n_replications, n1))
    data2 = dist2.rvs(size=(n_replications, n2))
    res = bootstrap((data1, data2),
                    statistic=my_stat,
                    confidence_level=confidence_level,
                    n_resamples=n_resamples,
                    batch=50,
                    method=method,
                    axis=-1)
    ci = res.confidence_interval

    # ci contains vectors of lower and upper confidence interval bounds
    ci_contains_true = np.sum((ci[0] < stat_true) & (stat_true < ci[1]))
    assert ci_contains_true == expected

    # ci_contains_true is not inconsistent with confidence_level
    pvalue = stats.binomtest(ci_contains_true, n_replications,
                             confidence_level).pvalue
    assert pvalue > 0.1
Example #4
File: H0.py Project: CNERG/RadClass
    def binom(self, x1, n, p):
        '''
        Private method for running a two-sided binomial test.

        Parameters:
        x1: number of observed successes (counts).
        n: number of trials; scipy.stats.binomtest requires n >= 1.
        p: hypothesized success probability under the null.

        Return:
        lpval: log base 10 p-value result of scipy.stats.binomtest.
        '''
        # np.log10(1E-350), chosen to be smaller than any possible result
        min_lpval = -350.0
        # scipy.stats.binomtest will fail if n (# of trials)
        # is less than 1 (possible for high-energy bins)
        if int(n) < 1:
            lpval = 0.0
        else:
            pval = stats.binomtest(int(x1), int(n), p,
                                   alternative='two-sided').pvalue
            if pval == 0.0:
                lpval = min_lpval
            else:
                lpval = np.log10(pval)
        return lpval
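A hedged sketch of what the log10 floor in this method is doing, calling scipy.stats.binomtest directly with illustrative inputs (not taken from RadClass):

import numpy as np
from scipy import stats

x1, n, p = 250, 1000, 0.1  # made-up counts and null probability
pval = stats.binomtest(int(x1), int(n), p, alternative='two-sided').pvalue
lpval = np.log10(pval) if pval > 0.0 else -350.0  # same floor as min_lpval above
print(lpval)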
Example #5
def check_binomial_dist(dataframe, transform=None):

    print("Transformation = {}".format(transform))

    df = pd.DataFrame()
    df["Participant"] = dataframe["Participant"]

    for col in dataframe.columns[1:]:

        min_val = min([i for i in dataframe[dataframe.columns[1:]].min()])

        if transform is None:
            d = [i for i in dataframe[col]]

        if transform == "log":
            # if value <= 0 in dataframe, adds (|min_val| + 1) to all data so smallest value is 1
            if min_val <= 0:
                d = [math.log10(i-min_val + 1) for i in dataframe[col]]

            # Does not add constant if no <= 0 values
            if min_val > 0:
                d = [math.log10(i) for i in dataframe[col]]

        if transform == "root":
            # if value < 0 in dataframe, adds |min_val| to all data so smallest value is 0
            if min_val < 0:
                d = [np.sqrt(i - min_val) for i in dataframe[col]]

            # Does not add a constant if there are no values < 0
            if min_val >= 0:
                d = [np.sqrt(i) for i in dataframe[col]]

        # NOTE: scipy.stats.binomtest expects integer counts (k, n, p), not a raw
        # sample, so the transformed column is summarized here as the number of
        # values above its mean and tested against p=0.5. This is an assumption
        # made to keep the call well-formed; the original passed `d` directly.
        k = int(sum(val > np.mean(d) for val in d))
        data = stats.binomtest(k, n=len(d), p=0.5)

        df[col] = d

        sig = "***" if data.pvalue < .05 else ""
        print("-{}: p = {} {}".format(col, round(data.pvalue, 3), sig))

    return df
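One possible way to call the function above, with a tiny hypothetical DataFrame (the column names and values are invented for illustration; the snippet assumes the imports the function relies on, i.e. math, numpy, pandas, and scipy.stats, are in scope):

import pandas as pd

demo = pd.DataFrame({
    "Participant": ["P1", "P2", "P3", "P4", "P5", "P6"],
    "ScoreA": [0.2, 1.5, 3.1, 0.0, 2.2, 4.8],
    "ScoreB": [10.0, 12.5, 9.1, 14.2, 11.7, 13.3],
})
transformed = check_binomial_dist(demo, transform="log")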
Example #6
def test_betting_mart_crossing_probabilities(theta):
    # Note that these tests are random and will each individually
    # fail at most 5% of the time.

    for m in [0.2, 0.5, 0.8]:
        repeats = 500
        alpha = 0.1
        dist_fn = lambda: np.random.binomial(1, m, 10000)
        mart_fn = lambda x: betting_mart(x, m, alpha=alpha, theta=theta)

        crossing_frac = superMG_crossing_fraction(mart_fn,
                                                  dist_fn,
                                                  alpha=alpha,
                                                  repeats=repeats)

        crossing_test = binomtest(int(crossing_frac * repeats),
                                  n=repeats,
                                  p=alpha,
                                  alternative="greater")

        lower_ci = crossing_test.proportion_ci(confidence_level=0.95)[0]

        assert lower_ci > 0
        assert lower_ci <= alpha
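The proportion_ci call above returns a ConfidenceInterval with low and high fields. A small sketch of the same check outside the test harness, with made-up counts:

from scipy.stats import binomtest

# Hypothetical: the martingale crossed its threshold in 42 of 500 repeats at alpha = 0.1.
crossing_test = binomtest(42, n=500, p=0.1, alternative="greater")
lower_ci = crossing_test.proportion_ci(confidence_level=0.95).low
print(lower_ci)  # expected to fall between 0 and alpha when the crossing rate is controlled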
ci_r = res_r.confidence_interval        
print(f"""\n\n
            R {CI*100}%  Confidence Interval:
        ---------------------------------
             Lower     |     Upper
          -----------------------------
           {ci_r.low:.3e}  |    {ci_r.high:.3e}
      """)      
      
#%%
import scipy.stats as stats

pct = .99
threshold = 1e-12
sig_level = .01
res = stats.binomtest(cython_errors[cython_errors < threshold].shape[0],
                      n=1000, p=pct, alternative='greater')

print(f"""
Claim: at least {pct*100}% of results are accurate,
where accurate is defined as an error of less than {threshold}.
Out of 1000 results, {cython_errors[cython_errors < threshold].shape[0]} had errors less than {threshold}.

Test this claim (null hypothesis: the accuracy rate equals {pct}; alternative: it is greater):
>>> stats.binomtest({cython_errors[cython_errors < threshold].shape[0]}, n=1000, p={pct}, alternative='greater')
{res}

pvalue = {res.pvalue:.6f} {"<" if res.pvalue < sig_level else ">"} {sig_level}

Therefore, we {"reject" if res.pvalue < sig_level else "do not reject"} the null hypothesis.
The observed values are {"" if res.pvalue < sig_level else "not "}consistent with the claim.
""")