Example #1
    def __init__(self,
                 control,
                 test,
                 effect_size,
                 is_paired=False,
                 ci=95,
                 resamples=5000,
                 random_seed=12345):
        """
        Compute the effect size between two groups.

        Parameters
        ----------
        control : array-like
        test : array-like
            These should be numerical iterables.
        effect_size : string
            Any one of the following is accepted:
            'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'.
        is_paired : boolean, default False
        ci : float, default 95
            The confidence interval width. The default of 95 produces 95%
            confidence intervals.
        resamples : int, default 5000
            The number of bootstrap resamples to be taken.
        random_seed : int, default 12345
            `random_seed` is used to seed the random number generator during
            bootstrap resampling. This ensures that the confidence intervals
            reported are replicable.


        Returns
        -------
        A :py:class:`TwoGroupsEffectSize` object.
        
        difference : float
            The effect size of the difference between the control and the test.
        
        effect_size : string
            The type of effect size reported.
        
        is_paired : boolean
            Whether or not the difference is paired (i.e. repeated measures).
            
        ci : float
            Returns the width of the confidence interval, in percent.
            
        alpha : float
            Returns the significance level of the statistical test as a float
            between 0 and 1.
            
        resamples : int
            The number of resamples performed during the bootstrap procedure.

        bootstraps : numpy ndarray
            The generated bootstraps of the effect size.
            
        random_seed : int
            The number used to seed the numpy random number generator, i.e.
            `seed_value` from `numpy.random.seed(seed_value)` is returned.
            
        bca_low, bca_high : float
            The bias-corrected and accelerated confidence interval lower limit
            and upper limits, respectively.
            
        pct_low, pct_high : float
            The percentile confidence interval lower limit and upper limits, 
            respectively.
            
            
        Examples
        --------
        >>> import numpy as np
        >>> import scipy as sp
        >>> import dabest
        >>> np.random.seed(12345)
        >>> control = sp.stats.norm.rvs(loc=0, size=30)
        >>> test = sp.stats.norm.rvs(loc=0.5, size=30)
        >>> effsize = dabest.TwoGroupsEffectSize(control, test, "mean_diff")
        >>> effsize
        The unpaired mean difference is -0.253 [95%CI -0.782, 0.241]
        5000 bootstrap samples. The confidence interval is bias-corrected
        and accelerated.
        >>> effsize.to_dict() 
        {'alpha': 0.05,
         'bca_high': 0.2413346581369784,
         'bca_interval_idx': (109, 4858),
         'bca_low': -0.7818088458343655,
         'bootstraps': array([-1.09875628, -1.08840014, -1.08258695, ...,  0.66675324,
                 0.75814087,  0.80848265]),
         'ci': 95,
         'difference': -0.25315417702752846,
         'effect_size': 'mean difference',
         'is_paired': False,
         'pct_high': 0.25135646125431527,
         'pct_interval_idx': (125, 4875),
         'pct_low': -0.763588353717278,
         'pvalue_brunner_munzel': nan,
         'pvalue_kruskal': nan,
         'pvalue_mann_whitney': 0.2600723060808019,
         'pvalue_paired_students_t': nan,
         'pvalue_students_t': 0.34743913903372836,
         'pvalue_welch': 0.3474493875548965,
         'pvalue_wilcoxon': nan,
         'random_seed': 12345,
         'resamples': 5000,
         'statistic_brunner_munzel': nan,
         'statistic_kruskal': nan,
         'statistic_mann_whitney': 406.0,
         'statistic_paired_students_t': nan,
         'statistic_students_t': 0.9472545159069105,
         'statistic_welch': 0.9472545159069105,
         'statistic_wilcoxon': nan}
        """

        from numpy import array, isnan
        from numpy import sort as npsort
        from numpy.random import choice, seed

        import scipy.stats as spstats

        # import statsmodels.stats.power as power

        from string import Template
        import warnings

        from ._stats_tools import confint_2group_diff as ci2g
        from ._stats_tools import effsize as es

        self.__EFFECT_SIZE_DICT = {
            "mean_diff": "mean difference",
            "median_diff": "median difference",
            "cohens_d": "Cohen's d",
            "hedges_g": "Hedges' g",
            "cliffs_delta": "Cliff's delta"
        }

        kosher_es = list(self.__EFFECT_SIZE_DICT.keys())
        if effect_size not in kosher_es:
            err1 = "The effect size '{}'".format(effect_size)
            err2 = "is not one of {}".format(kosher_es)
            raise ValueError(" ".join([err1, err2]))

        if effect_size == "cliffs_delta" and is_paired is True:
            err1 = "`is_paired` is True; therefore Cliff's delta is not defined."
            raise ValueError(err1)

        # Convert to numpy arrays for speed.
        # NaNs are automatically dropped.
        control = array(control)
        test = array(test)
        control = control[~isnan(control)]
        test = test[~isnan(test)]

        self.__effect_size = effect_size
        self.__control = control
        self.__test = test
        self.__is_paired = is_paired
        self.__resamples = resamples
        self.__random_seed = random_seed
        self.__ci = ci
        self.__alpha = ci2g._compute_alpha_from_ci(ci)

        self.__difference = es.two_group_difference(control, test, is_paired,
                                                    effect_size)

        self.__jackknives = ci2g.compute_meandiff_jackknife(
            control, test, is_paired, effect_size)

        self.__acceleration_value = ci2g._calc_accel(self.__jackknives)

        bootstraps = ci2g.compute_bootstrapped_diff(control, test, is_paired,
                                                    effect_size, resamples,
                                                    random_seed)
        self.__bootstraps = npsort(bootstraps)

        self.__bias_correction = ci2g.compute_meandiff_bias_correction(
            self.__bootstraps, self.__difference)

        # Compute BCa intervals.
        bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
            self.__bias_correction, self.__acceleration_value,
            self.__resamples, ci)

        self.__bca_interval_idx = (bca_idx_low, bca_idx_high)

        if not isnan(bca_idx_low) and not isnan(bca_idx_high):
            self.__bca_low = self.__bootstraps[bca_idx_low]
            self.__bca_high = self.__bootstraps[bca_idx_high]

            err1 = "The $lim_type limit of the interval"
            err2 = "was in the $loc 10 values."
            err3 = "The result should be considered unstable."
            err_temp = Template(" ".join([err1, err2, err3]))

            if bca_idx_low <= 10:
                warnings.warn(err_temp.substitute(lim_type="lower",
                                                  loc="bottom"),
                              stacklevel=1)

            if bca_idx_high >= resamples - 9:
                warnings.warn(err_temp.substitute(lim_type="upper", loc="top"),
                              stacklevel=1)

        else:
            err1 = "The $lim_type limit of the BCa interval cannot be computed."
            err2 = "It is set to the effect size itself."
            err3 = "All bootstrap values were likely identical."
            err_temp = Template(" ".join([err1, err2, err3]))

            if isnan(bca_idx_low):
                self.__bca_low = self.__difference
                warnings.warn(err_temp.substitute(lim_type="lower"),
                              stacklevel=0)

            if isnan(bca_idx_high):
                self.__bca_high = self.__difference
                warnings.warn(err_temp.substitute(lim_type="upper"),
                              stacklevel=0)

        # Compute percentile intervals.
        pct_idx_low = int((self.__alpha / 2) * resamples)
        pct_idx_high = int((1 - (self.__alpha / 2)) * resamples)

        self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
        self.__pct_low = self.__bootstraps[pct_idx_low]
        self.__pct_high = self.__bootstraps[pct_idx_high]

        # Perform statistical tests.
        if is_paired is True:
            # Wilcoxon, a non-parametric version of the paired T-test.
            wilcoxon = spstats.wilcoxon(control, test)
            self.__pvalue_wilcoxon = wilcoxon.pvalue
            self.__statistic_wilcoxon = wilcoxon.statistic

            if effect_size != "median_diff":
                # Paired Student's t-test.
                paired_t = spstats.ttest_rel(control, test, nan_policy='omit')
                self.__pvalue_paired_students_t = paired_t.pvalue
                self.__statistic_paired_students_t = paired_t.statistic

                standardized_es = es.cohens_d(control, test, is_paired=True)
                # self.__power = power.tt_solve_power(standardized_es,
                #                                     len(control),
                #                                     alpha=self.__alpha)

        elif effect_size == "cliffs_delta":
            # Let's go with Brunner-Munzel!
            brunner_munzel = spstats.brunnermunzel(control,
                                                   test,
                                                   nan_policy='omit')
            self.__pvalue_brunner_munzel = brunner_munzel.pvalue
            self.__statistic_brunner_munzel = brunner_munzel.statistic

        elif effect_size == "median_diff":
            # According to scipy's documentation of the function,
            # "The Kruskal-Wallis H-test tests the null hypothesis
            # that the population median of all of the groups are equal."
            kruskal = spstats.kruskal(control, test, nan_policy='omit')
            self.__pvalue_kruskal = kruskal.pvalue
            self.__statistic_kruskal = kruskal.statistic
            # self.__power = np.nan

        else:  # for mean difference, Cohen's d, and Hedges' g.
            # Welch's t-test, assumes normality of distributions,
            # but does not assume equal variances.
            welch = spstats.ttest_ind(control,
                                      test,
                                      equal_var=False,
                                      nan_policy='omit')
            self.__pvalue_welch = welch.pvalue
            self.__statistic_welch = welch.statistic

            # Student's t-test, assumes normality of distributions,
            # as well as assumption of equal variances.
            students_t = spstats.ttest_ind(control,
                                           test,
                                           equal_var=True,
                                           nan_policy='omit')
            self.__pvalue_students_t = students_t.pvalue
            self.__statistic_students_t = students_t.statistic

            # Mann-Whitney U test: non-parametric,
            # does not assume normality of the distributions.
            try:
                mann_whitney = spstats.mannwhitneyu(control,
                                                    test,
                                                    alternative='two-sided')
                self.__pvalue_mann_whitney = mann_whitney.pvalue
                self.__statistic_mann_whitney = mann_whitney.statistic
            except ValueError:
                # Occurs when the control and test are exactly identical
                # in terms of rank (eg. all zeros.)
                pass

            standardized_es = es.cohens_d(control, test, is_paired=False)
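The percentile interval computed above is simply the sorted bootstrap distribution indexed at the alpha/2 and 1 - alpha/2 quantiles. A minimal self-contained sketch of the same idea using only NumPy (the sample data and sizes here are invented for illustration):

import numpy as np

rng = np.random.default_rng(12345)
control = rng.normal(loc=0.0, size=30)
test = rng.normal(loc=0.5, size=30)

# Bootstrap the mean difference 5000 times and sort the results.
boots = np.sort([
    rng.choice(test, size=30).mean() - rng.choice(control, size=30).mean()
    for _ in range(5000)
])

# Percentile 95% CI: index the sorted bootstraps at alpha/2 and 1 - alpha/2.
alpha = 0.05
pct_low = boots[int((alpha / 2) * 5000)]
pct_high = boots[int((1 - alpha / 2) * 5000)]
print(f"95% percentile CI: [{pct_low:.3f}, {pct_high:.3f}]")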
Example #2
def time_brunnermunzel(self, alternative, nan_policy, distribution):
    # Benchmark method: `self.u1` and `self.u2` are sample arrays
    # assumed to be prepared elsewhere (e.g. in a setup step).
    stats.brunnermunzel(self.u1,
                        self.u2,
                        alternative=alternative,
                        distribution=distribution,
                        nan_policy=nan_policy)
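Outside a benchmark harness, the same call works directly on two arrays. A minimal standalone sketch (the sample data is invented; the keyword values shown are scipy.stats.brunnermunzel's documented options):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
u1 = rng.normal(loc=0.0, size=50)
u2 = rng.normal(loc=0.3, size=50)

# Two-sided Brunner-Munzel test using the t-distribution approximation.
w, p = stats.brunnermunzel(u1, u2, alternative='two-sided',
                           distribution='t', nan_policy='propagate')
print(f"W = {w:.3f}, p = {p:.3f}")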
Example #3
def hypotest(df=None, x="", y="", q1="", q2=""):
    print("\n--- HYPOTHESIS TESTS --- ")

    if y:
        medians = df.groupby(y)[x].median()
        means = df.groupby(y)[x].mean()
        stds = df.groupby(y)[x].std()

        # get NumPy arrays
        x_y_true = df.loc[df[y] == 1][x].values
        x_y_false = df.loc[df[y] == 0][x].values
    elif q1 and q2:
        medians = df.query(q1)[x].median(), df.query(q2)[x].median()
        means = df.query(q1)[x].mean(), df.query(q2)[x].mean()
        stds = df.query(q1)[x].std(), df.query(q2)[x].std()

        # get NumPy arrays
        x_y_true = df.query(q1)[x].values
        x_y_false = df.query(q2)[x].values

    else:
        print("No condition in hypotest..")
        exit(1)

    print('~' * 40)
    print("\nMean, Std & Median")
    print("(%s, %s)" % (q1, q2))
    print(80 * "-")

    print("Mean values", means)
    print("Std deviations", stds)
    print("Median values", medians)

    print(80 * "-")
    print(
        "\nNormality tests (whether a data sample has a normal distribution)")
    print(80 * "-")

    print("\nShapiro-Wilk:")
    print(
        "H0: the sample for variable %s has a Gaussian distribution for positive %s"
        % (x, y))
    if x_y_true.size >= 3:
        stat, p = shapiro(x_y_true)
        print_stat_p(stat, p)
        print_normality_test(p)
    else:
        print("Cannot be performed because of size", x_y_true.size)

    print(
        "\nH0: the sample for variable %s has a Gaussian distribution for negative %s"
        % (x, y))
    if x_y_false.size >= 3:
        stat, p = shapiro(x_y_false)
        print_stat_p(stat, p)
        print_normality_test(p)
    else:
        print("Cannot be performed because of size", x_y_false.size)

    # note: we need to avoid
    # ValueError: skewtest is not valid with less than 8 samples; 5 samples were given.
    print('~' * 40)
    print("\nD'Agostino’s K^2 Test")
    print(
        "H0: the sample for variable %s has a Gaussian distribution for positive %s"
        % (x, y))
    if x_y_true.size >= 8:
        stat, p = normaltest(x_y_true)
        print_stat_p(stat, p)
        print_normality_test(p)
    else:
        print("Cannot be performed because of size", x_y_true.size)

    print(
        "\nH0: the sample for variable %s has a Gaussian distribution for negative %s"
        % (x, y))
    if x_y_false.size >= 8:
        stat, p = normaltest(x_y_false)
        print_stat_p(stat, p)
        print_normality_test(p)
    else:
        print("Cannot be performed because of size", x_y_false.size)

    print('~' * 40)
    print("\nAnderson-Darling Test")
    print(
        "H0: the sample for variable %s has a Gaussian distribution for positive %s"
        % (x, y))
    result = anderson(x_y_true)
    print('stat=%.6f' % (result.statistic))
    for i in range(len(result.critical_values)):
        sl, cv = result.significance_level[i], result.critical_values[i]
        if result.statistic < cv:
            print('Probably Normal at the %.1f%% level' % (sl))
        else:
            print('Probably not Normal at the %.1f%% level' % (sl))

    print(
        "\nH0: the sample for variable %s has a Gaussian distribution for negative %s"
        % (x, y))
    result = anderson(x_y_false)
    print('stat=%.6f' % (result.statistic))
    for i in range(len(result.critical_values)):
        sl, cv = result.significance_level[i], result.critical_values[i]
        if result.statistic < cv:
            print('Probably Normal at the %.1f%% level' % (sl))
        else:
            print('Probably not Normal at the %.1f%% level' % (sl))

    print(80 * "-")
    print("\nNonparametric Statistical Hypothesis Tests")
    print(80 * "-")

    print("\nMann-Whitney U (rank) test:")
    info_M_W()
    print(
        "\nH0: the distributions of both samples for variable %s are equal (negative or positive %s)"
        % (x, y))
    stat, p = mannwhitneyu(x_y_true, x_y_false, alternative='two-sided')
    print_stat_p(stat, p)
    print_hypo(p)

    print('~' * 40)
    print("\nKruskal-Wallis H test:")
    info_K_W()
    print(
        "\nH0: the distributions of all samples for variable %s are equal (negative or positive %s)"
        % (x, y))
    stat, p = kruskal(x_y_true, x_y_false)
    print_stat_p(stat, p)
    print_hypo(p)

    #https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ks_2samp.html#scipy.stats.ks_2samp
    print('~' * 40)
    print("\nKolmogorov-Smirnov test:")
    info_K_S()
    print(
        "\nH0: The 2 independent samples for the variable %s are drawn from the same continuous distribution (negative or positive %s)"
        % (x, y))
    stat, p = ks_2samp(x_y_true, x_y_false)
    print_stat_p(stat, p)
    print_hypo(p)

    print('~' * 40)
    print("\nKolmogorov-Smirnov test using cumulative distributions:")
    print(
        "\nH0: The 2 independent samples for the variable %s are drawn from the same continuous distribution (negative or positive %s)"
        % (x, y))
    x_y_true_cum = np.cumsum(x_y_true)
    x_y_false_cum = np.cumsum(x_y_false)
    stat, p = ks_2samp(x_y_true_cum, x_y_false_cum)
    print_stat_p(stat, p)
    print_hypo(p)

    #https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.brunnermunzel.html#scipy.stats.brunnermunzel
    print('~' * 40)
    print("\nBrunner-Munzel test:")
    info_B_M()
    print(
        "\nH0: when values are taken one by one from each group of the variable %s, the probabilities of getting large values in both groups are equal (negative or positive %s)"
        % (x, y))
    stat, p = brunnermunzel(x_y_true, x_y_false)
    print_stat_p(stat, p)
    print_hypo(p)

    print(80 * "-")
    print("\nParametric Statistical Hypothesis Tests")
    print(80 * "-")

    #https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
    print("\nStudent's t-test:")
    info_t_test()
    print(
        "\nH0: there is association in variable %s for positive or negative %s"
        % (x, y))
    stat, p = ttest_ind(x_y_true, x_y_false)
    print_stat_p(stat, p)
    print_hypo(p)

    print('~' * 40)
    print("\nStudent's t-test (two-sided for checking identical means):")
    print("H0: the means of two distributions are identical")
    ttest, pval_t, dof = weightstats.CompareMeans.from_data(
        data1=x_y_true, data2=x_y_false).ttest_ind(alternative="two-sided",
                                                   usevar="pooled",
                                                   value=0)

    print("t-test = %f p-value = %f DoF = %i" % (ttest, pval_t, dof))
    print_hypo(pval_t)

    print('~' * 40)
    print("\nANOVA (one way):")
    info_anova()
    print(
        "\nH0=the means of the %s samples are equal for positive or negative %s"
        % (x, y))
    stat, p = f_oneway(x_y_true, x_y_false)
    print_stat_p(stat, p)
    print_hypo(p)

    #https://www.statsmodels.org/stable/generated/statsmodels.stats.weightstats.ztest.html
    print('~' * 40)
    print("\nZ-test: two-sided")
    info_Z()
    print("\nH0 : the mean of two independent groups is the same")
    stat, p = weightstats.ztest(x1=x_y_true,
                                x2=x_y_false,
                                value=0,
                                alternative='two-sided')
    print_stat_p(stat, p)
    print_hypo(p)

    #https://www.statsmodels.org/stable/generated/statsmodels.stats.weightstats.CompareMeans.ztest_ind.html#statsmodels.stats.weightstats.CompareMeans.ztest_ind
    print('~' * 40)
    print("\nZ-test: Two-sided test statistic for checking identical means.")
    print("\nH0: the means of two distributions are identical")
    stat, p = weightstats.CompareMeans.from_data(data1=x_y_true,
                                                 data2=x_y_false).ztest_ind(
                                                     alternative="two-sided",
                                                     usevar="pooled",
                                                     value=0)
    print_stat_p(stat, p)
    print_hypo(p)
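The function above expects either a binary label column y or a pair of pandas query strings q1/q2 to split the data; the helper printers (print_stat_p, print_hypo, etc.) are assumed to be defined in the same module. A hypothetical invocation (the DataFrame and column names are invented for illustration):

import pandas as pd

# df has a numeric column 'score' and a 0/1 label column 'treated'.
df = pd.DataFrame({
    'score':   [1.2, 0.8, 1.5, 2.1, 0.4, 1.9, 1.1, 0.7,
                0.9, 1.3, 0.5, 1.7, 2.0, 0.6, 1.4, 1.0],
    'treated': [1, 0, 1, 1, 0, 0, 1, 0,
                1, 0, 0, 1, 1, 0, 1, 0],
})
hypotest(df, x='score', y='treated')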
Example #4

    if 'POP' in field:
        xt = [0.001, 0.1, 1, 10]
        xl = np.log(xt)
        plt.xticks(xl, xt)

    # clean up and save the dang fig
    plt.tight_layout()
    plt.savefig(plots + 'density_dist/png/both-' + field + '.png', dpi=150)
    plt.savefig(plots + 'density_dist/svg/both-' + field + '.svg')
    plt.close()

# do some hacky stuff to run brunner munzel tests
# test mean temperature differences
bck = bck_data[-4]
field = 'LST_m'
cov_ae = np.array(ae[field])
cov_aa = np.array(aa[field])

# subset to just good data values
cov_ae = cov_ae[cov_ae != no_data]
cov_aa = cov_aa[cov_aa != no_data]
bck = bck[bck != no_data]

wae, pae = stats.brunnermunzel(cov_ae, bck)
waa, paa = stats.brunnermunzel(cov_aa, bck)
print('LST significance testing')
print('aedes aegypti   : p = {:0.3f}'.format(pae))
print('aedes albopictus: p = {:0.3f}'.format(paa))

# just do the rest via command line..
Example #5
def testability_improvement_statistical_test(test_all=True):
    """

    """

    if test_all:
        # For all projects
        df_before = pd.DataFrame()
        df_after = pd.DataFrame()
        for project, paths in project_name_path_dict.items():
            df1 = pd.read_csv(paths[0], index_col=False)
            df2 = pd.read_csv(paths[1], index_col=False)
            df_before = pd.concat([df_before, df1], ignore_index=True)
            df_after = pd.concat([df_after, df2], ignore_index=True)
    else:
        # For a single project
        df_before = pd.read_csv(project_name_path_dict['JHotDraw'][0],
                                index_col=False)

        df_after = pd.read_csv(project_name_path_dict['JHotDraw'][1],
                               index_col=False)

    print(df_before.describe())
    print(df_after.describe())

    tests = []
    meters = ['PredictedTestability', 'LineCoverage', 'BranchCoverage']
    for meter in meters:
        print(f'p-value for {meter}')
        absolute_meter_gain = df_after[meter].sum() - df_before[meter].sum()
        relative_meter_gain = (df_after[meter].sum() - df_before[meter].sum()
                               ) / df_before[meter].sum()
        print(f'Absolute {meter} gain: {absolute_meter_gain}')
        print(f'Relative {meter} gain: {relative_meter_gain}')

        s, p = ttest_ind(
            df_after[meter],
            df_before[meter],
            alternative="greater",
        )
        print(f'1 statistic independent t-test = {s}, p-value={p:.4E}',
              'Passed' if p < 0.05 else 'Failed')

        s, p = mannwhitneyu(
            df_after[meter],
            df_before[meter],
            alternative="greater",
        )
        print(f'2 statistic Mann-Whitney U test = {s}, p-value={p:.4E}',
              'Passed' if p < 0.05 else 'Failed')

        s, p = ranksums(
            df_after[meter],
            df_before[meter],
            alternative="greater",
        )
        print(f'3 statistic Wilcoxon rank-sum test = {s}, p-value={p:.4E}',
              'Passed' if p < 0.05 else 'Failed')

        s, p = brunnermunzel(
            df_after[meter],
            df_before[meter],
            alternative="greater",
        )
        print(f'4 statistic Brunner-Munzel test = {s}, p-value={p:.4E}',
              'Passed' if p < 0.05 else 'Failed')

        s, p = kruskal(
            df_after[meter],
            df_before[meter],
        )
        print(f'5 statistic Kruskal test = {s}, p-value={p:.4E}',
              'Passed' if p < 0.05 else 'Failed')

        if len(df_before[meter]) == len(df_after[meter]):
            s, p = wilcoxon(
                df_after[meter],
                df_before[meter],
                alternative="greater",
            )
            print(f'6 statistic Wilcoxon test = {s}, p-value={p:.4E}',
                  'Passed' if p < 0.05 else 'Failed')

        print('-' * 50)
def custom(a, b):
    # Return only the Brunner-Munzel p-value, discarding the test statistic.
    _, p = stats.brunnermunzel(a, b)
    return p
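Because custom returns just the p-value, it can be passed anywhere a two-sample p-value callable is expected. A hypothetical call (the data below is invented for illustration):

import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
a = rng.normal(size=40)
b = rng.normal(loc=0.5, size=40)
print(f"Brunner-Munzel p-value: {custom(a, b):.4f}")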