def __init__(self, control, test, effect_size,
             is_paired=False, ci=95,
             resamples=5000, random_seed=12345):
    """
    Compute the effect size between two groups.

    Parameters
    ----------
    control : array-like
    test : array-like
        These should be numerical iterables. NaNs are dropped. For
        paired data of equal length, a pair is dropped when either of
        its members is NaN, so that the remaining observations stay
        aligned.
    effect_size : string.
        Any one of the following are accepted inputs:
        'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or
        'cliffs_delta'
    is_paired : boolean, default False
    resamples : int, default 5000
        The number of bootstrap resamples to be taken.
    ci : float, default 95
        The confidence interval width. The default of 95 produces 95%
        confidence intervals.
    random_seed : int, default 12345
        `random_seed` is used to seed the random number generator during
        bootstrap resampling. This ensures that the confidence intervals
        reported are replicable.

    Returns
    -------
    A :py:class:`TwoGroupsEffectSize` object.

    difference : float
        The effect size of the difference between the control and the
        test.
    effect_size : string
        The type of effect size reported.
    is_paired : boolean
        Whether or not the difference is paired (ie. repeated measures).
    ci : float
        Returns the width of the confidence interval, in percent.
    alpha : float
        Returns the significance level of the statistical test as a
        float between 0 and 1.
    resamples : int
        The number of resamples performed during the bootstrap
        procedure.
    bootstraps : numpy ndarray
        The generated bootstraps of the effect size.
    random_seed : int
        The number used to initialise the numpy random seed generator,
        ie. `seed_value` from `numpy.random.seed(seed_value)` is
        returned.
    bca_low, bca_high : float
        The bias-corrected and accelerated confidence interval lower
        limit and upper limits, respectively.
    pct_low, pct_high : float
        The percentile confidence interval lower limit and upper limits,
        respectively.

    Raises
    ------
    ValueError
        If `effect_size` is not one of the accepted strings, or if
        Cliff's delta is requested for paired data.

    Warns
    -----
    UserWarning
        If a BCa limit falls among the extreme bootstrap values, or if a
        BCa limit cannot be computed (it is then set to the effect size
        itself).

    Examples
    --------
    >>> import numpy as np
    >>> import scipy as sp
    >>> import dabest
    >>> np.random.seed(12345)
    >>> control = sp.stats.norm.rvs(loc=0, size=30)
    >>> test = sp.stats.norm.rvs(loc=0.5, size=30)
    >>> effsize = dabest.TwoGroupsEffectSize(control, test, "mean_diff")
    >>> effsize
    The unpaired mean difference is -0.253 [95%CI -0.782, 0.241]
    5000 bootstrap samples. The confidence interval is bias-corrected
    and accelerated.
    >>> effsize.to_dict()
    {'alpha': 0.05,
     'bca_high': 0.2413346581369784,
     'bca_interval_idx': (109, 4858),
     'bca_low': -0.7818088458343655,
     'bootstraps': array([-1.09875628, -1.08840014, -1.08258695, ...,
                          0.66675324, 0.75814087, 0.80848265]),
     'ci': 95,
     'difference': -0.25315417702752846,
     'effect_size': 'mean difference',
     'is_paired': False,
     'pct_high': 0.25135646125431527,
     'pct_interval_idx': (125, 4875),
     'pct_low': -0.763588353717278,
     'pvalue_brunner_munzel': nan,
     'pvalue_kruskal': nan,
     'pvalue_mann_whitney': 0.2600723060808019,
     'pvalue_paired_students_t': nan,
     'pvalue_students_t': 0.34743913903372836,
     'pvalue_welch': 0.3474493875548965,
     'pvalue_wilcoxon': nan,
     'random_seed': 12345,
     'resamples': 5000,
     'statistic_brunner_munzel': nan,
     'statistic_kruskal': nan,
     'statistic_mann_whitney': 406.0,
     'statistic_paired_students_t': nan,
     'statistic_students_t': 0.9472545159069105,
     'statistic_welch': 0.9472545159069105,
     'statistic_wilcoxon': nan}
    """
    from numpy import array, isnan
    from numpy import sort as npsort
    import scipy.stats as spstats
    from string import Template
    import warnings

    from ._stats_tools import confint_2group_diff as ci2g
    from ._stats_tools import effsize as es

    self.__EFFECT_SIZE_DICT = {
        "mean_diff": "mean difference",
        "median_diff": "median difference",
        "cohens_d": "Cohen's d",
        "hedges_g": "Hedges' g",
        "cliffs_delta": "Cliff's delta",
    }

    kosher_es = list(self.__EFFECT_SIZE_DICT)
    if effect_size not in kosher_es:
        err1 = "The effect size '{}'".format(effect_size)
        err2 = "is not one of {}".format(kosher_es)
        raise ValueError(" ".join([err1, err2]))

    if effect_size == "cliffs_delta" and is_paired is True:
        err1 = "`paired` is True; therefore Cliff's delta is not defined."
        raise ValueError(err1)

    # Convert to numpy arrays for speed, then drop NaNs.
    control = array(control)
    test = array(test)
    if is_paired and control.shape == test.shape:
        # Paired observations must stay aligned: drop the whole pair when
        # either member is NaN instead of filtering each array on its own.
        keep = ~(isnan(control) | isnan(test))
        control = control[keep]
        test = test[keep]
    else:
        control = control[~isnan(control)]
        test = test[~isnan(test)]

    self.__effect_size = effect_size
    self.__control = control
    self.__test = test
    self.__is_paired = is_paired
    self.__resamples = resamples
    self.__random_seed = random_seed
    self.__ci = ci
    self.__alpha = ci2g._compute_alpha_from_ci(ci)

    self.__difference = es.two_group_difference(
        control, test, is_paired, effect_size)

    self.__jackknives = ci2g.compute_meandiff_jackknife(
        control, test, is_paired, effect_size)

    self.__acceleration_value = ci2g._calc_accel(self.__jackknives)

    bootstraps = ci2g.compute_bootstrapped_diff(
        control, test, is_paired, effect_size,
        resamples, random_seed)
    # The interval indices computed below index into the sorted bootstraps.
    self.__bootstraps = npsort(bootstraps)

    self.__bias_correction = ci2g.compute_meandiff_bias_correction(
        self.__bootstraps, self.__difference)

    # Compute BCa intervals.
    bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
        self.__bias_correction, self.__acceleration_value,
        self.__resamples, ci)

    self.__bca_interval_idx = (bca_idx_low, bca_idx_high)

    unstable_msg = Template(" ".join([
        "The $lim_type limit of the interval",
        "was in the $loc 10 values.",
        "The result should be considered unstable.",
    ]))
    undefined_msg = Template(" ".join([
        "The $lim_type limit of the BCa interval cannot be computed.",
        "It is set to the effect size itself.",
        "All bootstrap values were likely all the same.",
    ]))

    # Each limit is resolved independently so that both attributes are
    # always assigned, even when only one of the two indices is NaN.
    if isnan(bca_idx_low):
        self.__bca_low = self.__difference
        warnings.warn(undefined_msg.substitute(lim_type="lower"),
                      stacklevel=1)
    else:
        self.__bca_low = self.__bootstraps[bca_idx_low]
        if bca_idx_low <= 10:
            warnings.warn(
                unstable_msg.substitute(lim_type="lower", loc="bottom"),
                stacklevel=1)

    if isnan(bca_idx_high):
        self.__bca_high = self.__difference
        warnings.warn(undefined_msg.substitute(lim_type="upper"),
                      stacklevel=1)
    else:
        self.__bca_high = self.__bootstraps[bca_idx_high]
        if bca_idx_high >= resamples - 9:
            warnings.warn(
                unstable_msg.substitute(lim_type="upper", loc="top"),
                stacklevel=1)

    # Compute percentile intervals.
    pct_idx_low = int((self.__alpha / 2) * resamples)
    pct_idx_high = int((1 - (self.__alpha / 2)) * resamples)

    self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
    self.__pct_low = self.__bootstraps[pct_idx_low]
    self.__pct_high = self.__bootstraps[pct_idx_high]

    # Perform statistical tests. Only the tests relevant to the chosen
    # design and effect size are run; the attributes of the other tests
    # are deliberately left unset by this method.
    if is_paired is True:
        # Wilcoxon, a non-parametric version of the paired T-test.
        wilcoxon = spstats.wilcoxon(control, test)
        self.__pvalue_wilcoxon = wilcoxon.pvalue
        self.__statistic_wilcoxon = wilcoxon.statistic

        if effect_size != "median_diff":
            # Paired Student's t-test.
            paired_t = spstats.ttest_rel(control, test, nan_policy='omit')
            self.__pvalue_paired_students_t = paired_t.pvalue
            self.__statistic_paired_students_t = paired_t.statistic

    elif effect_size == "cliffs_delta":
        # Let's go with Brunner-Munzel!
        brunner_munzel = spstats.brunnermunzel(control, test,
                                               nan_policy='omit')
        self.__pvalue_brunner_munzel = brunner_munzel.pvalue
        self.__statistic_brunner_munzel = brunner_munzel.statistic

    elif effect_size == "median_diff":
        # According to scipy's documentation of the function,
        # "The Kruskal-Wallis H-test tests the null hypothesis
        # that the population median of all of the groups are equal."
        kruskal = spstats.kruskal(control, test, nan_policy='omit')
        self.__pvalue_kruskal = kruskal.pvalue
        self.__statistic_kruskal = kruskal.statistic

    else:  # for mean difference, Cohen's d, and Hedges' g.
        # Welch's t-test, assumes normality of distributions,
        # but does not assume equal variances.
        welch = spstats.ttest_ind(control, test, equal_var=False,
                                  nan_policy='omit')
        self.__pvalue_welch = welch.pvalue
        self.__statistic_welch = welch.statistic

        # Student's t-test, assumes normality of distributions,
        # as well as assumption of equal variances.
        students_t = spstats.ttest_ind(control, test, equal_var=True,
                                       nan_policy='omit')
        self.__pvalue_students_t = students_t.pvalue
        self.__statistic_students_t = students_t.statistic

        # Mann-Whitney test: Non parametric,
        # does not assume normality of distributions
        try:
            mann_whitney = spstats.mannwhitneyu(control, test,
                                                alternative='two-sided')
        except ValueError:
            # Occurs when the control and test are exactly identical
            # in terms of rank (eg. all zeros.) The Mann-Whitney
            # attributes are intentionally left unset in that case.
            pass
        else:
            self.__pvalue_mann_whitney = mann_whitney.pvalue
            self.__statistic_mann_whitney = mann_whitney.statistic
def time_brunnermunzel(self, alternative, nan_policy, distribution):
    """Run one Brunner-Munzel test on the two samples held by the instance.

    The samples are read from ``self.u1`` and ``self.u2``; the three
    arguments are forwarded unchanged to ``stats.brunnermunzel``. The
    result is discarded and nothing is returned.
    """
    options = {
        "alternative": alternative,
        "distribution": distribution,
        "nan_policy": nan_policy,
    }
    stats.brunnermunzel(self.u1, self.u2, **options)
def _hypotest_normality(test_func, sample, min_size):
    """Run a normality test on ``sample``, or report why it was skipped."""
    if sample.size >= min_size:
        stat, p = test_func(sample)
        print_stat_p(stat, p)
        print_normality_test(p)
    else:
        print("Cannot be performed because of size", sample.size)


def _hypotest_anderson(sample, unit):
    """Print the Anderson-Darling verdict at every tabulated significance level."""
    result = anderson(sample)
    print('stat=%.6f' % (result.statistic))
    for sl, cv in zip(result.significance_level, result.critical_values):
        if result.statistic < cv:
            print('Probably Normal at the %.1f%% %s' % (sl, unit))
        else:
            print('Probably not Normal at the %.1f%% %s' % (sl, unit))


def _hypotest_report(stat, p):
    """Print a test statistic with its p-value, then the hypothesis verdict."""
    print_stat_p(stat, p)
    print_hypo(p)


def hypotest(df=None, x="", y="", q1="", q2=""):
    """Print a battery of two-sample hypothesis tests for one variable.

    The values of column ``x`` are split into two samples, either by a
    label column or by two query strings, and descriptive statistics,
    normality tests, non-parametric tests and parametric tests are
    printed to stdout.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to analyse.
    x : str
        Name of the column holding the variable under test.
    y : str, optional
        Name of a label column; rows with ``df[y] == 1`` form the
        "positive" sample and rows with ``df[y] == 0`` the "negative" one.
    q1, q2 : str, optional
        ``DataFrame.query`` expressions selecting the first and second
        sample. Used only when ``y`` is empty.

    Raises
    ------
    SystemExit
        With status 1 when neither ``y`` nor both ``q1`` and ``q2`` are
        given.
    """
    import sys

    print("\n--- HYPOTHESIS TESTS --- ")

    if y:
        grouped = df.groupby(y)[x]
        medians = grouped.median()
        means = grouped.mean()
        stds = grouped.std()
        # get np arrays
        x_y_true = df.loc[df[y] == 1][x].values
        x_y_false = df.loc[df[y] == 0][x].values
    elif q1 and q2:
        first = df.query(q1)[x]
        second = df.query(q2)[x]
        medians = first.median(), second.median()
        means = first.mean(), second.mean()
        stds = first.std(), second.std()
        # get np arrays
        x_y_true = first.values
        x_y_false = second.values
    else:
        print("No condition in hypotest..")
        # sys.exit rather than the interactive-only ``exit`` helper.
        sys.exit(1)

    separator = '~' * 40
    rule = 80 * "-"
    h0_gauss_positive = (
        "H0: the sample for variable %s has a Gaussian distribution "
        "for positive %s" % (x, y))
    h0_gauss_negative = (
        "H0: the sample for variable %s has a Gaussian distribution "
        "for negative %s" % (x, y))

    print(separator)
    print("\nMean, Std & Median")
    print("(%s, %s)" % (q1, q2))
    print(rule)
    print("Mean values", means)
    print("Std deviations", stds)
    print("Median values", medians)

    print(rule)
    print(
        "\nNormality tests (whether a data sample has a normal distribution)")
    print(rule)

    print("\nShapiro-Wilk:")
    print(h0_gauss_positive)
    _hypotest_normality(shapiro, x_y_true, 3)
    print("\n" + h0_gauss_negative)
    _hypotest_normality(shapiro, x_y_false, 3)

    # note: we need to avoid
    # ValueError: skewtest is not valid with less than 8 samples; 5 samples were given.
    print(separator)
    print("\nD'Agostino’s K^2 Test")
    print(h0_gauss_positive)
    _hypotest_normality(normaltest, x_y_true, 8)
    print("\n" + h0_gauss_negative)
    _hypotest_normality(normaltest, x_y_false, 8)

    print(separator)
    print("\nAnderson-Darling Test")
    print(h0_gauss_positive)
    _hypotest_anderson(x_y_true, "level")
    # The original literal started with the invalid escape "\H"; a newline
    # was intended, as in the Shapiro-Wilk and D'Agostino sections above.
    print("\n" + h0_gauss_negative)
    _hypotest_anderson(x_y_false, "CL")

    print(rule)
    print("\nNonparametric Statistical Hypothesis Tests")
    print(rule)

    print("\nMann-Whitney U (rank) test:")
    info_M_W()
    print(
        "\nH0: the distributions of both samples for variable %s are equal (negative or positive %s)"
        % (x, y))
    stat, p = mannwhitneyu(x_y_true, x_y_false)
    _hypotest_report(stat, p)

    print(separator)
    print("\nKruskal-Wallis H test:")
    info_K_W()
    print(
        "\nH0: the distributions of all samples for variable %s are equal (negative or positive %s)"
        % (x, y))
    stat, p = kruskal(x_y_true, x_y_false)
    _hypotest_report(stat, p)

    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ks_2samp.html#scipy.stats.ks_2samp
    h0_same_distribution = (
        "\nH0: The 2 independent samples for the variable %s are drawn from the same continuous distribution (negative or positive %s)"
        % (x, y))
    print(separator)
    print("\nKolmogorov-Smirnov test:")
    info_K_S()
    print(h0_same_distribution)
    stat, p = ks_2samp(x_y_true, x_y_false)
    _hypotest_report(stat, p)

    print(separator)
    print("\nKolmogorov-Smirnov test using cumulative distributions:")
    print(h0_same_distribution)
    x_y_true_cum = np.cumsum(x_y_true)
    x_y_false_cum = np.cumsum(x_y_false)
    stat, p = ks_2samp(x_y_true_cum, x_y_false_cum)
    _hypotest_report(stat, p)

    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.brunnermunzel.html#scipy.stats.brunnermunzel
    print(separator)
    print("\nBrunner-Munzel test:")
    info_B_M()
    print(
        "\nH0: when values are taken one by one from each group of the variable %s, the probabilities of getting large values in both groups are equal (negative or positive %s)"
        % (x, y))
    stat, p = brunnermunzel(x_y_true, x_y_false)
    _hypotest_report(stat, p)

    print(rule)
    print("\nParametric Statistical Hypothesis Tests")
    print(rule)

    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
    print("\nStudent's t-test:")
    info_t_test()
    print(
        "\nH0: there is association in variable %s for positive or negative %s"
        % (x, y))
    stat, p = ttest_ind(x_y_true, x_y_false)
    _hypotest_report(stat, p)

    print(separator)
    print("\nStudent's t-test (two-sided for checking identical means):")
    print("H0: the means of two distributions are identical")
    compare_means = weightstats.CompareMeans.from_data(data1=x_y_true,
                                                       data2=x_y_false)
    ttest, pval_t, dof = compare_means.ttest_ind(alternative="two-sided",
                                                 usevar="pooled",
                                                 value=0)
    print("t-test = %f p-value = %f DoF = %i" % (ttest, pval_t, dof))
    print_hypo(pval_t)

    print(separator)
    print("\nANOVA (one way):")
    info_anova()
    print(
        "\nH0=the means of the %s samples are equal for positive or negative %s"
        % (x, y))
    stat, p = f_oneway(x_y_true, x_y_false)
    _hypotest_report(stat, p)

    # https://www.statsmodels.org/stable/generated/statsmodels.stats.weightstats.ztest.html
    print(separator)
    print("\nZ-test: two-sided")
    info_Z()
    print("\nH0 : the mean of two independent groups is the same")
    stat, p = weightstats.ztest(x1=x_y_true,
                                x2=x_y_false,
                                value=0,
                                alternative='two-sided')
    _hypotest_report(stat, p)

    # https://www.statsmodels.org/stable/generated/statsmodels.stats.weightstats.CompareMeans.ztest_ind.html#statsmodels.stats.weightstats.CompareMeans.ztest_ind
    print(separator)
    print("\nZ-test: Two-sided test statistic for checking identical means.")
    print("\nH0: the means of two distributions are identical")
    stat, p = weightstats.CompareMeans.from_data(
        data1=x_y_true, data2=x_y_false).ztest_ind(alternative="two-sided",
                                                   usevar="pooled",
                                                   value=0)
    _hypotest_report(stat, p)
# NOTE(review): the plotting statements below look like the tail of a
# per-field plotting loop whose header is outside this view -- confirm the
# intended indentation against the full file before applying.
if 'POP' in field:
    # Tick positions are the natural log of the tick labels;
    # presumably the plotted x values are log-transformed -- TODO confirm.
    xt = [0.001, 0.1, 1, 10]
    xl = np.log(xt)
    plt.xticks(xl, xt)

# clean up and save the dang fig (PNG at 150 dpi, plus an SVG copy)
plt.tight_layout()
plt.savefig(plots + 'density_dist/png/both-' + field + '.png', dpi=150)
plt.savefig(plots + 'density_dist/svg/both-' + field + '.svg')
plt.close()

# do some hacky stuff to run brunner munzel tests
# test mean temperature differences
bck = bck_data[-4]  # fourth-from-last entry of bck_data; presumably the LST_m background layer -- verify
field = 'LST_m'
cov_ae = np.array(ae[field])
cov_aa = np.array(aa[field])

# subset to just good data values (drop entries equal to the no_data sentinel)
cov_ae = cov_ae[cov_ae != no_data]
cov_aa = cov_aa[cov_aa != no_data]
bck = bck[bck != no_data]

# Compare each sample against the background sample; only the p-values
# are reported, the statistics (wae, waa) are not used further here.
wae, pae = stats.brunnermunzel(cov_ae, bck)
waa, paa = stats.brunnermunzel(cov_aa, bck)

print('LST significance testing')
print('aedes aegypti   : p = {:0.3f}'.format(pae))
print('aedes albopictus: p = {:0.3f}'.format(paa))

# just do the rest via command line..
def testability_improvement_statistical_test(test_all=True):
    """Print before/after gains and significance tests for each meter.

    The "before" and "after" CSV files listed in the module-level
    ``project_name_path_dict`` (first and second path of each entry) are
    loaded, summarised with ``describe()``, and then, for each of
    'PredictedTestability', 'LineCoverage' and 'BranchCoverage', the
    absolute and relative gain of the column sums is printed together
    with the result of several two-sample tests of "after" against
    "before". A test is reported as 'Passed' when its p-value is below
    0.05.

    Parameters
    ----------
    test_all : bool, default True
        When true, the data of all projects are pooled; otherwise only
        the 'JHotDraw' project is analysed.

    Returns
    -------
    None
        All results are printed to stdout.
    """
    if test_all:
        # For all projects: read everything first and concatenate once,
        # instead of growing a DataFrame inside the loop.
        before_frames = []
        after_frames = []
        for paths in project_name_path_dict.values():
            before_frames.append(pd.read_csv(paths[0], index_col=False))
            after_frames.append(pd.read_csv(paths[1], index_col=False))
        # pd.concat rejects an empty list, so keep the empty-frame result
        # the original produced when no project is configured.
        df_before = (pd.concat(before_frames, ignore_index=True)
                     if before_frames else pd.DataFrame())
        df_after = (pd.concat(after_frames, ignore_index=True)
                    if after_frames else pd.DataFrame())
    else:
        # For a single project
        df_before = pd.read_csv(project_name_path_dict['JHotDraw'][0],
                                index_col=False)
        df_after = pd.read_csv(project_name_path_dict['JHotDraw'][1],
                               index_col=False)

    print(df_before.describe())
    print(df_after.describe())

    one_sided = {'alternative': 'greater'}
    # (label, test function, keyword arguments); run in this order.
    unpaired_tests = [
        ('1 statistic independent t-test', ttest_ind, one_sided),
        ('2 statistic Mann-Whitney U test', mannwhitneyu, one_sided),
        ('3 statistic Wilcoxon rank-sum test', ranksums, one_sided),
        ('4 statistic Brunner-Munzel test', brunnermunzel, one_sided),
        # Kruskal-Wallis is called without an `alternative` argument.
        ('5 statistic Kruskal test', kruskal, {}),
    ]
    # The signed-rank test is paired, so it is only run when both samples
    # have the same length.
    paired_test = ('6 statistic Wilcoxon test', wilcoxon, one_sided)

    meters = ['PredictedTestability', 'LineCoverage', 'BranchCoverage']
    for meter in meters:
        before = df_before[meter]
        after = df_after[meter]

        print(f'p-value for {meter}')
        before_total = before.sum()
        absolute_meter_gain = after.sum() - before_total
        relative_meter_gain = absolute_meter_gain / before_total
        print(f'Absolute {meter} gain: {absolute_meter_gain}')
        print(f'Relative {meter} gain: {relative_meter_gain}')

        planned = list(unpaired_tests)
        if len(before) == len(after):
            planned.append(paired_test)

        for label, run_test, options in planned:
            s, p = run_test(after, before, **options)
            print(f'{label} = {s}, p-value={p:.4E}',
                  'Passed' if p < 0.05 else 'Failed')
        print('-' * 50)
def custom(a, b):
    """Return the p-value of the Brunner-Munzel test between ``a`` and ``b``.

    The test is run with the default options of ``stats.brunnermunzel``;
    the test statistic is discarded.
    """
    _, p_value = stats.brunnermunzel(a, b)
    return p_value