def pivot(self, elec=None, in_device=False, sort=False):
    """ Pivoting indices of the device region, or of a single electrode

    Parameters
    ----------
    elec : str or int, optional
       electrode whose pivoting indices are requested. If ``None`` the
       pivoting indices of the device region itself are returned.
    in_device : bool, optional
       if ``True`` the returned indices are expressed relative to the
       device region orbitals instead of the full set of orbitals.
       Combined with `sort` this corresponds to the orbitals of the
       geometry ``self.geometry.sub(self.a_dev)``.
    sort : bool, optional
       whether the indices are returned in sorted order. Mostly useful
       when the device should be handled in a non-pivoted order.

    Examples
    --------
    >>> se = tbtncSileTBtrans(...)
    >>> se.pivot()
    [3, 4, 6, 5, 2]
    >>> se.pivot(sort=True)
    [2, 3, 4, 5, 6]
    >>> se.pivot(0)
    [2, 3]
    >>> se.pivot(0, in_device=True)
    [4, 0]
    >>> se.pivot(0, in_device=True, sort=True)
    [0, 1]
    >>> se.pivot(0, sort=True)
    [2, 3]

    See Also
    --------
    pivot_down : for the pivot table for electrodes down-folding regions
    """
    if elec is not None:
        # Pivot table of the requested electrode; one is subtracted from the
        # stored values (presumably 1-based -> 0-based, confirm with the file
        # format).
        elec_pvt = self._value('pivot', tree=self._elec(elec)) - 1
        if sort:
            elec_pvt = npsort(elec_pvt)
        if not in_device:
            return elec_pvt

        # Express the electrode indices relative to the device pivot table.
        # When sorting was requested both tables are sorted, hence the
        # translated indices come out sorted as well.
        dev_pvt = self._value('pivot') - 1
        if sort:
            dev_pvt = npsort(dev_pvt)
        return indices(dev_pvt, elec_pvt, 0)

    # From here on: pivot table of the device region itself
    if in_device and sort:
        # Sorted *and* relative to the device: simply a consecutive range
        return _a.arangei(self.no_d)

    dev_pvt = self._value('pivot') - 1
    if in_device:
        # Flag every orbital that is not part of the device; the running sum
        # of the flags tells how many orbitals must be skipped before each
        # device orbital, i.e. how much each index has to be reduced by.
        outside = _a.onesi(self.no)
        outside[dev_pvt] = 0
        dev_pvt -= _a.cumsumi(outside)[dev_pvt]
    elif sort:
        dev_pvt = npsort(dev_pvt)
    return dev_pvt
def __init__(self, control, test, effect_size,
             is_paired=False, ci=95,
             resamples=5000, random_seed=12345):
    """
    Compute the effect size between two groups.

    Parameters
    ----------
    control : array-like
    test : array-like
        These should be numerical iterables. NaNs are dropped; for
        paired data of equal shape a pair is dropped as soon as either
        of its members is NaN, so the remaining pairs stay aligned.
    effect_size : string.
        Any one of the following are accepted inputs:
        'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or
        'cliffs_delta'
    is_paired : boolean, default False
    ci : float, default 95
        The confidence interval width. The default of 95 produces 95%
        confidence intervals.
    resamples : int, default 5000
        The number of bootstrap resamples to be taken.
    random_seed : int, default 12345
        `random_seed` is used to seed the random number generator during
        bootstrap resampling. This ensures that the confidence intervals
        reported are replicable.

    Returns
    -------
    A :py:class:`TwoGroupsEffectSize` object.

    difference : float
        The effect size of the difference between the control and the
        test.
    effect_size : string
        The type of effect size reported.
    is_paired : boolean
        Whether or not the difference is paired (ie. repeated measures).
    ci : float
        Returns the width of the confidence interval, in percent.
    alpha : float
        Returns the significance level of the statistical test as a float
        between 0 and 1.
    resamples : int
        The number of resamples performed during the bootstrap procedure.
    bootstraps : numpy ndarray
        The generated bootstraps of the effect size.
    random_seed : int
        The number used to initialise the numpy random seed generator, ie.
        `seed_value` from `numpy.random.seed(seed_value)` is returned.
    bca_low, bca_high : float
        The bias-corrected and accelerated confidence interval lower limit
        and upper limits, respectively. Both are set to `difference` when
        the BCa interval cannot be computed.
    pct_low, pct_high : float
        The percentile confidence interval lower limit and upper limits,
        respectively.

    Raises
    ------
    ValueError
        If `effect_size` is not one of the accepted strings, or if
        `effect_size` is 'cliffs_delta' while `is_paired` is True.

    Notes
    -----
    The statistical tests that are run depend on the inputs:

    - paired: Wilcoxon, plus the paired Student's t-test unless
      `effect_size` is 'median_diff';
    - unpaired 'cliffs_delta': Brunner-Munzel;
    - unpaired 'median_diff': Kruskal-Wallis;
    - otherwise: Welch's t-test, Student's t-test and Mann-Whitney.

    Examples
    --------
    >>> import numpy as np
    >>> import scipy as sp
    >>> import dabest
    >>> np.random.seed(12345)
    >>> control = sp.stats.norm.rvs(loc=0, size=30)
    >>> test = sp.stats.norm.rvs(loc=0.5, size=30)
    >>> effsize = dabest.TwoGroupsEffectSize(control, test, "mean_diff")
    >>> effsize
    The unpaired mean difference is -0.253 [95%CI -0.782, 0.241]
    5000 bootstrap samples. The confidence interval is bias-corrected
    and accelerated.
    >>> effsize.to_dict()
    {'alpha': 0.05,
     'bca_high': 0.2413346581369784,
     'bca_interval_idx': (109, 4858),
     'bca_low': -0.7818088458343655,
     'bootstraps': array([-1.09875628, -1.08840014, -1.08258695, ...,
                          0.66675324, 0.75814087, 0.80848265]),
     'ci': 95,
     'difference': -0.25315417702752846,
     'effect_size': 'mean difference',
     'is_paired': False,
     'pct_high': 0.25135646125431527,
     'pct_interval_idx': (125, 4875),
     'pct_low': -0.763588353717278,
     'pvalue_brunner_munzel': nan,
     'pvalue_kruskal': nan,
     'pvalue_mann_whitney': 0.2600723060808019,
     'pvalue_paired_students_t': nan,
     'pvalue_students_t': 0.34743913903372836,
     'pvalue_welch': 0.3474493875548965,
     'pvalue_wilcoxon': nan,
     'random_seed': 12345,
     'resamples': 5000,
     'statistic_brunner_munzel': nan,
     'statistic_kruskal': nan,
     'statistic_mann_whitney': 406.0,
     'statistic_paired_students_t': nan,
     'statistic_students_t': 0.9472545159069105,
     'statistic_welch': 0.9472545159069105,
     'statistic_wilcoxon': nan}
    """
    import warnings
    from string import Template

    from numpy import array, isnan
    from numpy import sort as npsort
    import scipy.stats as spstats

    from ._stats_tools import confint_2group_diff as ci2g
    from ._stats_tools import effsize as es

    self.__EFFECT_SIZE_DICT = {"mean_diff": "mean difference",
                               "median_diff": "median difference",
                               "cohens_d": "Cohen's d",
                               "hedges_g": "Hedges' g",
                               "cliffs_delta": "Cliff's delta"}

    # ---- Validate the request -------------------------------------------
    kosher_es = list(self.__EFFECT_SIZE_DICT)
    if effect_size not in kosher_es:
        err1 = "The effect size '{}'".format(effect_size)
        err2 = "is not one of {}".format(kosher_es)
        raise ValueError(" ".join([err1, err2]))

    if effect_size == "cliffs_delta" and is_paired is True:
        err1 = "`paired` is True; therefore Cliff's delta is not defined."
        raise ValueError(err1)

    # ---- Clean the data -------------------------------------------------
    # Convert to numpy arrays for speed. NaNs are dropped.
    control = array(control)
    test = array(test)
    if is_paired and control.shape == test.shape:
        # Paired observations must be removed *together*: filtering each
        # group on its own would shift the remaining values against each
        # other (or leave groups of unequal length).
        keep = ~(isnan(control) | isnan(test))
        control = control[keep]
        test = test[keep]
    else:
        control = control[~isnan(control)]
        test = test[~isnan(test)]

    self.__effect_size = effect_size
    self.__control = control
    self.__test = test
    self.__is_paired = is_paired
    self.__resamples = resamples
    self.__random_seed = random_seed
    self.__ci = ci
    self.__alpha = ci2g._compute_alpha_from_ci(ci)

    # ---- Point estimate, jackknife and bootstrap ------------------------
    self.__difference = es.two_group_difference(
        control, test, is_paired, effect_size)

    self.__jackknives = ci2g.compute_meandiff_jackknife(
        control, test, is_paired, effect_size)

    self.__acceleration_value = ci2g._calc_accel(self.__jackknives)

    bootstraps = ci2g.compute_bootstrapped_diff(
        control, test, is_paired, effect_size,
        resamples, random_seed)
    # The interval limits below are looked up by position, so the
    # bootstraps are kept sorted.
    self.__bootstraps = npsort(bootstraps)

    self.__bias_correction = ci2g.compute_meandiff_bias_correction(
        self.__bootstraps, self.__difference)

    # Largest valid position in the sorted bootstraps; used to keep the
    # upper interval indices inside the array.
    last_idx = len(self.__bootstraps) - 1

    # ---- BCa interval ---------------------------------------------------
    bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
        self.__bias_correction, self.__acceleration_value,
        self.__resamples, ci)

    # `not` instead of `~`: bitwise inversion is only a logical negation
    # for numpy booleans, never for plain Python ones.
    bca_is_defined = not (isnan(bca_idx_low) or isnan(bca_idx_high))

    if bca_is_defined:
        bca_idx_low = min(bca_idx_low, last_idx)
        bca_idx_high = min(bca_idx_high, last_idx)
    self.__bca_interval_idx = (bca_idx_low, bca_idx_high)

    if bca_is_defined:
        self.__bca_low = self.__bootstraps[bca_idx_low]
        self.__bca_high = self.__bootstraps[bca_idx_high]

        err1 = "The $lim_type limit of the interval"
        err2 = "was in the $loc 10 values."
        err3 = "The result should be considered unstable."
        err_temp = Template(" ".join([err1, err2, err3]))

        # Positions 0..9 are the bottom ten values, positions
        # resamples-10..resamples-1 the top ten.
        if bca_idx_low < 10:
            warnings.warn(err_temp.substitute(lim_type="lower",
                                              loc="bottom"),
                          stacklevel=1)

        if bca_idx_high >= resamples - 10:
            warnings.warn(err_temp.substitute(lim_type="upper",
                                              loc="top"),
                          stacklevel=1)

    else:
        err1 = "The $lim_type limit of the BCa interval cannot be computed."
        err2 = "It is set to the effect size itself."
        err3 = "All bootstrap values were likely all the same."
        err_temp = Template(" ".join([err1, err2, err3]))

        # Collapse the interval onto the point estimate. Both limits are
        # always assigned so that neither attribute is left undefined when
        # only one index is NaN (NOTE(review): the non-NaN companion is not
        # known to be a usable index in that case -- confirm against
        # `compute_interval_limits`).
        self.__bca_low = self.__difference
        self.__bca_high = self.__difference

        if isnan(bca_idx_low):
            warnings.warn(err_temp.substitute(lim_type="lower"),
                          stacklevel=1)

        if isnan(bca_idx_high):
            warnings.warn(err_temp.substitute(lim_type="upper"),
                          stacklevel=1)

    # ---- Percentile interval --------------------------------------------
    pct_idx_low = int((self.__alpha / 2) * resamples)
    # Clamped: with alpha == 0 the raw index equals the array length.
    pct_idx_high = min(int((1 - (self.__alpha / 2)) * resamples), last_idx)

    self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
    self.__pct_low = self.__bootstraps[pct_idx_low]
    self.__pct_high = self.__bootstraps[pct_idx_high]

    # ---- Statistical tests ----------------------------------------------
    if is_paired is True:
        # Wilcoxon, a non-parametric version of the paired T-test.
        wilcoxon = spstats.wilcoxon(control, test)
        self.__pvalue_wilcoxon = wilcoxon.pvalue
        self.__statistic_wilcoxon = wilcoxon.statistic

        if effect_size != "median_diff":
            # Paired Student's t-test.
            paired_t = spstats.ttest_rel(control, test, nan_policy='omit')
            self.__pvalue_paired_students_t = paired_t.pvalue
            self.__statistic_paired_students_t = paired_t.statistic

    elif effect_size == "cliffs_delta":
        # Let's go with Brunner-Munzel!
        brunner_munzel = spstats.brunnermunzel(control, test,
                                               nan_policy='omit')
        self.__pvalue_brunner_munzel = brunner_munzel.pvalue
        self.__statistic_brunner_munzel = brunner_munzel.statistic

    elif effect_size == "median_diff":
        # According to scipy's documentation of the function,
        # "The Kruskal-Wallis H-test tests the null hypothesis
        # that the population median of all of the groups are equal."
        kruskal = spstats.kruskal(control, test, nan_policy='omit')
        self.__pvalue_kruskal = kruskal.pvalue
        self.__statistic_kruskal = kruskal.statistic

    else:  # for mean difference, Cohen's d, and Hedges' g.
        # Welch's t-test, assumes normality of distributions,
        # but does not assume equal variances.
        welch = spstats.ttest_ind(control, test, equal_var=False,
                                  nan_policy='omit')
        self.__pvalue_welch = welch.pvalue
        self.__statistic_welch = welch.statistic

        # Student's t-test, assumes normality of distributions,
        # as well as assumption of equal variances.
        students_t = spstats.ttest_ind(control, test, equal_var=True,
                                       nan_policy='omit')
        self.__pvalue_students_t = students_t.pvalue
        self.__statistic_students_t = students_t.statistic

        # Mann-Whitney test: Non parametric,
        # does not assume normality of distributions
        try:
            mann_whitney = spstats.mannwhitneyu(control, test,
                                                alternative='two-sided')
            self.__pvalue_mann_whitney = mann_whitney.pvalue
            self.__statistic_mann_whitney = mann_whitney.statistic
        except ValueError:
            # Occurs when the control and test are exactly identical
            # in terms of rank (eg. all zeros.) The Mann-Whitney
            # attributes are deliberately left unset in that case.
            pass
def summary_ci_1group(x, func, resamples=5000, alpha=0.05, random_seed=12345,
                      sort_bootstraps=True, *args, **kwargs):
    """
    Given an array-like x, returns func(x), and a bootstrap confidence
    interval of func(x).

    Parameters
    ----------
    x : array-like
        A numerical iterable.
    func : function
        The function to be applied to x.
    resamples : int, default 5000
        The number of bootstrap resamples to be taken of func(x).
    alpha : float, default 0.05
        Denotes the likelihood that the confidence interval produced
        _does not_ include the true summary statistic. When alpha = 0.05,
        a 95% confidence interval is produced.
    random_seed : int, default 12345
        `random_seed` is used to seed the random number generator during
        bootstrap resampling. This ensures that the confidence intervals
        reported are replicable.
    sort_bootstraps : boolean, default True
        Whether the returned bootstraps are sorted in ascending order.
    *args, **kwargs
        Accepted for call compatibility; they are not used by this
        function.

    Returns
    -------
    A dictionary with the following five keys:
    'summary': float.
        The outcome of func(x).
    'func': function.
        The function applied to x.
    'bca_ci_low': float
    'bca_ci_high': float.
        The bias-corrected and accelerated confidence interval, for the
        given alpha. Both are set to the summary itself (and a warning is
        issued) when the interval limits cannot be computed.
    'bootstraps': array.
        The bootstraps used to generate the confidence interval.
        These will be sorted in ascending order if `sort_bootstraps`
        was True.
    """
    import warnings

    from numpy import isnan
    from numpy import sort as npsort

    from . import confint_2group_diff as ci2g

    boots = compute_1group_bootstraps(x, func, resamples, random_seed)
    bias = compute_1group_bias_correction(x, boots, func)
    accel = ci2g._calc_accel(compute_1group_jackknife(x, func))

    # `compute_interval_limits` is fed the interval *width in percent* by
    # its other caller in this code base (e.g. 95), not the significance
    # level, so alpha is converted before it is handed over.
    # NOTE(review): inferred from that sibling call -- confirm against the
    # helper's signature.
    ci_width = 100 * (1 - alpha)
    idx_low, idx_high = ci2g.compute_interval_limits(bias, accel,
                                                     resamples, ci_width)

    boots_sorted = npsort(boots)
    summary = func(x)

    if isnan(idx_low) or isnan(idx_high):
        # NaN limits cannot be used as positions; collapse the interval
        # onto the summary instead of failing on the lookup.
        warnings.warn("The limits of the BCa interval cannot be computed. "
                      "They are set to the summary statistic itself.",
                      stacklevel=1)
        low = high = summary
    else:
        low = boots_sorted[idx_low]
        high = boots_sorted[idx_high]

    return {'summary': summary,
            'func': func,
            'bca_ci_low': low,
            'bca_ci_high': high,
            'bootstraps': boots_sorted if sort_bootstraps else boots}