Exemple #1
0
    def pivot(self, elec=None, in_device=False, sort=False):
        """ Pivoting indices of the device region, or of a single electrode

        Parameters
        ----------
        elec : str or int, optional
           electrode whose pivoting indices are requested. When omitted the
           pivoting indices of the device region are returned.
        in_device : bool, optional
           If ``True`` the pivoting indices are translated to the device region orbitals.
           Together with `sort` this corresponds to the orbitals directly translated
           to the geometry ``self.geometry.sub(self.a_dev)``.
        sort : bool, optional
           Return the indices in sorted order. Mostly useful when the device
           should be handled in a non-pivoted order.

        Examples
        --------
        >>> se = tbtncSileTBtrans(...)
        >>> se.pivot()
        [3, 4, 6, 5, 2]
        >>> se.pivot(sort=True)
        [2, 3, 4, 5, 6]
        >>> se.pivot(0)
        [2, 3]
        >>> se.pivot(0, in_device=True)
        [4, 0]
        >>> se.pivot(0, in_device=True, sort=True)
        [0, 1]
        >>> se.pivot(0, sort=True)
        [2, 3]

        See Also
        --------
        pivot_down : for the pivot table for electrodes down-folding regions
        """
        if elec is not None:
            # Pivoting elements stored for the requested electrode; the stored
            # values are shifted by one (presumably 1-based on file).
            elec_pivot = self._value('pivot', tree=self._elec(elec)) - 1
            if sort:
                elec_pivot = npsort(elec_pivot)

            if not in_device:
                return elec_pivot

            device_pivot = self._value('pivot') - 1
            if sort:
                # With both tables sorted the translated indices
                # also come out sorted.
                device_pivot = npsort(device_pivot)

            # Translate the electrode orbitals into device-region indices
            return indices(device_pivot, elec_pivot, 0)

        if in_device:
            if sort:
                # Sorted device orbitals expressed in device indices
                # are simply the consecutive range.
                return _a.arangei(self.no_d)

            device_pivot = self._value('pivot') - 1
            # Flag every orbital that is *not* part of the device, then use the
            # running count of those to shift each device orbital down.
            outside = _a.onesi(self.no)
            outside[device_pivot] = 0
            device_pivot -= _a.cumsumi(outside)[device_pivot]
            return device_pivot

        device_pivot = self._value('pivot') - 1
        if sort:
            device_pivot = npsort(device_pivot)
        return device_pivot
Exemple #2
0
    def __init__(self,
                 control,
                 test,
                 effect_size,
                 is_paired=False,
                 ci=95,
                 resamples=5000,
                 random_seed=12345):
        """
        Compute the effect size between two groups.

        Parameters
        ----------
        control : array-like
        test : array-like
            These should be numerical iterables. NaNs are dropped; when
            `is_paired` is set and both groups have the same shape, a pair
            is dropped as a whole if either member is NaN so that the
            remaining observations stay aligned.
        effect_size : string.
            Any one of the following are accepted inputs:
            'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'
        is_paired : boolean, default False
        ci : float, default 95
            The confidence interval width. The default of 95 produces 95%
            confidence intervals.
        resamples : int, default 5000
            The number of bootstrap resamples to be taken.
        random_seed : int, default 12345
            `random_seed` is used to seed the random number generator during
            bootstrap resampling. This ensures that the confidence intervals
            reported are replicable.

        Raises
        ------
        ValueError
            If `effect_size` is not one of the accepted names, or if
            'cliffs_delta' is requested together with ``is_paired=True``.

        Returns
        -------
        A :py:class:`TwoGroupsEffectSize` object.

        difference : float
            The effect size of the difference between the control and the test.

        effect_size : string
            The type of effect size reported.

        is_paired : boolean
            Whether or not the difference is paired (ie. repeated measures).

        ci : float
            Returns the width of the confidence interval, in percent.

        alpha : float
            Returns the significance level of the statistical test as a float
            between 0 and 1.

        resamples : int
            The number of resamples performed during the bootstrap procedure.

        bootstraps : numpy ndarray
            The generated bootstraps of the effect size.

        random_seed : int
            The number used to initialise the numpy random seed generator, ie.
            `seed_value` from `numpy.random.seed(seed_value)` is returned.

        bca_low, bca_high : float
            The bias-corrected and accelerated confidence interval lower limit
            and upper limits, respectively. A limit whose index cannot be
            computed is set to the effect size itself (with a warning).

        pct_low, pct_high : float
            The percentile confidence interval lower limit and upper limits,
            respectively.


        Examples
        --------
        >>> import numpy as np
        >>> import scipy as sp
        >>> import dabest
        >>> np.random.seed(12345)
        >>> control = sp.stats.norm.rvs(loc=0, size=30)
        >>> test = sp.stats.norm.rvs(loc=0.5, size=30)
        >>> effsize = dabest.TwoGroupsEffectSize(control, test, "mean_diff")
        >>> effsize
        The unpaired mean difference is -0.253 [95%CI -0.782, 0.241]
        5000 bootstrap samples. The confidence interval is bias-corrected
        and accelerated.
        >>> effsize.to_dict()
        {'alpha': 0.05,
         'bca_high': 0.2413346581369784,
         'bca_interval_idx': (109, 4858),
         'bca_low': -0.7818088458343655,
         'bootstraps': array([-1.09875628, -1.08840014, -1.08258695, ...,  0.66675324,
                 0.75814087,  0.80848265]),
         'ci': 95,
         'difference': -0.25315417702752846,
         'effect_size': 'mean difference',
         'is_paired': False,
         'pct_high': 0.25135646125431527,
         'pct_interval_idx': (125, 4875),
         'pct_low': -0.763588353717278,
         'pvalue_brunner_munzel': nan,
         'pvalue_kruskal': nan,
         'pvalue_mann_whitney': 0.2600723060808019,
         'pvalue_paired_students_t': nan,
         'pvalue_students_t': 0.34743913903372836,
         'pvalue_welch': 0.3474493875548965,
         'pvalue_wilcoxon': nan,
         'random_seed': 12345,
         'resamples': 5000,
         'statistic_brunner_munzel': nan,
         'statistic_kruskal': nan,
         'statistic_mann_whitney': 406.0,
         'statistic_paired_students_t': nan,
         'statistic_students_t': 0.9472545159069105,
         'statistic_welch': 0.9472545159069105,
         'statistic_wilcoxon': nan}
        """

        from numpy import array, isnan
        from numpy import sort as npsort

        import scipy.stats as spstats

        from string import Template
        import warnings

        from ._stats_tools import confint_2group_diff as ci2g
        from ._stats_tools import effsize as es

        self.__EFFECT_SIZE_DICT = {
            "mean_diff": "mean difference",
            "median_diff": "median difference",
            "cohens_d": "Cohen's d",
            "hedges_g": "Hedges' g",
            "cliffs_delta": "Cliff's delta"
        }

        kosher_es = list(self.__EFFECT_SIZE_DICT)
        if effect_size not in kosher_es:
            err1 = "The effect size '{}'".format(effect_size)
            err2 = "is not one of {}".format(kosher_es)
            raise ValueError(" ".join([err1, err2]))

        if effect_size == "cliffs_delta" and is_paired is True:
            err1 = "`paired` is True; therefore Cliff's delta is not defined."
            raise ValueError(err1)

        # Convert to numpy arrays for speed and drop NaNs.
        control = array(control)
        test = array(test)
        if is_paired and control.shape == test.shape:
            # Paired observations must stay aligned: discard a pair as a
            # whole as soon as either of its members is missing.
            complete = ~(isnan(control) | isnan(test))
            control = control[complete]
            test = test[complete]
        else:
            control = control[~isnan(control)]
            test = test[~isnan(test)]

        self.__effect_size = effect_size
        self.__control = control
        self.__test = test
        self.__is_paired = is_paired
        self.__resamples = resamples
        self.__random_seed = random_seed
        self.__ci = ci
        self.__alpha = ci2g._compute_alpha_from_ci(ci)

        self.__difference = es.two_group_difference(control, test, is_paired,
                                                    effect_size)

        self.__jackknives = ci2g.compute_meandiff_jackknife(
            control, test, is_paired, effect_size)

        self.__acceleration_value = ci2g._calc_accel(self.__jackknives)

        bootstraps = ci2g.compute_bootstrapped_diff(control, test, is_paired,
                                                    effect_size, resamples,
                                                    random_seed)
        # The interval indices computed below index into the *sorted*
        # bootstrap distribution.
        self.__bootstraps = npsort(bootstraps)

        self.__bias_correction = ci2g.compute_meandiff_bias_correction(
            self.__bootstraps, self.__difference)

        # Compute BCa intervals.
        bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
            self.__bias_correction, self.__acceleration_value,
            self.__resamples, ci)

        self.__bca_interval_idx = (bca_idx_low, bca_idx_high)

        # Each limit is resolved on its own so that both attributes are
        # always assigned, even when only one of the indices is NaN.
        low_undefined = bool(isnan(bca_idx_low))
        high_undefined = bool(isnan(bca_idx_high))

        if low_undefined:
            self.__bca_low = self.__difference
        else:
            self.__bca_low = self.__bootstraps[bca_idx_low]

        if high_undefined:
            self.__bca_high = self.__difference
        else:
            self.__bca_high = self.__bootstraps[bca_idx_high]

        unstable = Template(" ".join([
            "The $lim_type limit of the interval",
            "was in the $loc 10 values.",
            "The result should be considered unstable.",
        ]))
        undefined = Template(" ".join([
            "The $lim_type limit of the BCa interval cannot be computed.",
            "It is set to the effect size itself.",
            "All bootstrap values were likely all the same.",
        ]))

        if low_undefined:
            warnings.warn(undefined.substitute(lim_type="lower"),
                          stacklevel=1)
        elif bca_idx_low <= 10:
            warnings.warn(unstable.substitute(lim_type="lower",
                                              loc="bottom"),
                          stacklevel=1)

        if high_undefined:
            warnings.warn(undefined.substitute(lim_type="upper"),
                          stacklevel=1)
        elif bca_idx_high >= resamples - 9:
            warnings.warn(unstable.substitute(lim_type="upper", loc="top"),
                          stacklevel=1)

        # Compute percentile intervals.
        pct_idx_low = int((self.__alpha / 2) * resamples)
        pct_idx_high = int((1 - (self.__alpha / 2)) * resamples)

        self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
        self.__pct_low = self.__bootstraps[pct_idx_low]
        self.__pct_high = self.__bootstraps[pct_idx_high]

        # Perform statistical tests. Only the tests relevant to the chosen
        # effect size / pairing are run; the attributes of the other tests
        # are intentionally left unset.
        if is_paired is True:
            # Wilcoxon, a non-parametric version of the paired T-test.
            wilcoxon = spstats.wilcoxon(control, test)
            self.__pvalue_wilcoxon = wilcoxon.pvalue
            self.__statistic_wilcoxon = wilcoxon.statistic

            if effect_size != "median_diff":
                # Paired Student's t-test.
                paired_t = spstats.ttest_rel(control, test, nan_policy='omit')
                self.__pvalue_paired_students_t = paired_t.pvalue
                self.__statistic_paired_students_t = paired_t.statistic

                # NOTE(review): currently unused; it fed a statistical power
                # computation that is disabled. Kept so that nothing relying
                # on the call is affected.
                standardized_es = es.cohens_d(control, test, is_paired=True)

        elif effect_size == "cliffs_delta":
            # Let's go with Brunner-Munzel!
            brunner_munzel = spstats.brunnermunzel(control,
                                                   test,
                                                   nan_policy='omit')
            self.__pvalue_brunner_munzel = brunner_munzel.pvalue
            self.__statistic_brunner_munzel = brunner_munzel.statistic

        elif effect_size == "median_diff":
            # According to scipy's documentation of the function,
            # "The Kruskal-Wallis H-test tests the null hypothesis
            # that the population median of all of the groups are equal."
            kruskal = spstats.kruskal(control, test, nan_policy='omit')
            self.__pvalue_kruskal = kruskal.pvalue
            self.__statistic_kruskal = kruskal.statistic

        else:  # for mean difference, Cohen's d, and Hedges' g.
            # Welch's t-test, assumes normality of distributions,
            # but does not assume equal variances.
            welch = spstats.ttest_ind(control,
                                      test,
                                      equal_var=False,
                                      nan_policy='omit')
            self.__pvalue_welch = welch.pvalue
            self.__statistic_welch = welch.statistic

            # Student's t-test, assumes normality of distributions,
            # as well as assumption of equal variances.
            students_t = spstats.ttest_ind(control,
                                           test,
                                           equal_var=True,
                                           nan_policy='omit')
            self.__pvalue_students_t = students_t.pvalue
            self.__statistic_students_t = students_t.statistic

            # Mann-Whitney test: Non parametric,
            # does not assume normality of distributions
            try:
                mann_whitney = spstats.mannwhitneyu(control,
                                                    test,
                                                    alternative='two-sided')
                self.__pvalue_mann_whitney = mann_whitney.pvalue
                self.__statistic_mann_whitney = mann_whitney.statistic
            except ValueError:
                # Deliberate best-effort: occurs when the control and test
                # are exactly identical in terms of rank (eg. all zeros.),
                # in which case the Mann-Whitney results stay unset.
                pass

            # NOTE(review): currently unused; see the paired branch above.
            standardized_es = es.cohens_d(control, test, is_paired=False)
Exemple #3
0
def summary_ci_1group(x,
                      func,
                      resamples=5000,
                      alpha=0.05,
                      random_seed=12345,
                      sort_bootstraps=True,
                      *args,
                      **kwargs):
    """
    Apply `func` to the array-like `x` and bootstrap a bias-corrected and
    accelerated (BCa) confidence interval around the result.

    Parameters
    ----------
    x: array-like
        A numerical iterable.

    func: function
        The summary function to be applied to x.

    resamples: int, default 5000
        The number of bootstrap resamples to be taken of func(x).

    alpha: float, default 0.05
        Denotes the likelihood that the confidence interval produced
        _does not_ include the true summary statistic. When alpha = 0.05,
        a 95% confidence interval is produced.

    random_seed: int, default 12345
        `random_seed` is used to seed the random number generator during
        bootstrap resampling. This ensures that the confidence intervals
        reported are replicable.

    sort_bootstraps: boolean, default True
        Whether the bootstraps placed in the result are sorted in
        ascending order.

    *args, **kwargs:
        Accepted for compatibility; they are not used by this function.

    Returns
    -------
    A dictionary with the following five keys:
        'summary': float.
            The outcome of func(x).

        'func': function.
            The function applied to x.

        'bca_ci_low': float
        'bca_ci_high': float.
            The bias-corrected and accelerated confidence interval, for the
            given alpha.

        'bootstraps': array.
            The bootstraps used to generate the confidence interval.
            These will be sorted in ascending order if `sort_bootstraps`
            was True.

    """
    from numpy import sort as npsort
    from . import confint_2group_diff as ci2g

    resampled = compute_1group_bootstraps(x, func, resamples, random_seed)
    bias_correction = compute_1group_bias_correction(x, resampled, func)
    acceleration = ci2g._calc_accel(compute_1group_jackknife(x, func))

    # NOTE(review): `alpha` is forwarded unchanged as the last argument;
    # confirm that compute_interval_limits expects a significance level here
    # rather than a confidence-interval width in percent.
    limits = ci2g.compute_interval_limits(bias_correction, acceleration,
                                          resamples, alpha)

    # The interval limits are positions within the sorted bootstraps.
    ordered = npsort(resampled)
    lower_bound = ordered[limits[0]]
    upper_bound = ordered[limits[1]]

    return {
        'summary': func(x),
        'func': func,
        'bca_ci_low': lower_bound,
        'bca_ci_high': upper_bound,
        'bootstraps': ordered if sort_bootstraps else resampled
    }