def minimum_size_statsmodels(self, power=0.80):
    # Solve for the per-group sample size needed to detect a lift from
    # p_null to p_alt with the given power, using a one-sided z-test.
    return NormalIndPower().solve_power(
        effect_size=proportion_effectsize(self.p_alt, self.p_null),
        alpha=self.alpha,
        power=power,
        alternative="larger",
    )
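
For context, a minimal sketch of how the method above might be hosted and called; the class name and the attributes p_null, p_alt and alpha are assumptions, not taken from the original source:

from statsmodels.stats.power import NormalIndPower
from statsmodels.stats.proportion import proportion_effectsize

class Experiment:
    """Hypothetical container for the state the method above relies on."""
    def __init__(self, p_null, p_alt, alpha=0.05):
        self.p_null = p_null  # baseline conversion rate under H0
        self.p_alt = p_alt    # conversion rate we hope to detect
        self.alpha = alpha    # Type I error rate

    def minimum_size_statsmodels(self, power=0.80):
        return NormalIndPower().solve_power(
            effect_size=proportion_effectsize(self.p_alt, self.p_null),
            alpha=self.alpha,
            power=power,
            alternative="larger",
        )

# e.g., sample size per group to detect a lift from 10% to 12%:
# Experiment(p_null=0.10, p_alt=0.12).minimum_size_statsmodels()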
Example 2
    def solve_test_split(self):
        """Ignoring the defined test split, solve for the maximum test split
        that still achieves the target power.

        Returns
        -------
        max_test_split : float (0, 1)
            Maximum test split for experiment
        """
        e = self.normalized_effect_size()
        # With nobs1 fixed and ratio=None, solve_power returns the smallest
        # ratio = n_control / n_treatment that achieves the target power.
        ratio = NormalIndPower().solve_power(effect_size=e,
                                             nobs1=np.ceil(self.sample_size *
                                                           self.test_split),
                                             power=(1 - self.beta),
                                             alpha=self.alpha,
                                             ratio=None)
        # test_split = n_treat / (n_treat + n_control) = 1 / (1 + ratio)
        max_test_split = 1. / (ratio + 1)
        return max_test_split
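
Examples 2, 3, 4 and 6 appear to share one experiment class. A hypothetical sketch of the attributes and helper they assume (all names here are guesses, not from the original source):

import numpy as np
from statsmodels.stats.proportion import proportion_effectsize

class PowerAnalysis:
    """Hypothetical container for the state the surrounding methods assume."""
    def __init__(self, natural_rate, absolute_effect, sample_size,
                 test_split=0.5, alpha=0.05, beta=0.2):
        self.natural_rate = natural_rate        # baseline conversion rate
        self.absolute_effect = absolute_effect  # absolute lift to detect
        self.sample_size = sample_size          # total units in the experiment
        self.test_split = test_split            # fraction assigned to treatment
        self.alpha = alpha                      # Type I error rate
        self.beta = beta                        # Type II error rate

    def normalized_effect_size(self):
        # Cohen's h for a lift from natural_rate to natural_rate + absolute_effect
        return proportion_effectsize(self.natural_rate + self.absolute_effect,
                                     self.natural_rate)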
Example 3
    def solve_sample_size(self):
        """Ignoring the defined sample size, solve for the sample size required
        by the other experimental constraints. This method uses the StatsModels
        function `solve_power`.

        Returns
        -------
        sample_size : int > 0
            Sample size required to run experiment with other constraints
        """
        e = self.normalized_effect_size()
        # solve_power returns the required treatment-group size; ratio is the
        # n_control / n_treatment implied by the test split.
        n_treat = NormalIndPower().solve_power(effect_size=e,
                                               power=(1 - self.beta),
                                               alpha=self.alpha,
                                               ratio=(1 - self.test_split) /
                                               self.test_split)
        # Scale the treatment-group size back up to the total sample size.
        return int(np.ceil(n_treat / self.test_split))
Example 4
    def solve_power(self):
        """Ignoring defined beta (Type II error), solve for statistical power given
        other defined experimental constraints. This method utilized the StatsModels
        functionality `solve_power`.

        Returns
        -------
        power : float (0, 1)
            Statistical power of experiment
        """
        e = self.normalized_effect_size()
        power = NormalIndPower().solve_power(
            effect_size=e,
            nobs1=np.ceil(self.sample_size * self.test_split),
            alpha=self.alpha,
            ratio=(1 - self.test_split) / self.test_split)
        return power
Example 5
def calc_power_two_sample(p1: float,
                          p2: float,
                          n1: int,
                          n2: int,
                          alpha: float,
                          alternative: str = 'two-sided') -> float:
    """
    :param p1: float
        proportion in the sample 1
    :param p2: float
        proportion in the sample 2
    :param n1: int
        size of the sample 1
    :param n2: int
        size of the sample 2
    :param alpha: float
        Type I error rate
    :param alternative: str
        can be 'two-sided', 'less' and 'greater'
    :return: float
        test power
    Use Cases
    _________
    >>> p1 = 0.35
    >>> p2 = 0.5
    >>> n1 = 100
    >>> n2 = 120
    >>> alpha = 0.05
    >>> res = calc_power_two_sample(p1,p2,n1,n2,alpha)
    >>> np.round(res,2)
    0.62
    """
    ratio = n2 / n1
    effect_size = calculate_two_sample_normalized_effect(p1, p2, n1, n2)
    nip = NormalIndPower()
    power = nip.solve_power(effect_size=effect_size,
                            ratio=ratio,
                            nobs1=n1,
                            alpha=alpha,
                            alternative=alternative)
    return power
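
The helper calculate_two_sample_normalized_effect is not shown in this snippet. A rough stand-in (an assumption, not the original implementation) normalizes the difference in proportions by a size-weighted average of the two binomial variances; statsmodels' proportion_effectsize (Cohen's h) is a common alternative, and either choice may differ slightly from the doctest values above:

import numpy as np

def calculate_two_sample_normalized_effect(p1, p2, n1, n2):
    # Hypothetical reconstruction: standardized difference of proportions,
    # with the variance averaged across the two samples, weighted by size.
    pooled_var = (n1 * p1 * (1 - p1) + n2 * p2 * (1 - p2)) / (n1 + n2)
    return abs(p2 - p1) / np.sqrt(pooled_var)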
Example 6
    def solve_absolute_mde(self):
        """Ignoring defined absolute effect size, solve for absolute Minimal
        Detectable Effect given other defined experimental constraints. We leverage
        the StatsModels functionality, but need to solve for the "absolute" MDE. The
        StatsModel function returns the normalized Cohen's d effect size:

        MDE_Absolute = sin^2(arcsin(sqrt(natural_rate)) + cohens_h/2) - natural_rate

        Returns
        -------
        min_absolute_effect : float (0, 1)
            Absolute MDE for experiment
        """
        e = NormalIndPower().solve_power(
            nobs1=np.ceil(self.sample_size * self.test_split),
            power=(1 - self.beta),
            alpha=self.alpha,
            ratio=(1 - self.test_split) / self.test_split)
        sqrt_absolute_effect = np.sin(
            np.arcsin(np.sqrt(self.natural_rate)) + e / 2.)
        min_absolute_effect = np.power(sqrt_absolute_effect,
                                       2) - self.natural_rate
        return min_absolute_effect
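
As a quick sanity check of the inversion formula, converting a known absolute lift to Cohen's h with statsmodels and then inverting should recover the lift (a standalone sketch, independent of the class above):

import numpy as np
from statsmodels.stats.proportion import proportion_effectsize

natural_rate = 0.10
lift = 0.02  # the absolute effect we start from

h = proportion_effectsize(natural_rate + lift, natural_rate)  # Cohen's h
recovered = np.sin(np.arcsin(np.sqrt(natural_rate)) + h / 2.) ** 2 - natural_rate
print(round(recovered, 6))  # 0.02, recovering the lift we started from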
Example 7
def calc_sample_size_two_sample(p1: float,
                                p2: float,
                                ratio: float,
                                power: float = 0.8,
                                alpha: float = 0.05,
                                alternative: str = 'two-sided'):
    """
    Calculates sample sizes for a two-sample proportions test.
    :param p1: proportion in sample 1
    :param p2: proportion in sample 2
    :param ratio: n2 / n1, the ratio of the two sample sizes
    :param power: desired statistical power (1 - beta)
    :param alpha: Type I error rate
    :param alternative: 'two-sided', 'larger' or 'smaller'
    :return: tuple of (n1, n2) sample sizes
    Use Cases
    _________
    >>> p1 = 0.35
    >>> p2 = 0.5
    >>> ratio = 1.2
    >>> alpha = 0.05
    >>> power = 0.8
    >>> res = calc_sample_size_two_sample(p1, p2, ratio, power, alpha)
    >>> res
    (153, 184)
    """
    effect_size = calculate_two_sample_normalized_effect(
        p1, p2, 1, np.int32(np.round(1 * ratio, 0)))
    nip = NormalIndPower()
    ssize = nip.solve_power(effect_size=effect_size,
                            ratio=ratio,
                            power=power,
                            alpha=alpha,
                            alternative=alternative)
    return np.int32(np.round(ssize, 0)), np.int32(np.round(ratio * ssize, 0))
Example 8
import numpy as np
import scipy.stats as stats

def experiment_size(p_null, p_alt, alpha=.05, beta=.20):
    """
    Compute the minimum number of observations required per group to detect
    a change from p_null to p_alt with the desired error rates.

    Input parameters:
        p_null: base success rate under the null hypothesis
        p_alt : desired success rate to be detected
        alpha : Type-I error rate
        beta  : Type-II error rate

    Output value:
        n : Number of samples required for each group to obtain desired power
    """

    # Get necessary z-scores and standard deviations (@ 1 obs per group)
    z_null = stats.norm.ppf(1 - alpha)
    z_alt = stats.norm.ppf(beta)
    sd_null = np.sqrt(2 * p_null * (1 - p_null))
    sd_alt = np.sqrt((p_null * (1 - p_null) + (p_alt * (1 - p_alt))))

    p_diff = p_alt - p_null
    n = ((z_null * sd_null - z_alt * sd_alt) / p_diff)**2
    return np.ceil(n)


# e.g., per-group sample size to detect a lift from 10% to 12%
experiment_size(0.1, 0.12)

# Alternative Approaches
# Example of using statsmodels for the same sample size calculation. Note that
# statsmodels uses Cohen's h as the effect size, while experiment_size above
# uses an unpooled z-test formulation, so the two results differ slightly.
from statsmodels.stats.power import NormalIndPower
from statsmodels.stats.proportion import proportion_effectsize

# leave out the "nobs" parameter to solve for it
NormalIndPower().solve_power(effect_size=proportion_effectsize(.12, .1),
                             alpha=.05,
                             power=0.8,
                             alternative='larger')
Example 9
## Examine the output of the chi2_contingency function.
## If the target p_value is 0.05, what is your conclusion? Do you accept or reject H0?

## Note that this test only tells you whether A & B have different conversion rates, not
## which is larger. In this case, since A & B had the same number of visits, this is easy to
## determine. However, if you only showed B to 10% of your visitors, you may want to use a
## one-sided test instead.
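
## For reference, a sketch of the chi-square test referred to above; the variable
## names (conversions_A, conversions_B, visits_per_group) are assumptions standing
## in for values defined earlier in the script, which is not shown here.
from scipy.stats import chi2_contingency

observed = [[conversions_A, visits_per_group - conversions_A],
            [conversions_B, visits_per_group - conversions_B]]
chi2, p_value, dof, expected = chi2_contingency(observed)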

## Your team also wants to know the "power" of the above results: the probability
## that we correctly reject H0 when H1 is true (equivalently, one minus the
## Type II error rate). The power can be obtained with NormalIndPower.solve_power:
effect_size = proportion_effectsize(prop1=conversion_rate_A,
                                    prop2=conversion_rate_B)
proportion_test_power = NormalIndPower().solve_power(effect_size=effect_size,
                                                     nobs1=visits_per_group,
                                                     alpha=0.05)

###################################################################################
## 2. AB Test of Means
## Scenario:
## Your team's manager asks you about dwell time differences between versions A and B.
## Question: Is the customers' time spent on the page different between versions A
## and B of the website?
###################################################################################
## Load the data
## Remember to set your working directory to the bootcamp base folder
dwell_time_A = pd.read_csv('Datasets/Dwell_Time/Dwell_Time_VersionA.csv')
dwell_time_B = pd.read_csv('Datasets/Dwell_Time/Dwell_Time_VersionB.csv')

## Visualize the data