Ejemplo n.º 1
0
 def minimum_size_statsmodels(self, power=0.80):
     """Minimum per-group sample size for a one-sided ('larger') z-test.

     Computes Cohen's h from ``self.p_alt`` vs. ``self.p_null`` and asks
     statsmodels' NormalIndPower solver for the sample size that reaches
     the requested power at significance level ``self.alpha``.
     """
     es = proportion_effectsize(self.p_alt, self.p_null)
     solver = NormalIndPower()
     return solver.solve_power(effect_size=es,
                               alpha=self.alpha,
                               power=power,
                               alternative="larger")
Ejemplo n.º 2
0
    def fn(control, test):
        # Choose the standardized effect size and the matching power solver:
        # proportion-valued samples use the z-test machinery, anything else
        # falls back to the t-test machinery.
        if _is_proportion(control, test):
            solver = zt_ind_solve_power
            es = proportion_effectsize(np.mean(control), np.mean(test))
        else:
            solver = tt_ind_solve_power
            es = _effect_size_nonprop(control, test)

        # Solve for power at the observed group sizes (two-sided, alpha=0.05).
        return solver(
            effect_size=es,
            nobs1=len(control),
            alpha=0.05,
            ratio=len(test) / len(control),
            alternative='two-sided',
        )
Ejemplo n.º 3
0
def test_proportion_effect_size():
    # Cohen's h for p1=0.5 vs p2=0.4; reference value taken from a blog post.
    observed = smprop.proportion_effectsize(0.5, 0.4)
    assert_almost_equal(observed, 0.2013579207903309, decimal=13)
Ejemplo n.º 4
0
def test_proportion_effect_size():
    """Check Cohen's h of (0.5, 0.4) against a known reference value."""
    expected = 0.2013579207903309  # value quoted in the source blog post
    assert_almost_equal(smprop.proportion_effectsize(0.5, 0.4),
                        expected, decimal=13)
def get_effect_size(df1, df2):
    """Print and return the standardized effect size (Cohen's h) of two samples.

    Parameters
    ----------
    df1, df2 : array-like
        Samples whose means are treated as the two proportions to compare.
        Presumably binary (0/1) outcomes — TODO confirm with callers.

    Returns
    -------
    float
        The unrounded effect size. (Fix: the original version printed the
        value but returned None, so callers of a "get_*" function could not
        actually obtain the result.)
    """
    p1 = np.mean(df1)
    p2 = np.mean(df2)
    es = proportion_effectsize(p1, p2)
    # Printed output is byte-identical to the original (rounded to 4 decimals).
    print("Effect size: {}".format(round(es, 4)))
    return es
Ejemplo n.º 6
0
# Standardized effect size (Cohen's h) for proportions 0.6 vs 0.5.
from statsmodels.stats.proportion import proportion_effectsize
proportion_effectsize(0.6, 0.5)

# Per-group sample size for 80% power at alpha=0.05 and effect size 0.2;
# leaving nobs1 out makes solve_power return the required sample size.
from statsmodels.stats.power import zt_ind_solve_power
zt_ind_solve_power(effect_size=0.2, alpha=0.05, power=0.8)
        alpha : Type-I error rate
        beta  : Type-II error rate

    Output value:
        n : Number of samples required for each group to obtain desired power
    """

    # NOTE(review): the `def experiment_size(...)` header and the start of this
    # docstring were lost by the excerpt; body kept byte-identical below.
    # Get necessary z-scores and standard deviations (@ 1 obs per group)
    # z_null = z_{1-alpha}; z_alt = z_{beta} is negative, so the subtraction
    # below effectively ADDS |z_beta| * sd_alt (standard two-proportion formula).
    z_null = stats.norm.ppf(1 - alpha)
    z_alt = stats.norm.ppf(beta)
    # Null SD assumes both groups share p_null; alternative SD uses each
    # group's own proportion.
    sd_null = np.sqrt(2 * p_null * (1 - p_null))
    sd_alt = np.sqrt((p_null * (1 - p_null) + (p_alt * (1 - p_alt))))

    p_diff = p_alt - p_null
    # Per-group n; ceil because a sample size must be a whole number.
    n = ((z_null * sd_null - z_alt * sd_alt) / p_diff)**2
    return np.ceil(n)


# Required n per group to detect a lift from 10% to 12% conversion.
experiment_size(0.1, 0.12)

# Alternative Approaches
# example of using statsmodels for sample size calculation
from statsmodels.stats.power import NormalIndPower
from statsmodels.stats.proportion import proportion_effectsize

# leave out the "nobs" parameter to solve for it
# (one-sided 'larger' test at alpha=0.05, target power 0.8)
NormalIndPower().solve_power(effect_size=proportion_effectsize(.12, .1),
                             alpha=.05,
                             power=0.8,
                             alternative='larger')
Ejemplo n.º 8
0
 expected) = proportions_chisquare(count=[success_A, success_B],
                                   nobs=[visits_per_group, visits_per_group])
# NOTE(review): the statement above is truncated by the excerpt — the target
# "(chi2, chi2_p_value," was lost; an intact copy appears further below.
print("chi2: %f \t p_value: %f" % (chi2, chi2_p_value))

## Examine the output of the chi2_contingency function.
## If the target p_value is 0.05, what is your conclusion? Do you accept or reject H0?

## Note that this test only tells you whether A & B have different conversion rates, not
## which is larger. In this case, since A & B had the same number of visits, this is easy to
## determine. However, if you only showed B to 10% of your visitors, you may want to use a
## one-sided test instead.

## Your team also wants to know the "power" of the above results: the probability of
## correctly rejecting H0 when H1 is true (power = 1 - beta).
## The power can be obtained using the NormalIndPower.solve_power function
effect_size = proportion_effectsize(prop1=conversion_rate_A,
                                    prop2=conversion_rate_B)
# Leaving `power` unset while supplying nobs1 makes solve_power return the power.
proportion_test_power = NormalIndPower().solve_power(effect_size=effect_size,
                                                     nobs1=visits_per_group,
                                                     alpha=0.05)
###################################################################################
## 2. AB Test of Means
## Scenario:
## Your team's manager asks you about dwell time differences between versions A and B
## Question: Is the customers' time spent on page different between version A
## and B of the website?
###################################################################################
## Load the data
## Remember to set your working directory to the bootcamp base folder
dwell_time_A = pd.read_csv('Datasets/Dwell_Time/Dwell_Time_VersionA.csv')
dwell_time_B = pd.read_csv('Datasets/Dwell_Time/Dwell_Time_VersionB.csv')
# NOTE(review): the section below duplicates the chi-square/power block and the
# "AB Test of Means" header above — presumably a copy/scrape artifact; kept
# byte-identical to preserve behavior.
(chi2, chi2_p_value, expected) = proportions_chisquare(count=[success_A, success_B],
                                                  nobs=[visits_per_group, visits_per_group])
print("chi2: %f \t p_value: %f" % (chi2, chi2_p_value))

## Examine the output of the chi2_contingency function.
## If the target p_value is 0.05, what is your conclusion? Do you accept or reject H0?

## Note that this test only tells you whether A & B have different conversion rates, not
## which is larger. In this case, since A & B had the same number of visits, this is easy to 
## determine. However, if you only showed B to 10% of your visitors, you may want to use a
## one-sided test instead.

## Your team also wants to know the "power" of the above results: the probability of
## correctly rejecting H0 when H1 is true (power = 1 - beta).
## The power can be obtained using the NormalIndPower.solve_power function
effect_size = proportion_effectsize(prop1=conversion_rate_A, prop2=conversion_rate_B)
proportion_test_power = NormalIndPower().solve_power(effect_size=effect_size, nobs1=visits_per_group, alpha=0.05)

###################################################################################
## 2. AB Test of Means
## Scenario:
## Your team's manager asks you about dwell time differences between versions A and B
## Question: Is the customers' time spent on page different between version A
## and B of the website?
###################################################################################
## Load the data
## Remember to set your working directory to the bootcamp base folder
dwell_time_A = pd.read_csv('Datasets/Dwell_Time/Dwell_Time_VersionA.csv')
dwell_time_B = pd.read_csv('Datasets/Dwell_Time/Dwell_Time_VersionB.csv')

## Visualize the data
Ejemplo n.º 10
0
# NOTE(review): fragment — `pval` here comes from a test run before this excerpt.
print('{0:0.3f}'.format(pval))

#2 Tail T test
# Assign the prices of each group
asus = laptops[laptops['Company'] == 'Asus']['Price']
toshiba = laptops[laptops['Company'] == 'Toshiba']['Price']

# Run the t-test
from scipy.stats import ttest_ind
tstat, pval = ttest_ind(asus, toshiba)
print('{0:0.3f}'.format(pval))

#CALCULATING SAMPLE SIZE
# Standardize the effect size (Cohen's h for proportions 0.20 vs 0.25)
from statsmodels.stats.proportion import proportion_effectsize
std_effect = proportion_effectsize(.20, .25)

# Assign and print the needed sample size
# (nobs1=None tells the solver to return the required per-group n)
from statsmodels.stats.power import zt_ind_solve_power
sample_size = zt_ind_solve_power(effect_size=std_effect,
                                 nobs1=None,
                                 alpha=.05,
                                 power=0.8)
print(sample_size)

# Grid of candidate sample sizes and Cohen's conventional effect sizes
# (small/medium/large) for a power plot.
sample_sizes = np.array(range(5, 100))
effect_sizes = np.array([0.2, 0.5, 0.8])

# Create results object for t-test analysis
from statsmodels.stats.power import TTestIndPower
results = TTestIndPower()
Ejemplo n.º 11
0
# z-test..
# NOTE(review): fragment — `count` and `nobs` are defined before this excerpt.
stat, pval = proportions_ztest(count, nobs, alternative="larger")
print('{0:0.3f}'.format(pval))
# t-test..
from scipy.stats import ttest_ind
# tstat, pval = ttest_ind(asus, toshiba) # ..comparing 2 price series

# power analysis
# ---
# power and sample size (how to calculate required sample size)
# -> power analysis!
# elements: effect size, significance level, power, sample size all lead
# to a larger sample size
from statsmodels.stats.proportion import proportion_effectsize
std_effect = proportion_effectsize(0.2, 0.25)
# derive the required sample size..
# (nobs1=None -> solve_power returns the per-group n for 95% power)
from statsmodels.stats.power import zt_ind_solve_power
sample_size = zt_ind_solve_power(effect_size=std_effect,
                                 nobs1=None,
                                 alpha=0.05,
                                 power=0.95)
print(sample_size)
# relationship between power and sample size..
sample_sizes = np.array(range(5, 100))
effect_sizes = np.array([0.2, 0.5, 0.8])
# results object for t-test analysis..
from statsmodels.stats.power import TTestIndPower
results = TTestIndPower()
# plot of power analysis..
results.plot_power(dep_var='nobs', nobs=sample_sizes, effect_size=effect_sizes)