Beispiel #1
0
    def get_sample_size(self, beta=0.1):
        """
        Compute the per-group sample size needed to reach a target power.

        The effect size is derived from ``self.prop_null`` and
        ``self.prop_alt`` and handed, together with ``self.alpha``, to
        ``tt_ind_solve_power``.

        Parameters
        ----------
        beta : float
            Type 2 error rate; the solver is asked for power ``1 - beta``.

        Returns
        -------
        n : int
            Sample size per group, rounded with ``np.round`` and cast to int.
        """
        effect = sms.proportion_effectsize(self.prop_null, self.prop_alt)

        # Any hypothesis other than 'two_tailed' is solved as 'smaller'.
        direction = (
            'two-sided' if self.alt_hypothesis == 'two_tailed' else 'smaller'
        )

        solved = tt_ind_solve_power(
            effect_size=effect,
            alpha=self.alpha,
            power=1 - beta,
            alternative=direction,
        )
        return int(np.round(solved, 0))
Beispiel #2
0
    def _design_experiment(self, *args):
        """
        Store the experiment design settings on the instance.

        Parameters
        ----------
        *args
            Exactly five positional values, in order: test type, confidence
            level, power, and the "before" and "after" values that are
            passed to ``sms.proportion_effectsize``.

        Notes
        -----
        ``self.alpha`` is set to ``1 - confidence_level``.
        NOTE(review): ``self.beta`` receives the power value unchanged (not
        ``1 - power``) -- confirm that readers of ``self.beta`` expect this.
        """
        kind, conf_level, target_power, rate_before, rate_after = args

        self.test_type = kind
        self.confidence_level = conf_level
        self.alpha = 1 - conf_level
        self.beta = target_power
        self.effect_size = sms.proportion_effectsize(rate_before, rate_after)
Beispiel #3
0
def two_proportions_sample_size(p1, p2, alpha=0.05, power=0.8, frac=0.5):
    """Solve for the two group sizes needed to compare proportions p1 and p2.

    Parameters
    ----------
    p1, p2 : float
        The two proportions passed to ``sms.proportion_effectsize``.
    alpha : float
        Significance level handed to the power solver.
    power : float
        Target power handed to the power solver.
    frac : float
        Split fraction; the group-size ratio is ``frac / (1 - frac)``.

    Returns
    -------
    n1, n2 : float
        ``n2`` is the solved size rounded down with ``np.floor`` and
        ``n1`` is ``n2`` multiplied by the ratio (so it may be fractional).
    """
    size_ratio = frac / (1. - frac)
    effect = sms.proportion_effectsize(p1, p2)
    solved = sms.NormalIndPower().solve_power(
        effect,
        power=power,
        alpha=alpha,
        ratio=size_ratio,
    )
    # The solved size is rounded down before the ratio is applied.
    base_n = np.floor(solved)
    return base_n * size_ratio, base_n
Beispiel #4
0
 def post(self):
     """Return the required sample size for the proportions in the JSON body.

     Reads ``p1`` and ``p2`` from the request's JSON body and solves for the
     sample size with a fixed significance level of 0.05 and power of 0.8.

     Returns
     -------
     tuple
         ``(payload, http_status)``. On success the payload carries the
         rounded sample size under ``'data'`` with status 200. When the
         body is not a JSON object containing both ``p1`` and ``p2`` the
         payload has ``'error': True`` and the status is 400.
     """
     sig_level = 0.05
     power = 0.8
     body = request.get_json(silent=True) or {}

     # A missing or unparsable body falls back to {}, so the keys must be
     # checked explicitly; indexing directly would raise KeyError (or
     # TypeError for a non-object body) and surface as a server error.
     if not isinstance(body, dict) or 'p1' not in body or 'p2' not in body:
         return {
             'error': True,
             'message': "Request body must be a JSON object with 'p1' and 'p2'.",
             'data': None,
             'status_code': 400
         }, 400

     p1 = body['p1']
     p2 = body['p2']
     p1_and_p2 = sms.proportion_effectsize(p1, p2)
     sample_size = sms.NormalIndPower().solve_power(p1_and_p2,
                                                    power=power,
                                                    alpha=sig_level)
     return {
         'error': False,
         'message': 'The required sample size for this campaign is:',
         'data': round(sample_size),
         'status_code': 200
     }, 200
Beispiel #5
0
def proportions_two_samplesize(p1, p2, frac=0.5, power=0.8, alpha=0.05):
    """Sample size for two proportions.

    Parameters
    ----------
    p1 : float in (0, 1)
        Metric to be improved, e.g. 0.1
    p2 : float in (0, 1)
        Target value of the metric, e.g. 0.1 * 1.2 (20 percent above p1)
    frac : float in (0, 1)
        Share of the control group in the total test size, e.g. 0.2 means
        20% of the whole experiment
    power : float in (0, 1)
        Power, 0.8 by default
    alpha : float in (0, 1)
        Significance level, 0.05 by default

    Returns
    -------
    n1, n2 : float
        Required number of observations in the control and test groups;
        their sum is the total number of observations required

    Notes
    -----
    Used to compute the required sample size when running an A/B test.
    p1 is the metric that has to be improved to the level of p2.
    In the example:
        p1 - 0.1 (10%)
        p2 - 0.1*1.2 - a 20% improvement is required
        frac - 0.2 - 20% control, 80% test

    Examples
    --------
    >>> proportions_two_samplesize(0.1, 0.1 * 1.2, frac=0.2)
    (2396.5, 9586.0)
    """
    size_ratio = frac / (1. - frac)
    effect = proportion_effectsize(p1, p2)
    solved = NormalIndPower().solve_power(
        effect,
        power=power,
        alpha=alpha,
        ratio=size_ratio,
    )
    # The solved size is rounded down before the ratio is applied.
    base_n = np.floor(solved)
    return base_n * size_ratio, base_n
Beispiel #6
0
def get_sample_size_for_binomial(p0, p1, power = 0.8, significance = 0.05):
    """
    Return the number of observations per sample for comparing two proportions.

    Parameters
    ----------
    p0 : [0, 1] current proportion value
    p1 : [0, 1] expected proportion value
    power : power of the test, e.g. 0.8; one minus the probability of a
    type II error, i.e. the probability that the test correctly rejects
    the Null Hypothesis when the Alternative Hypothesis is true.
    significance : significance level, e.g. 0.05; the probability of a
    type I error, i.e. of wrongly rejecting a true Null Hypothesis.

    Returns
    -------
    required_n : Number of observations per sample, rounded up.
    """
    effect = sms.proportion_effectsize(p0, p1)
    solved = sms.NormalIndPower().solve_power(
        effect,
        power=power,
        alpha=significance,
    )
    # Round up to a whole number of observations.
    return ceil(solved)
# Load the A/B test data and preview the first rows.
df = pd.read_csv('ab_data.csv')
df.head()

# Cross-tabulate group against landing page.
pd.crosstab(index=df['group'], columns=df['landing_page'])

# Mean of 'converted' per group on the raw data.
df.groupby('group')['converted'].mean()

# Keep only the first row seen for each user_id.
df_sub = df.drop_duplicates(subset='user_id', keep="first")
print("Number of rows of data:", len(df))
print("Number of rows of data after removing multiple users:", len(df_sub))

# Mean of 'converted' per group after de-duplication.
df_sub.groupby('group')['converted'].mean()

# Solve for the minimum sample size using the effect size between the
# proportions 0.12 and 0.14, with alpha = 0.05 and power = 0.8.
effect = sms.proportion_effectsize(0.12, 0.14)
alpha = 0.05
power = 0.8
analysis = TTestIndPower()
result = analysis.solve_power(effect,
                              power=power,
                              nobs1=None,
                              ratio=1.0,
                              alpha=alpha)
print('Sample Size: %.3f' % result)

# Round the solved size up to a whole number.
import math
n = math.ceil(result)

# Select the control-group rows; the original note says n rows are to be
# sampled for each of the control and treatment groups (not shown here).
df_control = df_sub.loc[df_sub['group'] == 'control']
# Net Conversion - number of payments divided by number of clicks
payments_cont = control["Payments"].sum()
payments_exp = experiment["Payments"].sum()
NC_cont = payments_cont / clicks_cont
NC_exp = payments_exp / clicks_exp
# Pooled rate over both groups and the pooled standard deviation built
# from it.
NC_pooled = (payments_cont + payments_exp) / (clicks_cont + clicks_exp)
NC_sd_pooled = mt.sqrt(NC_pooled * (1 - NC_pooled) *
                       (1 / clicks_cont + 1 / clicks_exp))
# Margin of error: z-score at 1 - alpha / 2 times the pooled standard
# deviation, rounded to 4 decimals.
NC_ME = round(get_z_score(1 - alpha / 2) * NC_sd_pooled, 4)
# Difference between the experiment and control rates, rounded to 4 decimals.
NC_diff = round(NC_exp - NC_cont, 4)
# print("The change due to the experiment is",NC_diff*100,"%")
# print("Confidence Interval: [",NC_diff-NC_ME,",",NC_diff+NC_ME,"]")
# print ("The change is statistically significant if the CI doesn't include 0. In that case, it is practically significant if",NC["d_min"],"is not in the CI as well.")

# Case91: try computing the sample size with library functions.
# Calculating effect size based on our expected rates; the two rates
# compared are GC["p"] - GC["d_min"] and GC["p"].
effect_size = sms.proportion_effectsize(GC["p"] - 1.0 * GC["d_min"],
                                        GC["p"] + 0.0 * GC["d_min"])
required_n = sms.NormalIndPower().solve_power(effect_size,
                                              power=0.8,
                                              alpha=0.05,
                                              ratio=1)
# Round up to the next whole number.
required_n = ceil(required_n)
print(effect_size, required_n)


# Case02 - hand-written confidence-interval function for two-sample proportions
def two_proprotions_confint(success_a,
                            size_a,
                            success_b,
                            size_b,
                            significance=0.05):
    # Proportion for sample A: successes divided by sample size.
    # NOTE(review): the remainder of this function is not visible here.
    prop_a = success_a / size_a
Beispiel #9
0
import pandas as pd
import scipy.stats as stats
import statsmodels.stats.api as sms
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from math import ceil

# Some plot styling preferences
plt.style.use('seaborn-whitegrid')
font = {'family': 'Helvetica', 'weight': 'bold', 'size': 14}

mpl.rc('font', **font)

# Calculating effect size based on our expected rates
effect_size = sms.proportion_effectsize(0.13, 0.15)
# Calculating sample size needed
required_n = sms.NormalIndPower().solve_power(effect_size,
                                              power=0.8,
                                              alpha=0.05,
                                              ratio=1)
# Rounding up to next whole number
required_n = ceil(required_n)
# print(required_n)

# Load the experiment data
df = pd.read_csv('ab_data.csv')
# To make sure all the control group are seeing the old page and viceversa
# Use crosstab with landing_page as the columns and group as the rows
# df=pd.crosstab(df['group'], df['landing_page'])
Beispiel #10
0
import matplotlib.pyplot as plt
import seaborn as sns
from math import ceil

%matplotlib inline

# Some plot styling preferences
plt.style.use('seaborn-whitegrid')
font = {'family' : 'Helvetica',
        'weight' : 'bold',
        'size'   : 14}

mpl.rc('font', **font)

# Calculate the effect size from the two proportions
effect_size = sms.proportion_effectsize(0.13, 0.15)    # Calculating effect size based on our expected rates

required_n = sms.NormalIndPower().solve_power(
    effect_size,
    power=0.8,
    alpha=0.05,
    ratio=1
    )                                                  # Calculating sample size needed

required_n = ceil(required_n)                          # Rounding up to next whole number

print(required_n)


# Print the dataframe summary
df.info()