def test_calculate_required_sample_size_proportion(
        global_variables_for_calculate_required_sample_size):
    """
    Sample size should be calculated appropriately using the statsmodels package when measuring a proportion value in
    the experiment (using a z-test).

    The expected value is built by calling statsmodels directly with exactly the inputs that are handed to
    `calculate_required_sample_size`, and is then truncated with `int` before the comparison.

    Parameters
    ----------
    global_variables_for_calculate_required_sample_size : fixture
        Requested only for its side effects; presumably it populates the `pytest.*` attributes read below
        (TODO confirm against the fixture definition).
    """

    # Size of the shift between the baseline and the new proportion, computed with the module's own helper
    effect_size = set_up_experiment._calculate_effect_size_proportions(
        baseline_proportion=pytest.baseline_metric_value,
        new_proportion=pytest.new_metric_value,
    )

    # Use the same alternative hypothesis that is passed to the function under test, so the expectation does not
    # silently depend on the fixture happening to be 'two-sided'
    expected_required_sample_size = stats_power.zt_ind_solve_power(
        effect_size=effect_size,
        alpha=pytest.significance_level,
        power=pytest.power,
        alternative=pytest.alternative_hypothesis,
    )

    actual_required_sample_size = set_up_experiment.calculate_required_sample_size(
        baseline_metric_value=pytest.baseline_metric_value,
        new_metric_value=pytest.new_metric_value,
        measurement_type='proportion',
        alternative_hypothesis=pytest.alternative_hypothesis,
        power=pytest.power,
        significance_level=pytest.significance_level,
    )

    # The solver returns a non-integer value; compare against its integer truncation
    assert actual_required_sample_size == int(expected_required_sample_size)
Beispiel #2
0
# Both statsmodels helpers used by this snippet.
from statsmodels.stats.proportion import proportion_effectsize
from statsmodels.stats.power import zt_ind_solve_power

# Effect size between a proportion of 0.6 and a proportion of 0.5.
# The value is not stored; it is only visible in an interactive session.
proportion_effectsize(prop1=0.6, prop2=0.5)

# Power calculation for a z-test with effect size 0.2, 5% significance and
# 80% power. No sample size is supplied, so that is the quantity left for the
# solver to determine. The result is likewise not stored.
zt_ind_solve_power(alpha=0.05, power=0.8, effect_size=0.2)
Beispiel #3
0
    standard_norm = stats.norm(0, 1)

    # find Z_beta from desired power
    Z_beta = standard_norm.ppf(power)

    # find Z_alpha
    Z_alpha = standard_norm.ppf(1 - sig_level / 2)

    # average of probabilities from both groups
    pooled_prob = (bcr + bcr + mde) / 2

    min_N = (2 * pooled_prob * (1 - pooled_prob) * (Z_beta + Z_alpha)**2 /
             mde**2)

    return min_N


if __name__ == '__main__':
    # Inputs shared by both calculations below.
    baseline_rate = 0.05
    min_detectable_effect = 0.01
    target_power = 0.99

    # Hand-rolled formula from this module.
    manual_estimate = min_sample_size(
        bcr=baseline_rate, mde=min_detectable_effect, power=target_power)
    print('Sample size for exp: {}'.format(manual_estimate))

    # statsmodels way: express the effect relative to the spread of the
    # baseline rate, then let the solver find the first group's size
    # (nobs1=None) for equally sized groups (ratio=1.0).
    eff_size = min_detectable_effect / np.sqrt(
        baseline_rate * (1 - baseline_rate))
    statsmodels_estimate = zt_ind_solve_power(
        effect_size=eff_size,
        nobs1=None,
        alpha=0.05,
        power=target_power,
        ratio=1.0,
        alternative='two-sided',
    )
    print('Sample size for exp: {}'.format(statsmodels_estimate))
Beispiel #4
0
def calculate_required_sample_size(
    baseline_metric_value: float,
    new_metric_value: float,
    measurement_type: str,
    *,
    alternative_hypothesis: str = 'two-sided',
    power: float = 0.8,
    significance_level: float = 0.05,
    standard_deviation: float = None,
) -> int:
    """
    Work out how many observations an experiment needs in order to detect a given shift in a metric.

    The shift between `baseline_metric_value` and `new_metric_value` is converted to an effect size, which is then
    handed to the statsmodels power solver (z-test for proportions, t-test for means). The solver's result is
    converted with `int`, so any fractional part is dropped.

    Parameters
    ----------
    baseline_metric_value : float
        Current value of the metric the experiment is trying to move, e.g. today's retention rate.
    new_metric_value : float
        Smallest value of the metric that would count as a worthwhile change, e.g. 85% retention against an 80%
        baseline if that is the smallest improvement that pays for the project.
    measurement_type : str (must be 'proportion' or 'mean')
        'proportion' for rate-like metrics (e.g. % conversion), 'mean' for averaged metrics (e.g. average spend).
    alternative_hypothesis : str 'two-sided' (default), 'larger', 'smaller'
        Direction of the test. 'two-sided' is the usual choice because the direction of the change is generally
        not known before the experiment runs.
    power : float in interval (0,1) (default is 0.8)
        Probability of rejecting the Null Hypothesis when the Alternative Hypothesis is true, i.e. one minus the
        probability of a type II error. 80% is a common default; pick what suits the business context.
    significance_level : float in interval (0,1) (default is 0.05)
        Probability of a type I error, i.e. of rejecting the Null Hypothesis when it is in fact true. 5% is a
        common default; pick what suits the business context.
    standard_deviation : float (default is none)
        Standard deviation of the metric. Required only when `measurement_type` is 'mean'.

    Returns
    -------
    int
        Minimum sample size required to satisfy experiment criteria.

    Raises
    ------
    TypeError
        If `measurement_type` is 'mean' but no `standard_deviation` provided.
    ValueError
        If `significance_level` or `power` not in range (0,1).
    ValueError
        If `measurement_type` not in ['proportion', 'mean'].
    """

    # A mean-based test cannot be sized without the metric's spread; this is checked before anything else
    spread_missing = standard_deviation is None
    if measurement_type == 'mean' and spread_missing:
        raise TypeError(
            "When measuring a mean for your test, you must also specify its existing `standard_deviation`."
        )

    # Range checks on the two probabilities, then the check on the measurement type
    for probability, label in ((significance_level, 'significance_level'), (power, 'power')):
        _check_experiment_inputs.validate_experiment_parameter_between_0_and_1(
            probability, label)
    _check_experiment_inputs.validate_measurement_type_is_valid(
        measurement_type)

    # Settings passed unchanged to whichever solver is selected below
    solver_settings = dict(
        alpha=significance_level,
        power=power,
        alternative=alternative_hypothesis,
    )

    if measurement_type == 'proportion':
        # Difference between two proportions -> z-test
        shift = _calculate_effect_size_proportions(
            baseline_proportion=baseline_metric_value,
            new_proportion=new_metric_value)
        required_sample_size = stats_power.zt_ind_solve_power(
            effect_size=shift, **solver_settings)

    elif measurement_type == 'mean':
        # Difference between two means -> t-test
        shift = _calculate_effect_size_means(
            baseline_mean=baseline_metric_value,
            new_mean=new_metric_value,
            standard_deviation=standard_deviation)
        required_sample_size = stats_power.tt_ind_solve_power(
            effect_size=shift, **solver_settings)

    return int(required_sample_size)
Beispiel #5
0
# `laptops` and `asus` are expected to exist already; they are not defined in
# this part of the script.
# Keep only the rows whose Company is 'Toshiba' and take their Price column.
is_toshiba = laptops['Company'] == 'Toshiba'
toshiba = laptops[is_toshiba]['Price']

# Compare the two price samples with scipy's independent-samples t-test and
# show the p-value to three decimals.
from scipy.stats import ttest_ind
ttest_outcome = ttest_ind(asus, toshiba)
tstat, pval = ttest_outcome
print('{0:0.3f}'.format(pval))

# --- Sample size calculation ---
# Effect size for a move from a proportion of 0.20 to one of 0.25.
from statsmodels.stats.proportion import proportion_effectsize
std_effect = proportion_effectsize(prop1=.20, prop2=.25)

# nobs1 is the unknown here: ask the z-test solver for it at alpha 0.05 and
# power 0.8, then print it.
from statsmodels.stats.power import zt_ind_solve_power
sample_size = zt_ind_solve_power(
    nobs1=None, effect_size=std_effect, power=0.8, alpha=.05)
print(sample_size)

# Grid for the power plot: sample sizes 5..99 and three effect sizes.
sample_sizes = np.arange(5, 100)
effect_sizes = np.array([0.2, 0.5, 0.8])

# Plot power against number of observations, one curve per effect size.
from statsmodels.stats.power import TTestIndPower
results = TTestIndPower()
results.plot_power(
    dep_var='nobs',
    effect_size=effect_sizes,
    nobs=sample_sizes,
)
plt.show()

# --- P-value corrections ---
# Import the correction routine and set up the p-values it would be applied to.
from statsmodels.sandbox.stats.multicomp import multipletests
pvals = [0.01, 0.05, 0.10, 0.50, 0.99]
alpha = 0.05  # significance level (probability of getting false positive)
# power = 1 - beta where beta = (probability of getting false negative). Typical
# beta = 0.2 to 0.4. So power = 0.8 to 0.6
power = 0.6

#######End - Variables to be edited by user #######
###################################################

# effect_size = difference between means divided by standard dev. Should be >0
# eg. 10% change in freq on an initial value of 50Hz = 5Hz. Say, std dev. = 10Hz
# then effect size = 5/10 = 0.5
# `diff_means` and `stddev` are expected to be set earlier in the script.
effect_size = float(abs(diff_means)) / float(stddev)

# Reporting. Each print is a call with exactly one pre-formatted string, so
# the script parses on Python 3 and still emits the same text on Python 2
# (where `print(<one expression>)` is the print statement applied to a
# parenthesised value).
banner = "====================================================="
print(banner)
now = datetime.datetime.now()
print("Starting analysis at: {}".format(now.strftime("%m-%d-%Y %H:%M")))
print(banner)
print(" ")
print("diff_means = {}".format(diff_means))
# The doubled space reproduces the original label "stddev = " plus the
# separator the print statement inserted.
print("stddev =  {}".format(stddev))
print("effect_size = {}".format(effect_size))
print("alpha = {}".format(alpha))
print("power = {}".format(power))
print(" ")

# Required observations in the first group according to the t-test solver ...
t_test_observations = smp.tt_ind_solve_power(
    effect_size=effect_size, nobs1=None, alpha=alpha, power=power)
print("Observations needed = {}".format(round(t_test_observations)))

# ... and according to the z-test solver, printed under the same label.
z_test_observations = smp.zt_ind_solve_power(
    effect_size=effect_size, nobs1=None, alpha=alpha, power=power)
print("Observations needed = {}".format(round(z_test_observations)))