import pytest
import statsmodels.stats.power as stats_power

import set_up_experiment


def test_calculate_required_sample_size_proportion(
        global_variables_for_calculate_required_sample_size):
    """
    Sample size should be calculated appropriately using the statsmodels
    package when measuring a proportion value in the experiment (using a
    z-test).
    """
    effect_size = set_up_experiment._calculate_effect_size_proportions(
        baseline_proportion=pytest.baseline_metric_value,
        new_proportion=pytest.new_metric_value,
    )
    expected_required_sample_size = stats_power.zt_ind_solve_power(
        effect_size=effect_size,
        alpha=pytest.significance_level,
        power=pytest.power,
        alternative='two-sided',
    )
    actual_required_sample_size = set_up_experiment.calculate_required_sample_size(
        baseline_metric_value=pytest.baseline_metric_value,
        new_metric_value=pytest.new_metric_value,
        measurement_type='proportion',
        alternative_hypothesis=pytest.alternative_hypothesis,
        power=pytest.power,
        significance_level=pytest.significance_level,
    )
    assert actual_required_sample_size == int(expected_required_sample_size)
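# The test above assumes a fixture that stashes shared parameters on the
# pytest namespace. A minimal conftest.py sketch of what that fixture might
# look like (all values here are hypothetical, not from the source):
import pytest


@pytest.fixture
def global_variables_for_calculate_required_sample_size():
    # Shared experiment parameters used across the sample-size tests
    pytest.baseline_metric_value = 0.30
    pytest.new_metric_value = 0.35
    pytest.alternative_hypothesis = 'two-sided'
    pytest.power = 0.8
    pytest.significance_level = 0.05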
from statsmodels.stats.proportion import proportion_effectsize
from statsmodels.stats.power import zt_ind_solve_power

# Cohen's h for a shift from 50% to 60% (roughly 0.20)
effect_size = proportion_effectsize(0.6, 0.5)

# Required sample size per group for that effect at alpha=0.05, power=0.8
sample_size = zt_ind_solve_power(effect_size=0.2, alpha=0.05, power=0.8)
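# One practical note (an addition, not from the original snippet): the solver
# returns a fractional sample size, so round up to the next whole observation
# per group before planning the experiment.
import math
from statsmodels.stats.power import zt_ind_solve_power

n = zt_ind_solve_power(effect_size=0.2, alpha=0.05, power=0.8)
print(math.ceil(n))  # minimum whole observations per group (~393 here)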
import numpy as np
import scipy.stats as stats
from statsmodels.stats.power import zt_ind_solve_power


# Signature reconstructed from the body and the call below; the sig_level
# default of 0.05 is assumed.
def min_sample_size(bcr, mde, power=0.8, sig_level=0.05):
    standard_norm = stats.norm(0, 1)

    # find Z_beta from desired power
    Z_beta = standard_norm.ppf(power)

    # find Z_alpha
    Z_alpha = standard_norm.ppf(1 - sig_level / 2)

    # average of probabilities from both groups
    pooled_prob = (bcr + bcr + mde) / 2

    min_N = (2 * pooled_prob * (1 - pooled_prob)
             * (Z_beta + Z_alpha)**2 / mde**2)
    return min_N


if __name__ == '__main__':
    print('Sample size for exp: {}'.format(
        min_sample_size(bcr=0.05, mde=0.01, power=0.99)))

    # statsmodels way
    eff_size = 0.01 / np.sqrt(0.05 * (1 - 0.05))
    print('Sample size for exp: {}'.format(
        zt_ind_solve_power(effect_size=eff_size, nobs1=None, alpha=0.05,
                           power=0.99, ratio=1.0, alternative='two-sided')))
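# For reference (a note added here, not part of the original snippet): the
# closed form implemented by min_sample_size is the standard per-group sample
# size for a two-proportion z-test. In LaTeX notation:
#
#   n = \frac{2\,\bar{p}(1-\bar{p})\,(z_{1-\alpha/2} + z_{1-\beta})^2}{\mathrm{mde}^2},
#   \qquad \bar{p} = \frac{\mathrm{bcr} + (\mathrm{bcr} + \mathrm{mde})}{2}
#
# which matches `pooled_prob` and the `(Z_beta + Z_alpha)**2 / mde**2` term
# above. The statsmodels call standardizes the effect with the baseline
# variance instead of the pooled one, so the two printed sizes agree closely
# but not exactly.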
def calculate_required_sample_size(
    baseline_metric_value: float,
    new_metric_value: float,
    measurement_type: str,
    *,
    alternative_hypothesis: str = 'two-sided',
    power: float = 0.8,
    significance_level: float = 0.05,
    standard_deviation: float = None,
) -> int:
    """
    Calculate the required sample size for an experiment given a certain
    degree of change that we want to confidently detect.

    Parameters
    ----------
    baseline_metric_value : float
        Baseline value that reflects the current metric we are trying to
        change e.g. the existing retention rate.
    new_metric_value : float
        The smallest meaningful effect that we wish to be able to detect
        i.e. at what point do the results become commercially interesting
        e.g. 85% retention vs 80% baseline may be the smallest shift which
        yields a financial benefit that makes the project worth
        implementing.
    measurement_type : str (must be 'proportion' or 'mean')
        Whether the metric is a proportion (e.g. % conversion rate) or mean
        (e.g. average spend).
    alternative_hypothesis : str 'two-sided' (default), 'larger', 'smaller'
        Whether you are running a 'two-sided' test, or checking whether the
        new metric will be 'smaller' or 'larger'. 'two-sided' is generally
        recommended because we do not know in advance whether the change in
        our experiment will yield positive or negative results.
    power : float in interval (0, 1) (default is 0.8)
        Probability that the test correctly rejects the Null Hypothesis if
        the Alternative Hypothesis is true i.e. likelihood of detecting a
        shift when it is genuine (one minus the probability of a type II
        error). Default value of 80% is commonly used but you should
        consider what is appropriate given the business context.
    significance_level : float in interval (0, 1) (default is 0.05)
        The significance level/probability of a type I error, i.e.
        likelihood of a false positive (incorrectly rejecting the Null
        Hypothesis when it is in fact true). Default value of 5% is
        commonly used but you should consider what is appropriate given the
        business context.
    standard_deviation : float (default is None)
        Standard deviation for the metric being tested. Only needs to be
        set if `measurement_type` is 'mean'.

    Returns
    -------
    int
        Minimum sample size required to satisfy experiment criteria.

    Raises
    ------
    TypeError
        If `measurement_type` is 'mean' but no `standard_deviation` is
        provided.
    ValueError
        If `significance_level` or `power` is not in range (0, 1).
    ValueError
        If `measurement_type` is not in ['proportion', 'mean'].
    """
    # Validate that the experiment's parameters are appropriate
    if measurement_type == 'mean' and standard_deviation is None:
        raise TypeError(
            "When measuring a mean for your test, you must also specify "
            "its existing `standard_deviation`."
        )
    _check_experiment_inputs.validate_experiment_parameter_between_0_and_1(
        significance_level, 'significance_level')
    _check_experiment_inputs.validate_experiment_parameter_between_0_and_1(
        power, 'power')
    _check_experiment_inputs.validate_measurement_type_is_valid(
        measurement_type)

    # Calculate sample size required if measuring the difference between two
    # proportions, which will therefore use a z-test
    if measurement_type == 'proportion':
        # How big is the shift we want to capture
        effect_size = _calculate_effect_size_proportions(
            baseline_proportion=baseline_metric_value,
            new_proportion=new_metric_value)
        required_sample_size = stats_power.zt_ind_solve_power(
            effect_size=effect_size,
            alpha=significance_level,
            power=power,
            alternative=alternative_hypothesis,
        )
    # Calculate sample size required if measuring the difference between two
    # means, which will therefore use a t-test
    elif measurement_type == 'mean':
        # How big is the shift we want to capture
        effect_size = _calculate_effect_size_means(
            baseline_mean=baseline_metric_value,
            new_mean=new_metric_value,
            standard_deviation=standard_deviation)
        required_sample_size = stats_power.tt_ind_solve_power(
            effect_size=effect_size,
            alpha=significance_level,
            power=power,
            alternative=alternative_hypothesis,
        )

    return int(required_sample_size)
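# A minimal usage sketch of the function above (the metric values and the
# standard deviation here are hypothetical, chosen to mirror the docstring's
# retention and average-spend examples):
n_proportion = calculate_required_sample_size(
    baseline_metric_value=0.80,  # e.g. current retention rate
    new_metric_value=0.85,       # smallest commercially interesting rate
    measurement_type='proportion',
)
n_mean = calculate_required_sample_size(
    baseline_metric_value=50.0,  # e.g. current average spend
    new_metric_value=55.0,
    measurement_type='mean',
    standard_deviation=12.0,     # required whenever measurement_type='mean'
)
print(n_proportion, n_mean)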
import numpy as np
import matplotlib.pyplot as plt

# `laptops` is assumed to be a pandas DataFrame (defined earlier) with
# 'Company' and 'Price' columns. The matching Asus series was elided from
# this excerpt and is reconstructed from the Toshiba line's pattern.
asus = laptops[laptops['Company'] == 'Asus']['Price']
toshiba = laptops[laptops['Company'] == 'Toshiba']['Price']

# Run the t-test
from scipy.stats import ttest_ind
tstat, pval = ttest_ind(asus, toshiba)
print('{0:0.3f}'.format(pval))

# CALCULATING SAMPLE SIZE
# Standardize the effect size
from statsmodels.stats.proportion import proportion_effectsize
std_effect = proportion_effectsize(.20, .25)

# Assign and print the needed sample size
from statsmodels.stats.power import zt_ind_solve_power
sample_size = zt_ind_solve_power(effect_size=std_effect, nobs1=None,
                                 alpha=.05, power=0.8)
print(sample_size)

sample_sizes = np.array(range(5, 100))
effect_sizes = np.array([0.2, 0.5, 0.8])

# Create results object for t-test analysis
from statsmodels.stats.power import TTestIndPower
results = TTestIndPower()
results.plot_power(dep_var='nobs', nobs=sample_sizes,
                   effect_size=effect_sizes)
plt.show()

# P-value corrections
from statsmodels.sandbox.stats.multicomp import multipletests
pvals = [.01, .05, .10, .50, .99]
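# A sketch of the correction step the snippet stops short of (an assumption,
# not in the original): `multipletests` returns the reject decisions and the
# adjusted p-values, among other values.
reject, pvals_corrected, _, _ = multipletests(pvals, alpha=.05,
                                              method='bonferroni')
print(pvals_corrected)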
import datetime

import statsmodels.stats.power as smp

###################################################
####### Variables to be edited by user ############
# (diff_means and stddev were defined above this excerpt; the values here
# follow the worked example in the effect-size comments below.)
diff_means = 5    # e.g. a 10% change on an initial value of 50Hz
stddev = 10       # e.g. standard deviation of 10Hz

alpha = 0.05  # significance level (probability of getting false positive)

# power = 1 - beta where beta = (probability of getting false negative).
# Typical beta = 0.2 to 0.4. So power = 0.8 to 0.6
power = 0.6
#######End - Variables to be edited by user #######
###################################################

# effect_size = difference between means divided by standard dev. Should be >0
# eg. 10% change in freq on an initial value of 50Hz = 5Hz. Say, std dev. = 10Hz
# then effect size = 5/10 = 0.5
effect_size = float(abs(diff_means)) / float(stddev)

print("=====================================================")
now = datetime.datetime.now()
print("Starting analysis at:", now.strftime("%m-%d-%Y %H:%M"))
print("=====================================================")
print(" ")
print("diff_means =", diff_means)
print("stddev = ", stddev)
print("effect_size =", effect_size)
print("alpha =", alpha)
print("power =", power)
print(" ")
print("Observations needed (t-test) =", round(
    smp.tt_ind_solve_power(effect_size=effect_size, nobs1=None,
                           alpha=alpha, power=power)))
print("Observations needed (z-test) =", round(
    smp.zt_ind_solve_power(effect_size=effect_size, nobs1=None,
                           alpha=alpha, power=power)))