def get_sample_size(self, beta=0.1):
    """
    Calculate required sample size per group to obtain provided beta.

    Parameters
    ----------
    beta : float
        Type 2 error rate.

    Returns
    -------
    n : int
        Sample size per group.
    """
    es = sms.proportion_effectsize(self.prop_null, self.prop_alt)
    if self.alt_hypothesis == 'two_tailed':
        n = tt_ind_solve_power(
            effect_size=es,
            alpha=self.alpha,
            power=1 - beta,
            alternative='two-sided',
        )
    else:
        n = tt_ind_solve_power(
            effect_size=es,
            alpha=self.alpha,
            power=1 - beta,
            alternative='smaller',
        )
    return int(np.round(n, 0))
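A minimal standalone sketch of the same calculation outside the class, assuming illustrative baseline and alternative proportions of 0.10 and 0.12 and a two-tailed test; all numbers are made up for the example.

import numpy as np
import statsmodels.stats.api as sms
from statsmodels.stats.power import tt_ind_solve_power

prop_null, prop_alt = 0.10, 0.12          # assumed proportions, illustration only

# Standardized effect size for the difference between two proportions
es = sms.proportion_effectsize(prop_null, prop_alt)

# Per-group sample size for a two-tailed test at alpha=0.05 and beta=0.1
n = tt_ind_solve_power(effect_size=es, alpha=0.05, power=1 - 0.1,
                       alternative='two-sided')
print(int(np.round(n, 0)))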
def test_calculate_required_sample_size_mean(
        global_variables_for_calculate_required_sample_size):
    """
    Sample size should be calculated appropriately using the statsmodels
    package when measuring a mean value in the experiment (using a t-test).
    """
    effect_size = set_up_experiment._calculate_effect_size_means(
        baseline_mean=pytest.baseline_metric_value,
        new_mean=pytest.new_metric_value,
        standard_deviation=pytest.standard_deviation)

    expected_required_sample_size = stats_power.tt_ind_solve_power(
        effect_size=effect_size,
        alpha=pytest.significance_level,
        power=pytest.power,
        alternative=pytest.alternative_hypothesis,
    )

    actual_required_sample_size = set_up_experiment.calculate_required_sample_size(
        baseline_metric_value=pytest.baseline_metric_value,
        new_metric_value=pytest.new_metric_value,
        measurement_type='mean',
        alternative_hypothesis=pytest.alternative_hypothesis,
        power=pytest.power,
        significance_level=pytest.significance_level,
        standard_deviation=pytest.standard_deviation,
    )

    assert actual_required_sample_size == int(expected_required_sample_size)
def generate_violion_plots(plot_col, group_col, group_order, ax):
    boxes = []
    mus = []
    stds = []
    g_order = []
    for group in group_order:
        mask = group_col == group
        tmp = plot_col[mask].dropna()
        if len(tmp) > 2:
            g_order.append(group)
            boxes.append(tmp.copy().values)
            mus.append(plot_col[mask].mean())
            stds.append(plot_col[mask].std())

    if len(boxes) == 2:
        ef = abs(np.diff(mus)) / (np.sum(stds))
        ratio = len(boxes[1]) / len(boxes[0])
        n0 = tt_ind_solve_power(effect_size=ef, alpha=alpha, power=power,
                                ratio=ratio)
        sizes = [str(int(n0)), str(int(n0 * ratio))]
        _, pval = ttest_ind(*boxes)
    else:
        sizes = [''] * len(boxes)
        _, pval = kruskal(*boxes)

    labels = ['%s n=%i/%s' % (t, len(b), n)
              for t, b, n in zip(g_order, boxes, sizes)]
    violinplot(boxes, ax=ax, labels=labels)
    return pval, ax
def mean_ttest_analyzer(sample_1: np.array, sample_2: np.array, alpha=0.05,
                        return_abs_diff=True):
    """
    Make a quick statistical assessment of the difference in means between two
    samples (hypothesis test based).

    :param sample_1: control sample (may contain NaNs)
    :param sample_2: treatment sample (may contain NaNs)
    :param alpha: significance level used for the power calculation
    :param return_abs_diff: if True, also return the mean difference
    :returns: p-value, power and (optionally) the mean difference
    """
    seed(666)  # ensure results reproducibility

    _, pvalue = ttest_ind(a=sample_2, b=sample_1, nan_policy='omit')

    # power parameters
    std_diff = np.sqrt(np.nanvar(sample_2) + np.nanvar(sample_1))
    abs_mean_diff = np.nanmean(sample_2) - np.nanmean(sample_1)
    effect_size = abs_mean_diff / std_diff
    treatment_size = (~np.isnan(sample_2)).sum()
    size_ratio = (~np.isnan(sample_1)).sum() / treatment_size
    power = tt_ind_solve_power(effect_size=effect_size,
                               alpha=alpha,
                               power=None,
                               ratio=size_ratio,
                               alternative='two-sided',
                               nobs1=treatment_size)

    if return_abs_diff:
        return pvalue, power, abs_mean_diff
    else:
        return pvalue, power
def run(self, data, knobs):
    if not super(self.__class__, self).run(data, knobs):
        error("Aborting analysis.")
        return

    # pooled_std = sqrt((var(self.y1) + var(self.y2)) / 2)
    # effect_size = self.mean_diff / pooled_std
    sample_size = len(self.y1)
    power = tt_ind_solve_power(effect_size=self.effect_size,
                               nobs1=sample_size,
                               alpha=self.alpha,
                               alternative=self.alternative)

    result = dict()
    result["effect_size"] = self.effect_size
    result["sample_size"] = sample_size
    result["alpha"] = self.alpha
    result["power"] = power
    # result["mean_diff"] = self.mean_diff
    # result["pooled_std"] = pooled_std
    return result
def run(self, data, knobs):
    if not super(self.__class__, self).run(data, knobs):
        error("Aborting analysis.")
        return

    if not self.effect_size:
        if not self.mean_diff:
            raise Exception(
                "You cannot leave both mean_diff and effect_size parameters empty"
            )
        pooled_std = sqrt((var(self.y1) + var(self.y2)) / 2)
        effect_size = self.mean_diff / pooled_std
    else:
        effect_size = self.effect_size

    sample_size = tt_ind_solve_power(effect_size=effect_size,
                                     nobs1=None,
                                     alpha=self.alpha,
                                     power=self.power,
                                     alternative=self.alternative)

    result = dict()
    result["effect_size"] = effect_size
    result["sample_size"] = floor(sample_size)
    result["alpha"] = self.alpha
    result["power"] = self.power
    # result["mean_diff"] = self.mean_diff
    # result["pooled_std"] = pooled_std
    return result
def samples_per_branch_calculator(u_hat, mde=0.05, alpha=0.05, power=0.95):
    var_hat = u_hat * (1 - u_hat)
    standardized_effect_size = (u_hat - (u_hat * (1 + mde))) / np.sqrt(var_hat)
    sample_size = tt_ind_solve_power(effect_size=standardized_effect_size,
                                     alpha=alpha,
                                     power=power)
    return sample_size
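For context, a hedged worked example of the same formula with an assumed 30% baseline rate and the default 5% relative MDE; a two-sided test's power depends only on the magnitude of the standardized effect, so the sign is dropped before solving.

import numpy as np
from statsmodels.stats.power import tt_ind_solve_power

u_hat, mde = 0.30, 0.05                # illustrative baseline rate and relative MDE
var_hat = u_hat * (1 - u_hat)          # Bernoulli variance estimate
effect = abs(u_hat - u_hat * (1 + mde)) / np.sqrt(var_hat)

n = tt_ind_solve_power(effect_size=effect, alpha=0.05, power=0.95)
print(round(n))                        # observations required per branch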
def determine_sample_size(features):
    control_rate = float(features['conversion_rate']) / 100
    test_rate = control_rate + control_rate * float(
        features['detectable_difference']) / 100
    num_variants = int(features['number_of_variants'])
    power = float(features['power'])
    alpha = float(features['alpha']) / (num_variants - 1)
    proportion_control = float(features['control_proportion'])
    ratio = ((100 - proportion_control) / (num_variants - 1)) / proportion_control
    effect_size = cohens_h(control_rate, test_rate)
    sample_size_control = int(
        tt_ind_solve_power(effect_size=effect_size,
                           nobs1=None,
                           alpha=alpha,
                           power=power,
                           ratio=ratio))
    try:
        samples_per_day = float(features['samples_per_day'])
        days_to_run_estimate = round(sample_size_control / samples_per_day)
    except (KeyError, TypeError, ValueError, ZeroDivisionError):
        samples_per_day = 'Unknown'
        days_to_run_estimate = 'Unknown'
    return sample_size_control, days_to_run_estimate
def solve_absolute_mde(self):
    """Ignoring the defined absolute effect size, solve for the absolute
    Minimal Detectable Effect given the other defined experimental constraints.

    We leverage the StatsModels functionality, but need to solve for the
    "absolute" MDE. The StatsModels function returns the normalized Cohen's d
    effect size:

        d = absolute_effect_size / standard_deviation,

    therefore we solve for d * standard_deviation.

    Returns
    -------
    min_absolute_effect : float
        Absolute MDE for experiment
    """
    test_type = self._alternative_direction()
    e = tt_ind_solve_power(
        nobs1=np.ceil(self.sample_size * self.test_split),
        power=(1 - self.beta),
        alpha=self.alpha,
        ratio=(1 - self.test_split) / self.test_split,
        alternative=test_type,
    )
    min_absolute_effect = e * self.sigma
    if self.absolute_effect < 0 and min_absolute_effect > 0:
        min_absolute_effect *= -1
    return min_absolute_effect
def samples_per_branch_calculator(p_hat, mde, alpha, power):
    """Classical sample size calculation for a t-test."""
    u_hat = p_hat.values.dot(p_hat.distribution)
    sigma_hat = sum([p_hat.distribution[i] * ((p_hat.values[i] - u_hat) ** 2)
                     for i in range(len(p_hat.distribution))])
    standardized_effect_size = (u_hat - (u_hat * (1 + mde))) / np.sqrt(sigma_hat)
    sample_size = tt_ind_solve_power(effect_size=standardized_effect_size,
                                     alpha=alpha,
                                     power=power)
    return sample_size
def get_effect_size_ncp(data1, data2):
    mean_difference = data1.mean - data2.mean
    num = ((data1.n - 1) * (data1.sd ** 2)) + ((data2.n - 1) * (data2.sd ** 2))
    den = data1.n + data2.n - 2
    sd = math.sqrt(num / den)  # pooled sd
    es = abs(mean_difference / sd)
    nobs_ratio = data1.n / data2.n
    # error check front-side power calculation with backend power calculation
    power = tt_ind_solve_power(effect_size=es,
                               nobs1=data1.n,
                               alpha=0.05,
                               ratio=nobs_ratio,
                               alternative='larger')
    print("POWER ", power)
    p = 1. / (1. / data1.n + 1. / data2.n)  # from statsmodels library code
    ncp = es * math.sqrt(p)
    return round(es, DEC), ncp
def plot_KvsVs30(bins, cut, snr, vs30_path, full_file_path, savepath):
    fullfile = pd.read_csv(full_file_path)
    kappa = fullfile[' tstar(s) '].values
    vs30 = pd.read_csv(vs30_path).values
    name = fullfile['Name'].values
    vs30_1d = np.reshape(vs30, -1)
    lnvs30 = np.log(vs30_1d)
    print(lnvs30)

    Pearson_R, P_val = pearsonr(kappa, vs30_1d)
    Power = tt_ind_solve_power(effect_size=Pearson_R, nobs1=30, alpha=0.05)
    Pearson_R = float(Pearson_R)
    P_val = float(P_val)
    Power = float(Power)
    # text = 'Pearson R = ' + r + ' P-val = ' + p_val + ' Power = ' + power + ' .'
    textstr = '\n'.join((
        r'$Pearson R:%.4f$' % (Pearson_R, ),
        r'$P val:%.4f$' % (P_val, ),
        r'$Power:%.4f$' % (Power, )))

    ##########################
    fig = plt.figure(figsize=(10, 10))
    # plt.axes().set_xscale("log")
    # plt.axes().set_yscale("log")
    plt.ylabel('ln(Vs30) (m/s)', fontsize=16)
    plt.xlabel('Kappa (s)', fontsize=16)
    # plt.xlim(0.5, 50)
    # plt.ylim(10e-6, 3)
    plt.minorticks_on()
    plt.grid(True, which='both')
    plt.tick_params(axis='both', which='major', labelsize=15)
    plt.tick_params(axis='both', which='both', length=5, width=1)
    plt.title('Kappa vs Vs30 ' + 'SNR ' + str(snr) + ' ' + str(bins)
              + ' bins ' + str(cut) + ' sec cut', fontsize=16)
    # these are matplotlib.patch.Patch properties
    # place a text box in upper left in axes coords
    plt.text(0.02, 5.25, textstr, fontsize=14, verticalalignment='top')
    plt.scatter(kappa, lnvs30)
    for i in range(30):
        if i == 1 or i == 10 or i == 7:
            alignment = 'right'
        else:
            alignment = 'left'
        plt.annotate(name[i], (kappa[i], lnvs30[i]), ha=alignment, alpha=.9,
                     rotation=0)
    plt.savefig(savepath)
def make_overall_stats_df(df, arm_stats=None, assume_t=False, effect_size=None):
    step_sizes = df['num_steps'].unique()
    rows = []
    for num_steps in step_sizes:
        cur_row = {}
        df_for_num_steps = df[df['num_steps'] == num_steps]

        cur_row['num_steps'] = num_steps
        num_replications = len(df_for_num_steps)
        cur_row['num_reps'] = num_replications

        num_rejected = np.sum(df_for_num_steps['pvalue'] < .05)
        cur_row['proportion_null_rejected'] = num_rejected / num_replications
        cur_row['avg_ratio'] = np.mean(df_for_num_steps['ratio'])
        cur_row['mean_1'] = np.mean(df_for_num_steps['mean_1'])
        cur_row['mean_2'] = np.mean(df_for_num_steps['mean_2'])
        cur_row['min_cond_1'] = df_for_num_steps.min()['mean_1']
        cur_row['min_cond_2'] = df_for_num_steps.min()['mean_2']
        cur_row['max_cond_1'] = df_for_num_steps.max()['mean_1']
        cur_row['max_cond_2'] = df_for_num_steps.max()['mean_2']

        if assume_t and effect_size is not None:
            expected_power = smp.tt_ind_solve_power(effect_size, num_steps / 2,
                                                    0.05, None, 1)
            cur_row['expected_power'] = expected_power

        avg_es = np.mean(df_for_num_steps['actual_es'])
        cur_row['avg_es'] = avg_es

        count_significant_cases = np.count_nonzero(
            df_for_num_steps['pvalue'] < .05)
        count_sign_errors_given_significant = np.count_nonzero(
            (df_for_num_steps['pvalue'] < .05)
            & (df_for_num_steps['mean_1'] < df_for_num_steps['mean_2']))
        count_sign_errors_overall = np.count_nonzero(
            df_for_num_steps['mean_1'] < df_for_num_steps['mean_2'])

        if count_significant_cases != 0:
            cur_row['type_s_errors'] = (count_sign_errors_given_significant
                                        / count_significant_cases)
        else:
            cur_row['type_s_errors'] = 0
        cur_row['overall_prop_sign_reversed'] = (count_sign_errors_overall
                                                 / num_replications)
        rows.append(cur_row)

    # Make dataframe
    df = pd.DataFrame(rows)
    return df
def sample_power_ttest(p1, p2, sd_diff, alpha=0.05, power=0.8, ratio=1,
                       alternative='two-sided'):
    mean_diff = abs(p2 - p1)
    std_effect_size = np.divide(mean_diff, sd_diff)
    n = tt_ind_solve_power(
        effect_size=std_effect_size,
        alpha=alpha,
        power=power,
        ratio=ratio,
        alternative=alternative
    )
    # Potential improvement: make this able to handle one-sided tests
    return np.array(n).round()
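A short usage sketch with made-up inputs (group means of 10 and 12, a standard deviation of the difference of 5), which corresponds to a standardized effect of 0.4.

import numpy as np
from statsmodels.stats.power import tt_ind_solve_power

mean_diff = abs(12 - 10)                   # illustrative means
std_effect_size = np.divide(mean_diff, 5)  # Cohen's d = 0.4

n = tt_ind_solve_power(effect_size=std_effect_size, alpha=0.05, power=0.8,
                       ratio=1, alternative='two-sided')
print(np.array(n).round())  # roughly 100 observations needed in group 1 here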
def hypothesis_test_one(cleaned_data, alpha=0.5):
    """
    Describe the purpose of your hypothesis test in the docstring.

    These functions should be able to test different levels of alpha for the
    hypothesis test. If a value of alpha is entered that is outside of the
    acceptable range, an error should be raised.

    :param alpha: the critical value of choice
    :param cleaned_data:
    :return:
    """
    comparison_groups = create_sample_dists(cleaned_data,
                                            y_var='average_covered_charges')
    metro_sample = comparison_groups[0]
    non_metro_sample = comparison_groups[1]

    p_val = st.ttest_ind(metro_sample, non_metro_sample, equal_var=False)[1]
    coh_d = abs(cohen_d(metro_sample, non_metro_sample))
    nobs1 = len(non_metro_sample)
    ratio = len(metro_sample) / nobs1
    power = pw.tt_ind_solve_power(effect_size=coh_d, nobs1=nobs1, alpha=alpha,
                                  power=None, ratio=ratio,
                                  alternative='two-sided')

    status = compare_pval_alpha(p_val, alpha)
    assertion = ''
    if status == 'Fail to reject':
        assertion = 'cannot'
    else:
        assertion = 'can'

    print(f'Based on the p value of {p_val} and our alpha of {alpha} '
          f'we {status.lower()} the null hypothesis.'
          f'\nDue to these results, we {assertion} state that there '
          f'is a difference in charges between Hospitals in Metro and '
          f'Non-Metro Areas')
    if assertion == 'can':
        print(f"with an effect size, cohen's d, of {str(coh_d)} and "
              f"power of {power}.")
    else:
        print(".")

    return status
def nobs(estimated=None, impressive=None):
    """
    Given the estimated and impressive figures, conduct a power calculation
    and return the number of required observations.

    Arguments:
    - `estimated`: int
    - `impressive`: int

    Return: int
    Exceptions: None
    """
    estimated, impressive = float(estimated), float(impressive)
    if abs(estimated / impressive) > 1:
        effect_size = abs(estimated / impressive)
    else:
        effect_size = abs(impressive / estimated)
    num = tt_ind_solve_power(effect_size=effect_size, alpha=0.05, power=0.8,
                             nobs1=None)
    return int(num) * 2
def solve_sample_size(self):
    """Ignoring defined sample size, solves for sample size given other
    defined experimental constraints.

    This method utilizes the StatsModels functionality `solve_power`.

    Returns
    -------
    sample_size : int > 0
        Sample size required to run experiment with other constraints
    """
    e = self.normalized_effect_size()
    test_type = self._alternative_direction()
    n_treat = tt_ind_solve_power(
        effect_size=e,
        power=(1 - self.beta),
        alpha=self.alpha,
        ratio=(1 - self.test_split) / self.test_split,
        alternative=test_type,
    )
    return int(np.ceil(n_treat / self.test_split))
def solve_test_split(self):
    """Ignoring defined test split, solve for maximum test split.

    Returns
    -------
    max_test_split : float (0, 1)
        Maximum test split for experiment
    """
    e = self.normalized_effect_size()
    test_type = self._alternative_direction()
    ratio = tt_ind_solve_power(
        effect_size=e,
        nobs1=np.ceil(self.sample_size * self.test_split),
        power=(1 - self.beta),
        alpha=self.alpha,
        ratio=None,
        alternative=test_type,
    )
    max_test_split = 1. / (ratio + 1)
    return max_test_split
def solve_power(self):
    """Ignoring defined beta (Type II error), solve for statistical power
    given other defined experimental constraints.

    This method utilizes the StatsModels functionality `solve_power`.

    Returns
    -------
    power : float (0, 1)
        Statistical power of experiment
    """
    e = self.normalized_effect_size()
    test_type = self._alternative_direction()
    power = tt_ind_solve_power(
        effect_size=e,
        nobs1=np.ceil(self.sample_size * self.test_split),
        alpha=self.alpha,
        ratio=(1 - self.test_split) / self.test_split,
        alternative=test_type,
    )
    return power
def calc_t_sample_size(
        baseline_average: np.float64,
        baseline_stdev: np.float64,
        expected_uplift_percentage: np.float64,
        power_percentage: np.float64 = 80,
        confidence_level_percentage: np.float64 = 95) -> np.float64:
    """Calculates the minimum sample size for an A/B test on a numeric KPI.

    Estimates the minimum required sample size for either a Test or a Control
    group in an A/B test when the KPI is a numeric variable such as revenue,
    number of conversions, etc.

    Args:
      baseline_average: Average value of the baseline KPI, e.g. average revenue
        per user.
      baseline_stdev: Standard deviation of the baseline KPI, e.g. standard
        deviation of the revenue per user.
      expected_uplift_percentage: Expected uplift of the media experiment on
        the baseline average as a percentage, e.g. 10 for a 10% uplift.
      power_percentage: Statistical power of the t-test as a percentage.
      confidence_level_percentage: Statistical confidence level of the t-test
        as a percentage.

    Returns:
      Estimated minimum sample size required for either a Test or a Control
      group in the A/B test.
    """
    expected_kpi = baseline_average * (100 + expected_uplift_percentage) / 100
    uplift_kpi = expected_kpi - baseline_average
    effect_size = uplift_kpi / baseline_stdev
    stat_alpha = (100 - confidence_level_percentage) / 100
    stat_power = power_percentage / 100

    return round(
        power.tt_ind_solve_power(effect_size=effect_size,
                                 nobs1=None,
                                 alpha=stat_alpha,
                                 power=stat_power))
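As a quick sanity check under assumed numbers (baseline average 100, standard deviation 40, 10% expected uplift, 80% power, 95% confidence), the effect size works out to (110 - 100) / 40 = 0.25; a minimal sketch of the equivalent direct call:

from statsmodels.stats import power

effect_size = (100 * 1.10 - 100) / 40     # = 0.25, illustrative figures only
n = power.tt_ind_solve_power(effect_size=effect_size, nobs1=None,
                             alpha=0.05, power=0.80)
print(round(n))                           # minimum sample size per group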
def ttest(control, treatment, alpha=0.05):
    """Calculates the t-test p-value, the mean lift and the test power based
    on a preset alpha.

    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
    # https://www.statsmodels.org/stable/generated/statsmodels.stats.power.tt_ind_solve_power.html
    """
    # random seed to ensure results reproducibility
    seed(666)

    # p_value
    stat, p = ttest_ind(a=treatment, b=control, nan_policy='omit')

    # mean lift
    mean_diff = np.nanmean(treatment) - np.nanmean(control)
    # lift = (mean_diff / np.nanmean(control)) * 100.0

    # power parameters
    std_diff = np.sqrt(np.nanvar(treatment) + np.nanvar(control))
    effect_size = mean_diff / std_diff
    treatment_size = (~np.isnan(treatment)).sum()
    size_ratio = (~np.isnan(control)).sum() / treatment_size
    power = tt_ind_solve_power(effect_size=effect_size,
                               alpha=alpha,
                               power=None,
                               ratio=size_ratio,
                               alternative='two-sided',
                               nobs1=treatment_size)

    return p, mean_diff, power
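A usage sketch with synthetic, made-up data (one NaN added to exercise nan_policy='omit'); it assumes the imports the function relies on (numpy as np, seed, scipy.stats.ttest_ind, tt_ind_solve_power) are already in scope.

import numpy as np

rng = np.random.default_rng(0)
control = rng.normal(loc=10.0, scale=2.0, size=500)
treatment = rng.normal(loc=10.3, scale=2.0, size=500)
treatment[0] = np.nan  # simulate a missing observation

p, mean_diff, post_hoc_power = ttest(control, treatment, alpha=0.05)
print(f"p-value={p:.4f}, lift={mean_diff:.3f}, power={post_hoc_power:.3f}")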
def hypothesis_test_two(cleaned_data, alpha=0.5):
    comparison_groups = create_sample_dists(cleaned_data, y_var='out_of_pocket')

    ###
    # Main chunk of code using t-tests or z-tests, effect size, power, etc.
    metro_sample = comparison_groups[0]
    non_metro_sample = comparison_groups[1]

    p_val = st.ttest_ind(metro_sample, non_metro_sample, equal_var=False)[1]
    coh_d = abs(cohen_d(metro_sample, non_metro_sample))
    nobs1 = len(non_metro_sample)
    ratio = len(metro_sample) / nobs1
    power = pw.tt_ind_solve_power(effect_size=coh_d, nobs1=nobs1, alpha=alpha,
                                  power=None, ratio=ratio,
                                  alternative='two-sided')
    ###

    # starter code for return statement and printed results
    status = compare_pval_alpha(p_val, alpha)
    assertion = ''
    if status == 'Fail to reject':
        assertion = 'cannot'
    else:
        assertion = 'can'
        # calculations for effect size, power, etc. here as well

    print(f'Based on the p value of {p_val} and our alpha of {alpha} we '
          f'{status.lower()} the null hypothesis.'
          f'\nDue to these results, we {assertion} state that '
          f'there is a difference in out-of-pocket cost for patients '
          f'using Hospitals in Metro vs. Non-Metro Areas')
    if assertion == 'can':
        print(f"with an effect size, cohen's d, of {str(coh_d)} and "
              f"power of {power}.")
    else:
        print(".")

    return status
def t_test_with_power_comp(x1, x2, alternative='two-sided', alpha=0.05,
                           power=0.8):
    """
    Independent (as contrasted with Paired) t-test with power calculation
    based on n_obs; effect size based on estimate from input.
    """
    import statsmodels.stats.power as smpwr
    import statsmodels.api as sm
    import numpy as np
    import matplotlib.pyplot as plt

    t_stat, pvalue, degrees_of_freedom = sm.stats.ttest_ind(
        x1=x1, x2=x2, alternative=alternative)
    print(
        "T: {t_stat}, p-value: {pvalue}, degrees of freedom: {degrees_of_freedom},"
        " n_obs_1 = {n_obs_1}, n_obs_2 = {n_obs_2}".format(
            t_stat=t_stat,
            degrees_of_freedom=degrees_of_freedom,
            pvalue=pvalue,
            n_obs_1=len(x1),
            n_obs_2=len(x2)))

    # Power calculation
    pooled_standard_dev_empirical = np.sqrt(np.mean([np.std(x1), np.std(x2)]))
    mean_diff_empirical = abs(np.mean(x1) - np.mean(x2))
    effect_size_empirical = mean_diff_empirical / pooled_standard_dev_empirical
    print("Empirical pooled stdev: {:.2f}".format(pooled_standard_dev_empirical))
    print("Mean diff empirical: {:.2f}\neffect size empirical: {:.2f}".format(
        mean_diff_empirical, effect_size_empirical))

    # Empirical power needed
    nobs1 = smpwr.tt_ind_solve_power(
        effect_size=effect_size_empirical,
        nobs1=None,
        alpha=alpha,
        power=power,
        alternative=alternative,
    )
    print(
        "With alpha {alpha}, power {power}, need ≈ {nobs1:.0f} observations of"
        " each type to achieve significance".format(alpha=alpha, power=power,
                                                    nobs1=nobs1))

    # Power vs. nobs
    smpwr.TTestIndPower().plot_power(dep_var='nobs',
                                     nobs=np.arange(2, 10),
                                     effect_size=[effect_size_empirical],
                                     alternative=alternative,
                                     alpha=alpha)
    plt.show()
def solve(eff):
    nobs = tt_ind_solve_power(effect_size=eff, alpha=0.05, power=0.8,
                              nobs1=None)
    return int(nobs * 2)
alpha = 0.05  # significance level (probability of getting false positive)
# power = 1 - beta where beta = (probability of getting false negative). Typical
# beta = 0.2 to 0.4. So power = 0.8 to 0.6
power = 0.6
####### End - Variables to be edited by user #######
####################################################

# effect_size = difference between means divided by standard dev. Should be > 0.
# e.g. 10% change in freq on an initial value of 50Hz = 5Hz. Say, std dev. = 10Hz,
# then effect size = 5/10 = 0.5
effect_size = float(abs(diff_means)) / float(stddev)

print("=====================================================")
now = datetime.datetime.now()
print("Starting analysis at:", now.strftime("%m-%d-%Y %H:%M"))
print("=====================================================")
print(" ")
print("diff_means =", diff_means)
print("stddev = ", stddev)
print("effect_size =", effect_size)
print("alpha =", alpha)
print("power =", power)
print(" ")
print("Observations needed =", round(
    smp.tt_ind_solve_power(effect_size=effect_size, nobs1=None, alpha=alpha,
                           power=power)))
print("Observations needed =", round(
    smp.zt_ind_solve_power(effect_size=effect_size, nobs1=None, alpha=alpha,
                           power=power)))
# In[94]:

# we'll use this for the next 2 steps
from statsmodels.stats.power import tt_ind_solve_power

# In[95]:

# what is the power of our current experiment?
# i.e. how likely is it that we correctly decided that B is better than A,
# given the observed effect size, number of observations and alpha level we used above
# since these are independent samples we can use tt_ind_solve_power
# hint: the power we get should not be good
power = tt_ind_solve_power(
    effect_size=observed_effect_size,  # what we just calculated
    nobs1=n_A,                         # the number of observations in A
    alpha=0.05,                        # our alpha level
    power=None,                        # what we're interested in
    ratio=1                            # the ratio of number of observations of A and B
)
power

# In[96]:

# how many observations for each of A and B would we need to get a power of .9
# for our observed effect size and alpha level,
# i.e. having a 90% chance of correctly deciding B is better than A
n_obs_A = tt_ind_solve_power(effect_size=observed_effect_size,
                             nobs1=None,
                             alpha=0.05,
                             power=.9,
                             ratio=1)
def report_on(trial):
    """Calculate headline stats for TRIAL once."""
    tr = TrialAnalysis.objects.get_or_create(trial=trial)[0]
    if trial.report_set.count() < 2:
        return

    nobs1 = int(trial.report_set.count() / 2)
    if trial.offline:
        reports = trial.report_set.all()
    else:
        reports = trial.report_set.exclude(date__isnull=True)

    points = [t.get_value() for t in reports]
    pointsa = [t.get_value() for t in reports.filter(group__name=Group.GROUP_A)]
    pointsb = [t.get_value() for t in reports.filter(group__name=Group.GROUP_B)]

    sd = np.std(points)
    mean = np.mean(points)
    nobsa = len(pointsa)
    nobsb = len(pointsb)
    meana = np.mean(pointsa)
    meanb = np.mean(pointsb)
    stderrmeana = scistats.sem(pointsa)
    stderrmeanb = scistats.sem(pointsb)

    small = tt_ind_solve_power(effect_size=0.1, alpha=0.05, nobs1=nobs1, power=None)
    med = tt_ind_solve_power(effect_size=0.2, alpha=0.05, nobs1=nobs1, power=None)
    large = tt_ind_solve_power(effect_size=0.5, alpha=0.05, nobs1=nobs1, power=None)

    if trial.variable_set.get().style == Variable.BINARY:
        obs = np.array([[len([p for p in pointsa if p == True]),
                         len([p for p in pointsa if p == False])],
                        [len([p for p in pointsb if p == True]),
                         len([p for p in pointsb if p == False])]])
        try:
            chi2, pval, dof, expected = scistats.chi2_contingency(obs)
        except ValueError:
            exc = traceback.format_exc()

            class Message(letter.Letter):
                Postie = POSTIE
                From = '*****@*****.**'
                To = '*****@*****.**'
                Subject = 'Chi2 failure instance'
                Body = "Couldn't run chi2 for {0}\n\n{1}".format(str(obs), exc)

            try:
                Message.send()
            except:
                print(exc)
            pval = None
    else:
        tstat, pval, df = ttest_ind(pointsa, pointsb)

    tr.power_small = small
    tr.power_med = med
    tr.power_large = large
    tr.sd = sd
    tr.mean = mean
    tr.nobsa = nobsa
    tr.nobsb = nobsb
    tr.meana = meana
    tr.meanb = meanb
    tr.stderrmeana = stderrmeana
    tr.stderrmeanb = stderrmeanb
    tr.pval = pval
    tr.save()
    return
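A standalone sketch of the small/medium/large power checks above (here d = 0.1, 0.2 and 0.5, as used in the snippet), assuming an illustrative 50 reports per arm.

from statsmodels.stats.power import tt_ind_solve_power

nobs1 = 50  # assumed reports per arm, for illustration only
for label, es in [('small', 0.1), ('medium', 0.2), ('large', 0.5)]:
    achieved = tt_ind_solve_power(effect_size=es, alpha=0.05, nobs1=nobs1,
                                  power=None)
    print(f"{label} effect (d={es}): power={achieved:.2f}")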
def simulate_experiments(sample_sizes, effect_size, n_experiments, viz_path,
                         alpha=0.05, verbose=True):
    '''
    Function for simulating and comparing experiments carried out with
    different sample sizes. For each experiment, effect size and alpha are
    fixed.

    Args:
        - sample_sizes: a list of integers specifying the sample size of each
          of the two groups in the simulated experiments.
        - effect_size: a float specifying the magnitude of the difference
          between the two groups in the simulated experiments (Cohen's d).
        - n_experiments: an integer specifying the number of simulated
          experiments.
        - viz_path: a string specifying the location where to save the plotted
          results.
        - alpha: a float specifying the p-value threshold for considering the
          results of an experiment statistically significant.
        - verbose: a boolean specifying if the plotted results are shown on
          screen.

    Returns:
        - None
    '''
    experiments_outcomes = []
    achieved_powers = []
    for size in sample_sizes:
        achieved_power = tt_ind_solve_power(effect_size=effect_size,
                                            nobs1=size,
                                            alpha=alpha,
                                            ratio=1.0)
        p_values = []
        effect_sizes = []
        for experiment in range(n_experiments):
            group_1, group_2 = generate_samples(n=size,
                                                effect_size=effect_size,
                                                sd=1.0)
            t, p = ttest_ind(a=group_1, b=group_2)
            p_values.append(round(p, 3))
            effect_sizes.append(round(cohen_d(t=t, n=size * 2), 3))
        experiments_outcomes.append({
            'p_values': p_values,
            'effect_sizes': effect_sizes
        })
        achieved_powers.append(achieved_power)

    visualize_experiments(sizes=sample_sizes,
                          experiments=experiments_outcomes,
                          powers=achieved_powers,
                          alpha=alpha,
                          effect_size=effect_size,
                          viz_path=viz_path,
                          verbose=verbose)
else:
    rho_values = map(int, rho_input.split(","))

for rho in rho_values:
    pvalues = []
    powers005 = []
    powers001 = []
    powers0001 = []
    print("Rho = %d:" % rho)
    for comb in itertools.combinations(range(len(values1)), rho):
        vals1 = [values1[i] for i in comb]
        vals2 = [values2[i] for i in comb]
        stat, pvalue = ttest_ind(vals1, vals2, equal_var=True)
        power005 = tt_ind_solve_power(
            effect_size=(np.mean(vals1) - np.mean(vals2)) / np.std(vals1),
            nobs1=len(vals1),
            alpha=0.05,
            power=None)
        powers005.append(power005)
        power001 = tt_ind_solve_power(
            effect_size=(np.mean(vals1) - np.mean(vals2)) / np.std(vals1),
            nobs1=len(vals1),
            alpha=0.01,
            power=None)
        powers001.append(power001)
        power0001 = tt_ind_solve_power(
            effect_size=(np.mean(vals1) - np.mean(vals2)) / np.std(vals1),
            nobs1=len(vals1),
            alpha=0.001,
            power=None)
        powers0001.append(power0001)
def ttest(effect=None, alpha=None, power=None):
    num = tt_ind_solve_power(effect_size=effect, alpha=alpha, power=power,
                             nobs1=None)
    return int(num) * 2
def power(effect_size):
    alpha = 0.05
    power = 0.8
    from statsmodels.stats.power import tt_ind_solve_power
    n = tt_ind_solve_power(effect_size=effect_size, alpha=alpha, power=power)
    return n
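A brief usage note, assuming the function above is in scope: despite its name it returns the required size of the first group for the fixed alpha of 0.05 and power of 0.8.

# Illustrative call: a medium effect (Cohen's d = 0.5) needs about 64
# observations in the first group at alpha=0.05 and power=0.8 (the second
# group scales by `ratio`, which defaults to 1).
print(power(0.5))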
from statsmodels.stats.power import tt_ind_solve_power

# <codecell>

normmean = 40
normstd = 17
admean = 55
adstd = 20

ef = abs(normmean - admean) / (adstd + normstd)
ratio = 1.0
alpha = 0.05
power = 0.9
n0 = tt_ind_solve_power(effect_size=ef, alpha=alpha, power=power, ratio=ratio)
print(n0)

# <codecell>

import pandas as pd

# <codecell>

data = pd.read_excel('/home/will/ClaudioStuff/TableData.xlsx', 'Sheet1')

# <codecell>

data['HIV'] = data['HIVInfected'] == 'pos'
data['Aged'] = data['Age'] > 50
def compare_groups(dataframe, feature, targets, control_group=None, alpha=0.05,
                   p_adjust=False, show_groups=True, **kwargs):
    figsize = (12, 8)
    edgecolor = None

    # Deal with keyword arguments
    for k, v in kwargs.items():
        if k not in ['figsize', 'edgecolor']:
            raise TypeError(
                "compare_groups got an unexpected keyword argument {}".format(k))
        else:
            if k == 'figsize':
                figsize = v
            elif k == 'edgecolor':
                edgecolor = v

    text_color = plt.rcParams.get('ytick.color')

    # Deal with targets input
    if type(targets) == str:
        targets = [targets]

    for target in targets:
        control = None
        info = {}
        grouped = dataframe.groupby([feature])[target]
        if control_group is None:
            control_group = grouped.iloc[0][0]
        k = len(grouped) - 1
        for group in grouped:
            temp = {}
            if group[0] == control_group:
                control = np.array(group[1])
                continue
            else:
                test_group = np.array(group[1])
                size = len(test_group)
                if size == 1:
                    mu, std = control.mean(), control.std(ddof=1)
                    effect_size = np.abs((test_group[0] - mu) / std)
                    p = 2 * stats.norm.sf(effect_size)
                else:
                    stat, p = stats.ttest_ind(test_group, control,
                                              equal_var=False)
                    effect_size = cohen_d(test_group, control)
                if p_adjust:
                    p *= k
                    if p > 1:
                        p = 1
                temp['p-val'] = p
                temp['effect size'] = effect_size
                temp['size'] = size
                temp['power'] = tt_ind_solve_power(effect_size=effect_size,
                                                   nobs1=size,
                                                   alpha=alpha,
                                                   ratio=len(control) / size)
                info[group[0]] = temp

        info = pd.DataFrame.from_dict(info)
        print('Testing {} groups for statistically significant effects on {}'.
              format(feature, target))
        display(info.round(6))

        # Plot test results
        X = list([str(x) for x in info.columns])
        if not show_groups:
            fig, (ax1, ax2) = plt.subplots(2, sharex=True, figsize=figsize,
                                           gridspec_kw={"hspace": 0.05})
        else:
            fig = plt.figure(constrained_layout=True, figsize=figsize)
            gs = fig.add_gridspec(2, 2)
            ax = fig.add_subplot(gs[:, 0])
            if edgecolor is not None:
                ax = sns.boxplot(x=dataframe[feature], y=dataframe[target],
                                 whiskerprops={'color': edgecolor},
                                 capprops={'color': edgecolor},
                                 flierprops={
                                     'markerfacecolor': edgecolor,
                                     'markeredgecolor': edgecolor
                                 })
            else:
                ax = sns.boxplot(x=dataframe[feature], y=dataframe[target])
            # fix edgecolors if needed:
            # if edgecolor is not None:
            #     for i, artist in enumerate(ax.artists):
            #         # artist.set_edgecolor(edgecolor)
            #         for j in range(i*6, i*6+6):
            #             if j in range(i*6+4, i*6+6):
            #                 continue
            #             line = ax.lines[j]
            #             line.set_color(edgecolor)
            #             line.set_mfc(edgecolor)
            #             line.set_mec(edgecolor)
            ax1 = fig.add_subplot(gs[0, 1])
            ax2 = fig.add_subplot(gs[1, 1])

        ax1.set_title('Target: {}'.format(target), color=text_color)
        if len(grouped) - 1 == 1:
            ax1.scatter(X, info.loc['p-val'], color='#3572C6', label='p-value',
                        marker='x', linewidth=4, s=50)
        else:
            ax1.plot(X, info.loc['p-val'], color='#3572C6', label='p-value')
        ax1.axhline(y=alpha, ls='-.', label='alpha: {}'.format(alpha), alpha=0.7)
        ax1.set(xlabel='')
        ax1.legend()

        if len(grouped) - 1 == 1:
            ax2.scatter(X, info.loc['effect size'], color='g',
                        label='effect size', marker='x', linewidth=4, s=50)
        else:
            ax2.plot(X, info.loc['effect size'], color='g', label='effect size')
        ax2.set_xlabel('{}'.format(feature), color=text_color)
        ax2.legend()

        plt.show()
""" d Value Meaning 1. 0 - 0.2 Negligible 2. 0.2 - 0.5 Small 3. 0.5 - 0.8 Medium 4. 0.80 + Large """ print(logo_grouped.count()) d = (8.58 - 8.44) / (np.std(dat.loc[dat.logo == 'Logo A', 'sentiment'])) print("d = ", d) print( "power = ", tt_ind_solve_power(effect_size=d, nobs1=32, alpha=0.05, power=None, ratio=1, alternative='two-sided')) ax = plt.figure(figsize=(8, 8)).gca() # define axis temp = dat[dat.logo != 'Logo C'] sns.boxplot(x='logo', y='sentiment', data=temp, ax=ax) sns.swarmplot(x='logo', y='sentiment', color='black', data=temp, ax=ax, alpha=0.4) #plt.show() print(