def calculate_power(effectSize, powerThreshold=0.8, alpha=0.05):
    """
    Find the smallest sample size whose power exceeds a threshold.

    For each effect size, the power of a two-sample independent t-test is
    evaluated at 1, 2, ..., 99 observations (passed as ``nobs1``). The first
    count whose power is strictly greater than ``powerThreshold`` is recorded
    and reported on stdout. No plot is produced.

    Args:
        effectSize (list or array of floats): Effect sizes (e.g. Cohen's d).
            Values below 0.1 are replaced by 0.1 before the search.
        powerThreshold (float): Power that must be exceeded. A value of 0.8
            means the reported count leaves at most a 20% chance of a
            type II error.
        alpha (float): Significance level, strictly between 0 and 1.

    Returns:
        list: One entry per effect size, in input order. Each entry is the
        minimum number of observations (int) whose power exceeds
        ``powerThreshold``, or None when no count below 100 achieves it.
    """
    cellCount = np.arange(1, 100, 1)
    analysis = power.TTestIndPower()

    messages = []
    required_counts = []
    for size in effectSize:
        # An effect size below 0.2 is trivial; anything below 0.1 is clamped to 0.1.
        size = 0.1 if size < 0.1 else size

        minimum = None
        for count in cellCount:
            pow_value = analysis.power(size, count, alpha)
            if pow_value > powerThreshold:
                minimum = int(count)
                messages.append(
                    "Effect size of {0} requires {1} cells to have a power greater than {2}"
                    .format(size, count, powerThreshold))
                break
        required_counts.append(minimum)

    # Report only after every effect size has been processed.
    for message in messages:
        print(message)

    return required_counts
def compute_power_and_p_value(self, control_rate, exp_rate):
    """
    Compute the statistical power and Welch t-test p-value for two groups.

    Args:
        control_rate: DataFrame exposing a ``control_ratio`` column and the
            ``select``/``collect``/``count``/``describe`` methods used below.
        exp_rate: DataFrame exposing an ``exp_ratio`` column and the same
            methods.

    Returns:
        tuple: ``(power, p_val)``. ``power`` comes from
        ``TTestIndPower.solve_power`` at ``self.ALPHA_ERROR`` (two-sided), or
        is 0 when the pooled standard deviation is 0 and no effect size can
        be formed. ``p_val`` is the ``pvalue`` of ``stats.ttest_ind`` with
        ``equal_var=False`` on the collected rows.
    """
    control_data = control_rate.select("control_ratio").collect()
    exp_data = exp_rate.select("exp_ratio").collect()
    control_count = float(control_rate.count())
    exp_count = float(exp_rate.count())

    control_rate_stats = control_rate.describe()
    exp_rate_stats = exp_rate.describe()
    control_rate_stats.show()
    exp_rate_stats.show()

    # Collect each summary once; row 1 holds the mean and row 2 the stddev.
    control_summary = control_rate_stats.select("control_ratio").collect()
    exp_summary = exp_rate_stats.select("exp_ratio").collect()
    mean_control_rate = float(control_summary[1].control_ratio)
    stddev_control_rate = float(control_summary[2].control_ratio)
    mean_exp_rate = float(exp_summary[1].exp_ratio)
    stddev_exp_rate = float(exp_summary[2].exp_ratio)

    pooled_variance = (
        (stddev_control_rate ** 2) * (control_count - 1)
        + (stddev_exp_rate ** 2) * (exp_count - 1)
    ) / ((control_count - 1) + (exp_count - 1))
    pooled_stddev = math.sqrt(pooled_variance)

    if pooled_stddev == 0:
        # No spread in either group: the effect size is undefined.
        power = 0
    else:
        # Dividing by the control mean and multiplying it back cancels out,
        # so use the absolute mean difference directly. This also avoids a
        # ZeroDivisionError when the control mean is 0.
        effect_size = abs(mean_control_rate - mean_exp_rate) / pooled_stddev
        power = smp.TTestIndPower().solve_power(effect_size,
                                                nobs1=control_count,
                                                ratio=exp_count / control_count,
                                                alpha=self.ALPHA_ERROR,
                                                alternative='two-sided')

    p_val = stats.ttest_ind(control_data, exp_data, equal_var=False).pvalue
    return power, p_val
def getESS(b, d, IF):
    """
    Calculate the equilibrium sample size (ESS) and related quantities.

    Parameters:
        b: base rate of true hypotheses (between 0 and 1)
        d: effect size (Cohen's d)
        IF: income factor (# of sample pairs purchasable per publication)

    The significance level ``alpha`` is read from the enclosing scope.

    Returns a tuple:
        ESS       equilibrium sample size (sample size at which Profit is maximal)
        SSS       scientifically appropriate sample size (power closest to 80%)
        TPR_ESS   total positive rate at ESS (describes published literature)
        PPV_ESS   positive predictive value at ESS
        Power_ESS power at ESS
        PPV_SSS   positive predictive value at SSS
        Income    vector of income for each tested sample size
        SS        vector of tested sample sizes
        Profit    vector of profit for each tested sample size
    """
    SS = np.arange(4, 1000, 2)
    n_sizes = len(SS)

    # Power of a two-sample t-test at every candidate sample size.
    # (A one-sample variant would use getpower.TTestPower with nobs=s.)
    two_sample = getpower.TTestIndPower()
    Power = np.zeros(n_sizes)
    for idx, n_obs in enumerate(SS):
        Power[idx] = two_sample.solve_power(effect_size=d, nobs1=n_obs, ratio=1.0, alpha=alpha,
                                            power=None, alternative='two-sided')

    # Publication bookkeeping, element-wise over all sample sizes.
    falsePR = np.full(n_sizes, alpha * (1 - b), dtype=float)
    truePR = Power * b
    totalPR = falsePR + truePR
    Income = totalPR * IF
    Profit = Income - SS

    ESSidx = np.argmax(Profit)
    SSSidx = np.abs(Power - 0.8).argmin()

    ESS = SS[ESSidx]
    SSS = SS[SSSidx]
    TPR_ESS = totalPR[ESSidx]
    PPV_ESS = truePR[ESSidx] / totalPR[ESSidx]
    PPV_SSS = truePR[SSSidx] / totalPR[SSSidx]
    Power_ESS = Power[ESSidx]

    return ESS, SSS, TPR_ESS, PPV_ESS, Power_ESS, PPV_SSS, Income, SS, Profit
def _power_and_ttest(self, control_vals, exp_vals): control_mean = statistics.mean(control_vals) control_std = statistics.stdev(control_vals) exp_mean = statistics.mean(exp_vals) exp_std = statistics.stdev(exp_vals) pooled_stddev = self._compute_pooled_stddev(control_std, exp_std, control_vals, exp_vals) power = 0 percent_diff = None if control_mean != 0 and pooled_stddev != 0: percent_diff = (control_mean - exp_mean) / float(control_mean) effect_size = (abs(percent_diff) * float(control_mean)) / float(pooled_stddev) power = smp.TTestIndPower().solve_power(effect_size, nobs1=len(control_vals), ratio=len(exp_vals) / float(len(control_vals)), alpha=self.ALPHA_ERROR, alternative='two-sided') ttest_result = stats.ttest_ind(control_vals, exp_vals, equal_var=False) p_val = "" if len(ttest_result) >= 2 and not math.isnan(ttest_result[1]): p_val = ttest_result[1] mean_diff = exp_mean - control_mean if p_val <= self.ALPHA_ERROR and mean_diff < 0: significance = "Negative" elif p_val <= self.ALPHA_ERROR and mean_diff > 0: significance = "Positive" else: significance = "Neutral" return { "power": power, "p_val": p_val, "control_mean": control_mean, "mean_diff": mean_diff, "percent_diff": 0 if percent_diff is None else percent_diff * -100, "significance": significance, }
def f_powerTest(df_iris):
    """
    Report whether the dataset is large enough for a two-sample t-test.

    Solves for the sample size (``nobs1``) needed to detect an effect size of
    0.8 with power 0.8 at alpha 0.05, then compares it with ``len(df_iris)``.
    All output is written to the module-level ``outfile``.

    Args:
        df_iris: any sized container; only ``len(df_iris)`` is used.
    """
    # Parameters for the power analysis.
    effect = 0.8
    alpha = 0.05
    power = 0.8

    # Leaving nobs1=None makes solve_power solve for the sample size.
    analysis = pwr.TTestIndPower()
    result = analysis.solve_power(effect, power=power, nobs1=None, ratio=1.0, alpha=alpha)
    print('Minimum Sample Size: %.3f' % result, file=outfile)

    # NOTE(review): `result` is the size of the first sample (nobs1), while
    # len(df_iris) is the whole dataset. Confirm this is the intended comparison.
    if len(df_iris) >= result:
        print(
            'Sample size is sufficient for effect size of 0.8 and power of 0.8.',
            file=outfile)
    else:
        # The original called the non-existent `printf`, raising NameError.
        print('**LOW POWER OF DATASET: SAMPLE SIZE TOO SMALL**', file=outfile)
def estimatePowerN(m1, m2, var1, var2, n1, n2):
    '''Estimate the sample size needed for a two-sample t-test.

    m1, m2: group means
    var1, var2: group variances
    n1, n2: group sample sizes

    The target `power`, `ratio`, `alpha` and `alternative` are not
    parameters; they are read from the enclosing scope.

    Returns the solved sample size rounded up with math.ceil.
    Ref: webpower_manual_book, page 61.
    '''
    # Pooled standard deviation across both groups.
    pooled_variance = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
    pooled_sd = pooled_variance ** 0.5

    # Only the magnitude of the standardised mean difference matters here.
    magnitude = abs((m1 - m2) / pooled_sd)

    solver = smp.TTestIndPower()
    needed = solver.solve_power(magnitude,
                                power=power,
                                ratio=ratio,
                                alpha=alpha,
                                alternative=alternative)
    return math.ceil(needed)
# Keep participants older than 18 with a recorded weight; expose the weight as 'Weight'.
adult_nhanes_data = nhanes_data.query('AgeInYearsAtScreening > 18')
adult_nhanes_data = adult_nhanes_data.dropna(subset=['WeightKg']).rename(columns={'WeightKg': 'Weight'})

# %% [markdown]
# ## Power analysis
#
# We can compute a power analysis using functions from the `statsmodels.stats.power` package. Let's focus on the power for an independent samples t-test in order to determine a difference in the mean between two groups. Let's say that we think that an effect size of Cohen's d=0.5 is realistic for the study in question (based on previous research) and would be of scientific interest. We wish to have 80% power to find the effect if it exists. We can compute the sample size needed for adequate power using the `TTestIndPower` class:

# %%
import scipy.stats
import statsmodels.stats.power as smp
import matplotlib.pyplot as plt

power_analysis = smp.TTestIndPower()
# No sample size is passed, so solve_power solves for it.
sample_size = power_analysis.solve_power(effect_size=0.5, power=0.8, alpha=0.05)
sample_size

# %% [markdown]
# Thus, about 64 participants would be needed in each group in order to test the hypothesis with adequate power.
#
# ## Power curves
#
# We can also create plots that show us how the power to find an effect varies as a function of effect size and sample size, at the alpha specified in the power analysis. We will use the `plot_power()` function. The x-axis is defined by the `dep_var` argument, while sample sizes (nobs) and effect sizes (effect_size) are provided as arrays.

# %%
#+
effect_sizes = np.array([0.2, 0.5, 0.8])
sample_sizes = np.array(range(10, 500, 10))

plt.style.use('seaborn')
def t_test_with_power_comp(x1, x2, alternative='two-sided', alpha=0.05, power=0.8):
    """
    Run an independent (not paired) t-test and a power analysis on two samples.

    The effect size is estimated from the inputs as the absolute mean
    difference divided by the pooled standard deviation. The function prints
    the test result, the empirical effect size and the per-group sample size
    needed to reach ``power`` at ``alpha``, then shows a power-vs-nobs plot.

    Args:
        x1, x2: the two samples (sized, array-like).
        alternative: alternative hypothesis passed to both the t-test and
            the power calculation.
        alpha: significance level.
        power: target power used when solving for the sample size.

    Returns:
        None. Results are printed and plotted.
    """
    import statsmodels.stats.power as smpwr
    import statsmodels.api as sm
    import numpy as np
    import matplotlib.pyplot as plt

    t_stat, pvalue, degrees_of_freedom = sm.stats.ttest_ind(
        x1=x1, x2=x2, alternative=alternative)
    print(
        "T: {t_stat}, p-value: {pvalue}, degrees of freedom: {degrees_of_freedom},"
        " n_obs_1 = {n_obs_1}, n_obs_2 = {n_obs_2}".format(
            t_stat=t_stat,
            degrees_of_freedom=degrees_of_freedom,
            pvalue=pvalue,
            n_obs_1=len(x1),
            n_obs_2=len(x2)))

    # Power calculation.
    # Pool the sample *variances*, weighted by degrees of freedom, and take the
    # square root. The previous sqrt(mean(std1, std2)) was not a standard
    # deviation at all (it had units of sqrt(x)).
    n_obs_1, n_obs_2 = len(x1), len(x2)
    pooled_variance = (
        (n_obs_1 - 1) * np.var(x1, ddof=1) + (n_obs_2 - 1) * np.var(x2, ddof=1)
    ) / (n_obs_1 + n_obs_2 - 2)
    pooled_standard_dev_empirical = np.sqrt(pooled_variance)
    mean_diff_empirical = abs(np.mean(x1) - np.mean(x2))
    effect_size_empirical = mean_diff_empirical / pooled_standard_dev_empirical
    print(
        "Empirical pooled stdev: {:.2f}".format(pooled_standard_dev_empirical))
    print("Mean diff empirical: {:.2f}\neffect size empirical: {:.2f}".format(
        mean_diff_empirical, effect_size_empirical))

    # Sample size needed to reach the requested power (nobs1=None => solve for it).
    nobs1 = smpwr.tt_ind_solve_power(
        effect_size=effect_size_empirical,
        nobs1=None,
        alpha=alpha,
        power=power,
        alternative=alternative,
    )
    print(
        "With alpha {alpha}, power {power}, need ≈ {nobs1:.0f} observations of each type to achieve significance"
        .format(alpha=alpha, power=power, nobs1=nobs1))

    # Power vs. nobs
    smpwr.TTestIndPower().plot_power(dep_var='nobs',
                                     nobs=np.arange(2, 10),
                                     effect_size=[effect_size_empirical],
                                     alternative=alternative,
                                     alpha=alpha)
    plt.show()
def getESS_cet(b, d, IF):
    """
    Calculate the equilibrium sample size (ESS) under conditional equivalence testing.

    Parameters:
        b: base rate of true hypotheses (between 0 and 1)
        d: effect size (Cohen's d)
        IF: income factor (# of sample pairs purchasable per publication)

    The minimally relevant effect size is ``x * d`` with ``x`` fixed to 1
    below. The significance level ``alpha`` is read from the enclosing scope.

    Returns a tuple:
        ESS       equilibrium sample size (sample size at which Profit is maximal)
        SSS       scientifically appropriate sample size (power closest to 80%)
        TPR_ESS   total publishable rate at ESS (describes published literature)
        PPV_ESS   share of publishable results at ESS that are true
                  (true positives plus true equivalence findings)
        Power_ESS power of the t-test at ESS
        P_cet     power of the equivalence test (TOST) at ESS
        Income    vector of income for each tested sample size
        SS        vector of tested sample sizes
        Profit    vector of profit for each tested sample size
    """
    x = 1  # minimally relevant effect size as a fraction of d
    SS = np.arange(4, 1000, 2)
    Power = np.zeros(len(SS))
    Power_cet = np.zeros(len(SS))
    falsePR = np.zeros(len(SS))
    truePR = np.zeros(len(SS))
    falseNR_cet = np.zeros(len(SS))
    trueNR_cet = np.zeros(len(SS))
    totalPR = np.zeros(len(SS))
    Income = np.zeros(len(SS))
    Profit = np.zeros(len(SS))

    # Two-sample t-test power. (A one-sample variant would use
    # getpower.TTestPower with nobs=s.)
    analysis = getpower.TTestIndPower()

    for i, s in enumerate(SS):
        Power[i] = analysis.solve_power(effect_size=d, nobs1=s, ratio=1.0, alpha=alpha,
                                        power=None, alternative='two-sided')
        falsePR[i] = alpha * (1 - b)
        truePR[i] = Power[i] * b

        # Under CET, studies without a significant positive result are tested
        # for significant negative results using two one-sided t-tests
        # (TOST, Campbell and Gustafson, 2018). The TOST power comes from the
        # R TOSTER package (Lakens 2017).
        # The equivalence interval must be symmetric around zero: [-x*d, +x*d].
        # The upper bound was previously also -x*d, i.e. a zero-width interval.
        # NOTE(review): alpha is hard-coded to 0.05 in the R call while the
        # rest of the function uses the outer `alpha`; confirm they agree.
        R_command = ('TOSTER::powerTOSTtwo(alpha=0.05, N=' + str(s)
                     + ', low_eqbound_d=' + str(-x * d)
                     + ', high_eqbound_d=' + str(x * d) + ')')
        Power_cet[i] = ro.r(R_command)[0]

        # The correction factors (1-Power[i]) and (1-alpha) account for the
        # fact that positive results are not subjected to the equivalence test.
        falseNR_cet[i] = alpha * b * (1 - Power[i])
        trueNR_cet[i] = Power_cet[i] * (1 - b) * (1 - alpha)

        # With CET the total 'publishable' rate incorporates significant
        # alternative hypotheses as well as significant null hypotheses.
        totalPR[i] = falsePR[i] + truePR[i] + falseNR_cet[i] + trueNR_cet[i]
        Income[i] = totalPR[i] * IF
        Profit[i] = Income[i] - s

    ESSidx = np.argmax(Profit)
    ESS = SS[ESSidx]
    SSSidx = (np.abs(Power - 0.8)).argmin()
    SSS = SS[SSSidx]
    TPR_ESS = totalPR[ESSidx]
    PPV_ESS = (truePR[ESSidx] + trueNR_cet[ESSidx]) / totalPR[ESSidx]
    P_cet = Power_cet[ESSidx]
    Power_ESS = Power[ESSidx]

    return ESS, SSS, TPR_ESS, PPV_ESS, Power_ESS, P_cet, Income, SS, Profit
def calculate_stats(self):
    """
    Print and store correlation, group comparisons, effect size and power.

    Computes the Pearson correlation between ankle and wrist counts,
    independent-samples t-tests between the low- and high-activity groups
    (counts, age, weight, height, BMI), a Wilcoxon signed-rank test on the
    counts, Cohen's d for the sorting variable and a power analysis.

    Reads ``self.df``, ``self.low_active``, ``self.high_active``,
    ``self.sort_accel``, ``self.n_groups`` and ``self.n_per_group``.
    Results are stored on ``self`` (``r``, ``t_crit``, ``ttest_*``,
    ``wilcoxon_counts``, ``cohens_d``, ``power_object``, ``n_required``,
    ``achieved_power``) and summarised on stdout.
    """

    # COUNTS COMPARISON -------------------------------------------------------
    self.r = scipy.stats.pearsonr(self.df["Ankle Valid Counts"], self.df["Wrist Valid Counts"])
    print("\nCorrelation (ankle ~ wrist counts): r = {}, p = {}".format(round(self.r[0], 3), round(self.r[1], 3)))

    # BETWEEN-GROUP COMPARISONS -----------------------------------------------
    print("\n------------------- Comparison between {} groups created using {}"
          "------------------- ".format(self.n_groups, self.sort_accel))

    self.t_crit = scipy.stats.t.ppf(0.05, df=self.n_per_group*2-2)

    # Counts (the variable used to sort participants into groups)
    self.ttest_counts = scipy.stats.ttest_ind(self.low_active[self.sort_accel],
                                              self.high_active[self.sort_accel])
    print("Independent T-tests:")
    print("-Counts: t = {}, p = {}".format(round(self.ttest_counts[0], 3), round(self.ttest_counts[1], 3)))

    # Consider one-sided
    self.wilcoxon_counts = scipy.stats.wilcoxon(self.low_active[self.sort_accel],
                                                self.high_active[self.sort_accel])

    # Age
    self.ttest_age = scipy.stats.ttest_ind(self.low_active["Age"], self.high_active["Age"])
    print("-Age: t = {}, p = {}".format(round(self.ttest_age[0], 3), round(self.ttest_age[1], 3)))

    # Weight
    self.ttest_weight = scipy.stats.ttest_ind(self.low_active["Weight"], self.high_active["Weight"])
    print("-Weight: t = {}, p = {}".format(round(self.ttest_weight[0], 3), round(self.ttest_weight[1], 3)))

    # Height
    self.ttest_height = scipy.stats.ttest_ind(self.low_active["Height"], self.high_active["Height"])
    print("-Height: t = {}, p = {}".format(round(self.ttest_height[0], 3), round(self.ttest_height[1], 3)))

    # BMI
    self.ttest_bmi = scipy.stats.ttest_ind(self.low_active["BMI"], self.high_active["BMI"])
    print("-BMI: t = {}, p = {}".format(round(self.ttest_bmi[0], 3), round(self.ttest_bmi[1], 3)))

    # Sex: entries equal to 0 are counted as female.
    group1_n_females = list(self.low_active["Sex"].values).count(0)
    group2_n_females = list(self.high_active["Sex"].values).count(0)
    print("\n-Females per group:")
    print("    -Low activity: {}".format(group1_n_females))
    print("    -High activity: {}".format(group2_n_females))

    # Effect size -------------------------------------------------------------
    # Look the statistics up by label; positional access ([1], [2]) on a
    # label-indexed Series is deprecated in recent pandas.
    low_summary = self.low_active.describe()[self.sort_accel]
    high_summary = self.high_active.describe()[self.sort_accel]
    sd1, mean1 = low_summary["std"], low_summary["mean"]
    sd2, mean2 = high_summary["std"], high_summary["mean"]

    pooled_sd = ((sd1 ** 2 + sd2 ** 2) / 2) ** (1 / 2)

    self.cohens_d = round((mean2 - mean1) / pooled_sd, 3)

    print("\nPOWER ANALYSIS")
    print("\n-Effect size between average counts: d = {}".format(self.cohens_d))

    # Statistical power -------------------------------------------------------
    self.power_object = smp.TTestIndPower()
    effect_magnitude = abs(self.cohens_d)
    self.n_required = self.power_object.solve_power(effect_magnitude, power=0.8, alpha=0.05)
    # Use the real group size; this was previously hard-coded to nobs1=5.
    self.achieved_power = self.power_object.solve_power(effect_magnitude, nobs1=self.n_per_group,
                                                        ratio=1, alpha=.05)

    print("-Sample size required to reach for β of 0.80 is {}.".format(round(self.n_required, 2)))
    print("-Attained power = {}".format(round(self.achieved_power, 3)))
    if self.achieved_power > 0.8:
        print("    -Acceptable power attained.")
# %% [markdown] # This shows, segment (being high-value or low-value) significantly affects the purchase count but group doesn’t since it is almost 70%, way higher than 5%. # # Now we know how to select our groups and evaluate the results. But there is one more missing part. To reach statistical significance, our sample size should be enough. Let’s see how we can calculate it. # %% [markdown] # # Sample Size Calculation # To calculate the required sample size, first we need to understand two concepts: # * __Effect size__: this represents the magnitude of difference between averages of test and control group. It is the variance in averages between test and control groups divided by the standard deviation of the control. # * __Power__: this refers to the probability of finding a statistical significance in your test. To calculate the sample size, 0.8 is the common value that is being used. # # Let’s build our dataset and see the sample size calculation in an example: # %% from statsmodels.stats import power ss_analysis = power.TTestIndPower() #create hv segment df_hv = pd.DataFrame() df_hv['customer_id'] = np.arange(20000) df_hv['segment'] = 'high-value' df_hv['prev_purchase_count'] = np.random.poisson(0.7, 20000) # %% purchase_mean = df_hv.prev_purchase_count.mean() purchase_std = df_hv.prev_purchase_count.std() # %% print(np.round(purchase_mean, 4), np.round(purchase_std, 4)) # %% [markdown]
def calculate_unpaired_ttest_php(es, n_per_group):
    """
    Print the power of a two-sided, equal-group-size independent t-test.

    Args:
        es: effect size passed to ``TTestIndPower.power``.
        n_per_group: number of observations in each group.

    The significance level is fixed at 0.05 and the degrees of freedom are
    passed explicitly as ``2 * n_per_group - 2``.
    """
    degrees_of_freedom = 2 * n_per_group - 2
    analysis = smp.TTestIndPower()
    achieved = analysis.power(
        effect_size=es,
        nobs1=n_per_group,
        alpha=0.05,
        df=degrees_of_freedom,
        ratio=1,
        alternative='two-sided',
    )
    print(achieved)
def compute(cls, observation, prediction):
    """
    Compute a t statistic, p-value, degrees of freedom and power.

    Args:
        observation: either a dict with 'mean', 'std' and 'n' (two-sample
            test) or a bare value treated as the observed mean (one-sample
            test).
        prediction: dict with 'mean', 'std' and 'n'.

    Returns:
        StudentsTestScore built from the t statistic, with ``related_data``
        holding 'dof', 'p_value', 'power' and 'diffmean'
        (prediction mean minus observation mean).
    """
    p_mean, p_std, p_n = prediction['mean'], prediction['std'], prediction['n']
    p_var = p_std**2

    if not isinstance(observation, dict):
        # One-sample case: the observation is a single value with zero spread.
        value, p_val = st.ttest_ind_from_stats(p_mean,
                                               p_std,
                                               p_n,
                                               observation,
                                               std2=0,
                                               nobs2=2,
                                               equal_var=False)
        single_point = {"mean": observation, "std": 0}
        effect = CohenDScore.compute(single_point, prediction).score
        power = pw.TTestPower().power(effect_size=effect, nobs=p_n, alpha=0.05)
        o_mean = observation
        dof = p_n - 1
    else:
        # Two-sample case.
        o_mean, o_std, o_n = observation['mean'], observation['std'], observation['n']
        o_var = o_std**2

        # Variances differing by more than a factor of 2 call for a Welch
        # t-test; otherwise a pooled-variance Student t-test is used.
        variances_differ = p_var / o_var > 2 or o_var / p_var > 2
        value, p_val = st.ttest_ind_from_stats(p_mean,
                                               p_std,
                                               p_n,
                                               o_mean,
                                               o_std,
                                               o_n,
                                               equal_var=not variances_differ)
        if variances_differ:
            # Welch-Satterthwaite equation for the degrees of freedom.
            vnp = p_var / p_n
            vno = o_var / o_n
            dof = (vnp + vno)**2 / (vnp**2 / (p_n - 1) + vno**2 / (o_n - 1))
        else:
            dof = o_n + p_n - 2

        # Statistical power of the two-sample test.
        effect = CohenDScore.compute(observation, prediction).score
        power = pw.TTestIndPower().power(effect_size=effect,
                                         nobs1=p_n,
                                         ratio=float(o_n) / p_n,
                                         alpha=0.05)

    related = {
        "dof": dof,
        "p_value": p_val,
        "power": power,
        "diffmean": p_mean - o_mean
    }
    return StudentsTestScore(value, related_data=related)