def analyzeData(results2):
    """Print a repeated-measures ANOVA and pairwise tests for both measures.

    The same report is produced first for the ``'Accuracy'`` column and then
    for the ``'Reaction Time'`` column of *results2*:

    1. the measure name,
    2. an ``AnovaRM`` fit with ``'Subject'`` as subject and ``'Condition'``
       as the within factor (cells aggregated with the mean),
    3. the summary table of all pairwise ``sci.ttest_rel`` comparisons
       between conditions, corrected with ``method='bonf'``.

    Parameters
    ----------
    results2 : DataFrame
        Must provide the columns ``'Subject'``, ``'Condition'``,
        ``'Accuracy'`` and ``'Reaction Time'``.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    for measure in ('Accuracy', 'Reaction Time'):
        print(measure)
        anova = AnovaRM(data=results2,
                        depvar=measure,
                        subject='Subject',
                        within=['Condition'],
                        aggregate_func='mean')
        print(anova.fit())

        pairwise = MultiComparison(results2[measure], results2['Condition'])
        # Only the first element of the allpairtest result (the summary
        # table) is reported.
        summary_table = pairwise.allpairtest(sci.ttest_rel, method='bonf')[0]
        print(summary_table)
def get_multiplecomparisons(self, dataframe, test):
    """Print Bonferroni-corrected pairwise comparisons between the columns.

    Rows with missing values are dropped, the remaining frame is printed and
    melted to long format (a ``'Bin'`` label and a ``'Value'`` per row), and
    every pair of bins is then compared.

    Parameters
    ----------
    dataframe : DataFrame
        Wide table; each column is treated as one bin.
    test : str
        ``'ttest'`` selects ``scipy.stats.ttest_rel``; any other value
        selects ``scipy.stats.wilcoxon``.

    Returns
    -------
    None
        The cleaned frame and the comparison table are written to stdout.
    """
    complete_rows = dataframe.dropna()
    print(complete_rows)

    long_form = complete_rows.melt(var_name='Bin', value_name='Value')
    pair_test = (scipy.stats.ttest_rel
                 if test == 'ttest' else scipy.stats.wilcoxon)

    comparison = MultiComparison(long_form['Value'], long_form['Bin'])
    print(comparison.allpairtest(pair_test, method='Bonf')[0])
def calculate_test(self):
    """Run Holm-corrected pairwise paired t-tests over rows, then columns.

    Holm-Bonferroni is a multiple-comparison correction, see
    https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method .
    The pairwise test is ``stats.ttest_rel``, so make sure the data are
    approximately normally distributed.

    The first pass groups ``self.data.values`` by ``self.data.index``; the
    second pass groups the transposed values by ``self.data.columns``. Each
    result is printed and also passed to ``self.results.write``.

    Returns
    -------
    tuple
        ``(holm, holm2)``: the raw ``allpairtest`` results for the row-wise
        and the column-wise comparison, in that order.
    """
    by_rows = MultiComparison(self.data.values, self.data.index)
    holm = by_rows.allpairtest(stats.ttest_rel, method='Holm')
    row_report = "\nHolm-Bonferroni test for rows\n" + str(holm) + "\n"
    print(row_report)
    self.results.write(row_report)

    by_columns = MultiComparison(self.data.T.values, self.data.columns)
    holm2 = by_columns.allpairtest(stats.ttest_rel, method='Holm')
    column_report = "\nHolm-Bonferroni test for columns\n" + str(holm2) + "\n"
    print(column_report)
    self.results.write(column_report)

    return (holm, holm2)
def get_significance_booleans(data):
    """Perform Holm-corrected pairwise independent t-tests between groups.

    Parameters
    ----------
    data : Series
        Must have a single-level index holding the group label of each
        observation; the values are the results to be compared.

    Returns
    -------
    Series
        The ``'reject'`` column of the pairwise result table, indexed by
        ``('group1', 'group2')``: whether the difference between the two
        groups was judged significant.
    """
    group_labels = data.index.values
    observations = data.values

    comparison = MultiComparison(observations, group_labels)
    pairwise_holm = comparison.allpairtest(ttest_ind, method='holm')

    # The third element of the result is converted to a DataFrame; it must
    # provide the 'group1', 'group2' and 'reject' fields used below.
    pair_table = DataFrame(pairwise_holm[2])
    return pair_table.set_index(['group1', 'group2'])['reject']
def pairwise_ttest(val_vec, cnf):
    """Print Bonferroni-corrected pairwise t-tests between subclusters.

    Parameters
    ----------
    val_vec : dict
        Maps a subcluster identifier to a mapping from which the scores
        are read as ``val_vec[subcluster][cnf]``.
    cnf :
        Key selecting which list of scores to compare.

    Notes
    -----
    Prints the ``allpairtest`` summary table (``ttest_ind``, ``'bonf'``) and
    then the per-subcluster ``describe()`` output. As a side effect the
    global ``pd.options.display.float_format`` is set to three decimals.
    An earlier hand-rolled variant (Welch t-test over all combinations plus
    ``multipletests``) has been superseded by ``MultiComparison``.
    """
    labels = []
    scores = []
    for subc, dic_conf in val_vec.items():
        values = dic_conf[cnf]
        labels.extend([str(subc)] * len(values))
        scores.extend(values)

    df = pd.DataFrame({'subcluster': labels, 'score': scores})

    comparison = MultiComparison(df['score'], df['subcluster'])
    print(comparison.allpairtest(ttest_ind, method='bonf')[0])

    pd.options.display.float_format = '{:.3f}'.format
    print(df.groupby(['subcluster']).describe())
def Holm_Bonferroni(multiComp: MultiComparison) -> float:
    """Run pairwise paired t-tests instead of Tukey's test.

    The comparisons are printed twice: first with the Holm correction and
    then with the Bonferroni correction.

    Parameters
    ----------
    multiComp : MultiComparison
        The prepared multiple-comparison object.

    Returns
    -------
    checkVal : float
        Element ``[0, 0]`` of the raw result array of the Holm run; any
        value would do, it only serves to check correct execution.
    """
    holm_run = multiComp.allpairtest(stats.ttest_rel, method='Holm')
    print(holm_run[0])

    bonferroni_run = multiComp.allpairtest(stats.ttest_rel, method='b')
    print(bonferroni_run[0])

    return holm_run[1][0][0, 0]
def get_anova_multiplecomp(self, accuracy_dataframe):
    """Print a one-way ANOVA and, if significant, pairwise comparisons.

    The ANOVA (``scipy.stats.f_oneway``) is computed over the columns
    ``'Task1'``, ``'Task2'`` and ``'Task2b'``. When its p-value is below
    0.05, the frame is melted to long format, rows labelled ``'Task3'`` are
    removed, and all task pairs are compared with ``scipy.stats.ttest_rel``
    using the Holm correction.

    Parameters
    ----------
    accuracy_dataframe : DataFrame
        Wide table with one column of errors per task.
    """
    task_columns = ['Task1', 'Task2', 'Task2b']
    _, p = scipy.stats.f_oneway(
        *[accuracy_dataframe[task] for task in task_columns])
    print('Anova %0.5f' % p)

    # Only follow up with pairwise tests when the omnibus test is significant.
    if not p < 0.05:
        return

    long_form = accuracy_dataframe.melt(var_name='Task', value_name='Error')
    long_form = long_form[long_form.Task != 'Task3']
    comparison = MultiComparison(long_form['Error'], long_form['Task'])
    print(comparison.allpairtest(scipy.stats.ttest_rel, method='Holm')[0])
def get_anova_multiplecomp_bytimebin(self, bin_df):
    """Print a one-way ANOVA across time bins and pairwise follow-up tests.

    The ANOVA (``scipy.stats.f_oneway``) compares the ``'Value'`` entries of
    ``'Bin0'`` to ``'Bin3'``. When its p-value is below 0.05, the rows of
    those four bins are compared pairwise with ``scipy.stats.kruskal`` using
    the Holm correction; rows of any other bin are left out.

    Parameters
    ----------
    bin_df : DataFrame
        Long-format table with a ``'Bin'`` label column and a ``'Value'``
        column.
    """
    bins = ['Bin0', 'Bin1', 'Bin2', 'Bin3']
    _, p = scipy.stats.f_oneway(
        *[bin_df[bin_df.Bin == name]['Value'] for name in bins])
    print('Anova %0.5f' % p)

    # If the distributions are not different there is nothing to follow up.
    if not p < 0.05:
        return

    # Remove uneven bins before the comparison.
    kept_rows = bin_df[bin_df['Bin'].isin(bins)]
    comparison = MultiComparison(kept_rows['Value'], kept_rows['Bin'])
    print(comparison.allpairtest(scipy.stats.kruskal, method='Holm')[0])
def kruskal(*args):
    """Print a Kruskal-Wallis test and pairwise Mann-Whitney U comparisons.

    Each positional argument is one group of observations. The function
    prints, in order: the raw arguments, the ``stats.kruskal`` result, the
    groups as a wide DataFrame (one row per group), the stacked long-format
    frame before and after renaming its columns to ``'genotype'`` and
    ``'result'``, and finally the Holm-corrected ``allpairtest`` result
    using ``stats.mannwhitneyu``.

    Groups are labelled by their position in *args*.
    """
    print(args)
    print(stats.kruskal(*args))

    wide = pd.DataFrame(args)
    print(wide)

    long_form = wide.stack().reset_index()
    print(long_form)

    long_form = long_form.rename(columns={'level_0': 'genotype', 0: 'result'})
    print(long_form)

    comparison = MultiComparison(long_form['result'], long_form['genotype'])
    print(comparison.allpairtest(stats.mannwhitneyu, method='Holm'))
def hypothesis_test_four(cleaned_data):
    """Compare ticket values between four car makes, pair by pair.

    Samples for the makes ``'NISS'``, ``'FORD'``, ``'HOND'`` and ``'TOY'``
    are drawn with ``create_sample_dists`` (``seed=4``), every pair of makes
    is compared with ``stats.ttest_ind``, and the result is returned as a
    table of pairwise comparisons and p-values.

    NOTE(review): the pairwise table is produced with ``method='Holm'``, but
    the corrected p-values are dropped and the significance column is
    recomputed from the *uncorrected* p-value via
    ``compare_pval_alpha_tf`` - presumably this is what makes the procedure
    a Fisher LSD analysis; confirm that this is intended.

    :param cleaned_data: our cleaned dataset
    :return: DataFrame indexed by ``'First Make'`` with the columns
        ``'Second Make'``, ``'P Value'`` and ``'Signifigant Difference?'``
    """
    # Local import keeps this block self-contained.
    from io import StringIO

    # Get data for tests
    categories = ['NISS', 'FORD', 'HOND', 'TOY']
    comparison_groups = create_sample_dists(cleaned_data,
                                            y_var='ticket',
                                            x_var='make',
                                            categories=categories,
                                            seed=4)

    # Build (value, make) rows; zip pairs at most 50 observations per make
    # with their label.
    labelled_rows = []
    for i, category in enumerate(categories):
        labelled_rows.extend(zip(list(comparison_groups[i]), [category] * 50))
    df_lsd = pd.DataFrame(labelled_rows)

    # Perform the pairwise tests for the groups.
    mult_comp = MultiComparison(df_lsd[0], df_lsd[1])
    result = mult_comp.allpairtest(stats.ttest_ind, method='Holm')

    # The SimpleTable result is converted to a DataFrame through its HTML
    # rendering. read_html is given a file-like object because passing
    # literal HTML text is deprecated in recent pandas versions.
    result_html = result[0].as_html()
    lsd_df = pd.read_html(StringIO(result_html), header=0, index_col=0)[0]

    lsd_df = lsd_df.drop(columns=['stat', 'pval_corr'])
    lsd_df['reject'] = lsd_df['pval'].apply(compare_pval_alpha_tf)
    lsd_df.columns = ["Second Make", "P Value", "Signifigant Difference?"]
    lsd_df.index.names = ['First Make']
    return lsd_df
# ## Tukey's multi-comparison method # # See https://en.wikipedia.org/wiki/Tukey's_range_test # # This method tests at P<0.05 (correcting for the fact that multiple comparisons are being made which would normally increase the probability of a significant difference being identified). A results of 'reject = True' means that a significant difference has been observed. # In[26]: from statsmodels.stats.multicomp import (pairwise_tukeyhsd, MultiComparison) # Set up the data for comparison (creates a specialised object) MultiComp = MultiComparison(stacked_data['result'], stacked_data['treatment']) # Show all pair-wise comparisons: # Print the comparisons print(MultiComp.tukeyhsd().summary()) # ## Holm-Bonferroni Method # # See: https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method # # The Holm-Bonferroni method is an alterantive method. # In[27]: comp = MultiComp.allpairtest(stats.ttest_rel, method='Holm') print(comp[0])
# NOTE(review): this fragment uses Python 2 print statements and relies on
# names (xlim, mod, res2, dta2) that are set before the visible part.
plt.xlim(*xlim)
# Label each tick with the pair of group names being compared.
# NOTE(review): res2 is indexed like a tuple here - presumably the tuple-style
# result returned by pairwise_tukeyhsd in older statsmodels; confirm against
# the installed version.
pair_labels = mod.groupsunique[np.column_stack(res2[1][0])]
plt.xticks([0,1,2], pair_labels)
plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
          '\n Pairwise Mean Differences')

# Save to outfile
outFile = 'MultComp.png'
plt.savefig('MultComp.png', dpi=200)
print 'Figure written to {0}'.format(outFile)
plt.show()

# Instead of the Tukey's test, we can do pairwise t-test
# First, with the "Holm" correction
rtp = mod.allpairtest(stats.ttest_rel, method='Holm')
print rtp[0]

# and then with the Bonferroni correction
print mod.allpairtest(stats.ttest_rel, method='b')[0]

# Done this way, the variance is calculated at each comparison.
# If you want the joint variance across all samples, you have to
# use a few tricks:(http://jpktd.blogspot.co.at/2013/03/multiple-comparison-and-tukey-hsd-or_25.html)
res2 = pairwise_tukeyhsd(dta2['StressReduction'], dta2['Treatment'])
# NOTE(review): presumably elements 2 and 3 of the tuple-style result are the
# pairwise mean differences and their standard deviations - TODO confirm.
studentized_mean = res2[1][2]
studentized_variance = res2[1][3]
t_stat = (studentized_mean / studentized_variance) / np.sqrt(2)
# Degrees of freedom: number of observations minus number of groups.
dof = len(dta2) - len(mod.groupsunique)
my_pvalues = stats.t.sf(np.abs(t_stat), dof) * 2  # two-sided
def position_stats(df, name_mapping=None):
    """Compare groups pairwise at every position and correct the p-values.

    For each distinct ``df.position`` the rows are averaged per ``sid``,
    resampled with ``resample_data`` (``BOOTSTRAP_NUM`` samples per
    position) and all pairs of groups are compared with Welch's t-test
    (``stats.ttest_ind`` with ``equal_var=False``). The uncorrected p-values
    of all positions are then pooled, doubled, and Bonferroni-corrected
    together.

    Parameters
    ----------
    df : DataFrame
        Needs the columns ``position``, ``sid``, ``group`` and ``value``.
    name_mapping : dict, optional
        Maps ``str(group)`` to a display name used in the column headers.

    Returns
    -------
    DataFrame
        One row per position (sorted) and one column per group pair, named
        ``"<group1> - <group2>"``; only the first three columns are kept.

    Raises
    ------
    ValueError
        If *df* contains no positions.
    """
    from functools import partial, wraps

    positions = df.position.unique()
    positions.sort()

    # allpairtest reads ``testfunc.__name__``, which a bare partial lacks, so
    # the partial is the object being decorated: wraps(original)(partial).
    # The reverse order would hand back the unmodified stats.ttest_ind and
    # silently drop equal_var=False.
    welch_ttest = wraps(stats.ttest_ind)(
        partial(stats.ttest_ind, equal_var=False))

    header = None
    pval_frames = []
    for pos in positions:
        data = df[df.position == pos]
        data = data.groupby(['sid']).mean()
        data = resample_data(data, num_sample_per_pos=BOOTSTRAP_NUM)

        mcp = MultiComparison(data.value, data.group.astype(int))
        rtp = mcp.allpairtest(welch_ttest, method='bonf')

        # Build one "<group1> - <group2>" label per compared pair.
        mheader = []
        for itest in rtp[2]:
            name1, name2 = itest[0], itest[1]
            if name_mapping is not None:
                name1 = name_mapping[str(name1)]
                name2 = name_mapping[str(name2)]
            mheader.append("{} - {}".format(name1, name2))
        # Keep the longest header seen, i.e. the position with most pairs.
        if not header or len(mheader) > len(header):
            header = mheader

        # Column 1 of the raw result array holds the uncorrected p-values;
        # the correction is applied once across all positions below.
        pvals = rtp[1][0][:, 1]
        pval_frames.append(pd.DataFrame(data=[pvals], columns=mheader))

    if not pval_frames:
        raise ValueError("df contains no positions to compare")

    allpvals = pd.concat(pval_frames)

    # NOTE(review): the p-values are doubled before the correction, as in the
    # original code; ttest_ind is already two-sided, so confirm the intent.
    flatten = allpvals.values.ravel() * 2
    mcpres = multipletests(flatten, alpha=0.05, method='bonf')

    corr_pvals = np.reshape(np.array(mcpres[1]), (len(positions), -1))
    data = pd.DataFrame(data=corr_pvals, columns=header)
    return data[data.columns[:3]]
# Q-Q plot of the ANOVA residuals (diagnostic check of the fitted model)
sm.qqplot(anova_reg.resid, line='s')
plt.show()

######
# Post hoc tests for the one-way ANOVA
# Tukey test - good when the groups are the same size and have
# homogeneous variance
postHoc = pairwise_tukeyhsd(alldata['Fare_Per_Person'],
                            alldata['Embarked'],
                            alpha=0.05)
print(postHoc)

# Pairwise comparison using Bonferroni correction of the p-values.
# Independent t-test here; for a paired t-test use
# mc.allpairtest(stats.ttest_rel, method='Holm')[0] instead.
mc = MultiComparison(alldata['Fare_Per_Person'], alldata['Embarked'])
print(mc.allpairtest(stats.ttest_ind, method='b')[0])

######
# ANCOVA
# Look for heteroskedasticity among second-class male passengers
second_class_men = (alldata['Pclass'] == 2) & (alldata['Sex_male'] == 1)
plt.plot(alldata[second_class_men]['Fare_Per_Person'],
         alldata[second_class_men]['Group_Size'],
         'bo')
plt.show()

# Second-class male passengers with a fare price > 0 seem OK.
# There are a couple of group sizes with only 1 observation under these
# criteria though, so make sure to filter them out too.
sub = alldata[second_class_men
              & (alldata['Fare'] > 0)
              & (alldata['Group_Size'].isin([1, 2, 3, 4, 8, 9, 10, 11]))]

# Test for heteroskedasticity
print(levenes_test(sub['Fare_Per_Person'], sub['Group_Size']))
print(bartlett_test(sub['Fare_Per_Person'], sub['Group_Size']))
def main():
    """Demonstrate multiple-comparison procedures on a small data set.

    Steps: a one-way ANOVA of ``StressReduction`` by ``Treatment``, Tukey's
    HSD test (printed and plotted; the figure is saved to ``MultComp.png``
    and shown), pairwise paired t-tests with the Holm and the Bonferroni
    correction, and finally pairwise t-tests based on the joint variance
    across all samples.

    Returns
    -------
    The ``variance`` attribute of the Tukey HSD result.
    """
    # Note: the statsmodels module is required here.
    from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                             MultiComparison)
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm
    from statsmodels.stats.multitest import multipletests
    import matplotlib.pyplot as plt

    # Set up the data, as a structured array.
    # The first and last field are 32-bit integers; the second field is an
    # 8-byte string. Note that here we can also give names to the individual
    # fields!
    dta2 = np.rec.array([
        (1, 'mental', 2), (2, 'mental', 2), (3, 'mental', 3),
        (4, 'mental', 4), (5, 'mental', 4), (6, 'mental', 5),
        (7, 'mental', 3), (8, 'mental', 4), (9, 'mental', 4),
        (10, 'mental', 4),
        (11, 'physical', 4), (12, 'physical', 4), (13, 'physical', 3),
        (14, 'physical', 5), (15, 'physical', 4), (16, 'physical', 1),
        (17, 'physical', 1), (18, 'physical', 2), (19, 'physical', 3),
        (20, 'physical', 3),
        (21, 'medical', 1), (22, 'medical', 2), (23, 'medical', 2),
        (24, 'medical', 2), (25, 'medical', 3), (26, 'medical', 2),
        (27, 'medical', 3), (28, 'medical', 1), (29, 'medical', 3),
        (30, 'medical', 1)],
        dtype=[('idx', '<i4'),
               ('Treatment', '|S8'),
               ('StressReduction', '<i4')])

    # First, do a one-way ANOVA
    df = pd.DataFrame(dta2)
    model = ols('StressReduction ~ C(Treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)
    # The table is indexed by term name, so the first row is selected by
    # position explicitly (plain [0] on a labelled Series is deprecated).
    if anovaResults['PR(>F)'].iloc[0] < 0.05:
        print('One of the groups is different.')

    # Then, do the multiple testing
    mod = MultiComparison(dta2['StressReduction'], dta2['Treatment'])
    print(mod.tukeyhsd().summary())

    # The following call produces the same results
    res2 = pairwise_tukeyhsd(dta2['StressReduction'], dta2['Treatment'])

    # Show the group names
    print(mod.groupsunique)

    # Generate a plot of the pairwise mean differences with error bars
    xvals = np.arange(3)
    plt.plot(xvals, res2.meandiffs, 'o')
    errors = np.ravel(np.diff(res2.confint) / 2)
    # 'o' is a marker, not a line style, so it is passed as the format.
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, fmt='o')

    xlim = -0.5, 2.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    pair_labels = mod.groupsunique[np.column_stack(
        res2._multicomp.pairindices)]
    plt.xticks(xvals, pair_labels)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '\n Pairwise Mean Differences')

    # Save to outfile
    outFile = 'MultComp.png'
    plt.savefig('MultComp.png', dpi=200)
    print('Figure written to {0}'.format(outFile))
    plt.show()

    # Instead of the Tukey's test, we can do pairwise t-test
    # First, with the "Holm" correction
    rtp = mod.allpairtest(stats.ttest_rel, method='Holm')
    print(rtp[0])

    # and then with the Bonferroni correction
    print(mod.allpairtest(stats.ttest_rel, method='b')[0])

    # Done this way, the variance is calculated at each comparison.
    # If you want the joint variance across all samples, you have to
    # use a few tricks:(http://jpktd.blogspot.co.at/2013/03/multiple-comparison-and-tukey-hsd-or_25.html)
    res2 = pairwise_tukeyhsd(dta2['StressReduction'], dta2['Treatment'])
    # The t statistic is the mean difference divided by the pairwise
    # standard deviation of the studentized range (std_pairs) and sqrt(2);
    # the scalar pooled variance (res2.variance) is not the right divisor.
    t_stat = (res2.meandiffs / res2.std_pairs) / np.sqrt(2)
    dof = len(dta2) - len(mod.groupsunique)
    my_pvalues = stats.t.sf(np.abs(t_stat), dof) * 2  # two-sided

    # Now with the Bonferroni correction
    res_b = multipletests(my_pvalues, method='b')

    return res2.variance
# Brown-Forsythe test of equal variances: Levene's test centred on the median.
w, p_bf = stats.levene(edg['WPM'], graf['WPM'], uni['WPM'], center='median')
check_p('brown forsythe test', assumption='homogeneity of variance', p_val=p_bf)
# non-significance shows we don't have a violation

# now that we know our assumptions have not been violated, we can fit the ANOVA. This is the omnibus test
alpha_lm = ols('WPM ~ C(Alphabet)', data=alpha).fit()
logger.info(f'ANOVA summary: \n\n {alpha_lm.summary()}')
# Prob (F-statistic) shows that there is some difference between the different Alphabets but does not tell us where the
# difference is. For that we do the pairwise comparisons

# Tukey comparison, followed by Holm-adjusted pairwise t-tests (the two are
# reported separately; it is not clear how to combine them).
mc = MultiComparison(alpha['WPM'], alpha['Alphabet'])
logger.info(f'tukey comparison2: \n {mc.tukeyhsd()}')
comp = mc.allpairtest(stats.ttest_ind, method='Holm')
logger.info(f'holm corrected version: \n {comp[0]}')

# non parametric version of one-way ANOVA
chi, p = stats.kruskal(edg['WPM'], graf['WPM'], uni['WPM'])
check_p(descr='Kruskal chi squared test', assumption='', p_val=p)

# Pairwise two-sided Mann-Whitney U tests between the three groups
mw, p_eg = stats.mannwhitneyu(edg['WPM'], graf['WPM'], alternative='two-sided')
logger.info(f'mann-whitney stat edg vs. graf: {mw}, p value: {p_eg}')
mw, p_ug = stats.mannwhitneyu(uni['WPM'], graf['WPM'], alternative='two-sided')
logger.info(f'mann-whitney stat uni vs. graf: {mw}, p value: {p_ug}')
mw, p_ue = stats.mannwhitneyu(uni['WPM'], edg['WPM'], alternative='two-sided')
# NOTE(review): this message says "EC vs. PC" although the test above
# compares uni with edg - the label looks like a copy-paste leftover; confirm.
logger.info(f'mann-whitney stat EC vs. PC: {mw}, p value: {p_ue}')

# Holm correction of the three Mann-Whitney p-values, in the order
# edg/graf, uni/graf, uni/edg.
rej, p_vals, _, _ = multitest.multipletests([p_eg, p_ug, p_ue], method='holm')
for num, pv in enumerate(p_vals):
def _kmwu_report(groups, label_column, method):
    """Print a Kruskal-Wallis test and pairwise rank-sum tests for *groups*.

    *groups* is a list of ``(label, samples)`` pairs; *method* is the
    multiple-comparison correction passed on to ``allpairtest``.
    """
    if len(groups) < 2:
        print("fewer than two groups with data - nothing to compare")
        return

    print(scipy.stats.kruskal(*[samples for _, samples in groups]))

    # Long format: one row per observation, labelled with its group.
    long_data = pd.DataFrame({
        label_column: [label for label, samples in groups for _ in samples],
        'result': [value for _, samples in groups for value in samples],
    }).dropna(subset=['result'])

    comparison = MultiComparison(long_data['result'], long_data[label_column])
    print(comparison.allpairtest(scipy.stats.ranksums, method=method))


def KMWU(pathname='',
         pulsedurs=[5],
         genders=["_male", "_female", "_matedFemale"],
         neuronparts=["medial", "lateral"],
         identifiers=[".mat", "10ms", "40Hz"],
         key="pulsedff",
         compareOn="genders",
         multicompmethod='holm'):
    """Kruskal-Wallis test followed by pairwise rank-sum comparisons.

    For every pulse duration, the directory is searched for one file per
    (neuron part, gender) combination; the first matching file is loaded
    with ``scipy.io.loadmat`` and the first element of every entry of
    ``data[key]`` is used as one observation. The groups are then compared
    with ``scipy.stats.kruskal`` and, pairwise, with
    ``scipy.stats.ranksums`` (Wilcoxon rank-sum test). All results are
    printed; nothing is returned.

    Parameters
    ----------
    pathname : str
        Directory holding the files; relative paths are resolved against
        the current working directory, which is also the default.
    pulsedurs : list of numbers
        Pulse durations in seconds. Values below 1 are matched as
        ``'<n>ms'`` in the file name, others as ``'<n>s'``.
    genders, neuronparts : list of str
        Substrings identifying gender and neuron part in a file name.
    identifiers : list of str
        Substrings that every selected file name must contain.
    key : str
        Name of the variable to read from each file.
    compareOn : str
        ``"genders"`` compares genders within each neuron part (the neuron
        part is printed first); ``"neuronparts"`` compares neuron parts
        within each gender. Any other value only prints a message.
    multicompmethod : str
        Correction method handed to ``allpairtest`` (default ``'holm'``).

    Notes
    -----
    Groups are labelled with the entries of *genders* / *neuronparts* that
    were actually found, so a missing file cannot shift data to the wrong
    label. The list defaults are never modified by this function.
    """
    # os.path.join drops the cwd prefix by itself when pathname is absolute.
    currentdir = os.getcwd()
    fullpath = os.path.join(currentdir, pathname) if pathname else currentdir
    dirlist = os.listdir(fullpath)

    for pulsedur in pulsedurs:
        if pulsedur < 1:
            pulsedurstring = str(int(1000 * pulsedur)) + 'ms'
        else:
            pulsedurstring = str(int(pulsedur)) + 's'

        candidates = [
            filename for filename in dirlist
            if pulsedurstring in filename and all(
                identifier in filename for identifier in identifiers)
        ]

        # Observations keyed by (neuron part, gender); combinations without a
        # matching file are simply absent.
        pulsedffs = {}
        for part in neuronparts:
            for gender in genders:
                matches = [
                    filename for filename in candidates
                    if gender in filename and part in filename
                ]
                if not matches:
                    continue
                fullfile = os.path.join(fullpath, matches[0])
                data = scipy.io.loadmat(fullfile, matlab_compatible=True)
                pulsedffs[(part, gender)] = [dat[0] for dat in data[key]]

        if compareOn == "genders":
            for part in neuronparts:
                print(part)
                groups = [(gender, pulsedffs[(part, gender)])
                          for gender in genders
                          if (part, gender) in pulsedffs]
                _kmwu_report(groups, 'genotype', multicompmethod)
        elif compareOn == "neuronparts":
            for gender in genders:
                groups = [(part, pulsedffs[(part, gender)])
                          for part in neuronparts
                          if (part, gender) in pulsedffs]
                _kmwu_report(groups, 'neuronpart', multicompmethod)
        else:
            print(
                "not a valid selection for compareOn - must be \"genders\" or \"neuronparts\""
            )
# NOTE(review): --output is declared required, so its default is never used.
parser.add_argument('--output',
                    required=True,
                    default='MultiComparison.csv',
                    help='out file name.')
args = parser.parse_args()
method = args.method
in_path = args.input
col1 = args.col1  # column holding the group labels
col2 = args.col2  # column holding the measured values

df = pd.read_csv(in_path)
df[col2] = df[col2].astype("float64")

multiComp = MultiComparison(df[col2], df[col1])
# Each analysis is computed once and the same result is printed and saved.
if method == 'Tukey':
    result = multiComp.tukeyhsd().summary()
    print(result)
    resultdf = pd.DataFrame(result)
    resultdf.to_csv(args.output, header=None, index=False)
else:
    # Any other method name is used as the correction for pairwise paired
    # t-tests; the last element of the result is what gets saved.
    result = multiComp.allpairtest(stats.ttest_rel, method=method)[-1]
    print(result)
    resultdf = pd.DataFrame(result)
    resultdf.to_csv(args.output, index=False)

#python MultiComparison.py --method "Tukey" --input ../../data/sample.csv --col1 Treatment --col2 "StressReduction" --output ../../out/MultiComparison.csv
def main():
    """Demonstrate multiple-comparison procedures on a small data set.

    Steps: a one-way ANOVA of ``StressReduction`` by ``Treatment``, Tukey's
    HSD test (printed and plotted; the figure is saved to ``MultComp.png``
    and shown), pairwise paired t-tests with the Holm and the Bonferroni
    correction, and finally pairwise t-tests based on the joint variance
    across all samples.

    Returns
    -------
    The ``variance`` attribute of the Tukey HSD result.
    """
    # Note: the statsmodels module is required here.
    from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                             MultiComparison)
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm
    from statsmodels.stats.multitest import multipletests
    import matplotlib.pyplot as plt

    # Set up the data, as a structured array.
    # The first and last field are 32-bit integers; the second field is an
    # 8-byte string. Note that here we can also give names to the individual
    # fields!
    dta2 = np.rec.array([
        (1, 'mental', 2), (2, 'mental', 2), (3, 'mental', 3),
        (4, 'mental', 4), (5, 'mental', 4), (6, 'mental', 5),
        (7, 'mental', 3), (8, 'mental', 4), (9, 'mental', 4),
        (10, 'mental', 4),
        (11, 'physical', 4), (12, 'physical', 4), (13, 'physical', 3),
        (14, 'physical', 5), (15, 'physical', 4), (16, 'physical', 1),
        (17, 'physical', 1), (18, 'physical', 2), (19, 'physical', 3),
        (20, 'physical', 3),
        (21, 'medical', 1), (22, 'medical', 2), (23, 'medical', 2),
        (24, 'medical', 2), (25, 'medical', 3), (26, 'medical', 2),
        (27, 'medical', 3), (28, 'medical', 1), (29, 'medical', 3),
        (30, 'medical', 1)],
        dtype=[('idx', '<i4'),
               ('Treatment', '|S8'),
               ('StressReduction', '<i4')])

    # First, do a one-way ANOVA
    df = pd.DataFrame(dta2)
    model = ols('StressReduction ~ C(Treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)
    # The table is indexed by term name, so the first row is selected by
    # position explicitly (plain [0] on a labelled Series is deprecated).
    if anovaResults['PR(>F)'].iloc[0] < 0.05:
        print('One of the groups is different.')

    # Then, do the multiple testing
    mod = MultiComparison(dta2['StressReduction'], dta2['Treatment'])
    print(mod.tukeyhsd().summary())

    # The following call produces the same results
    res2 = pairwise_tukeyhsd(dta2['StressReduction'], dta2['Treatment'])

    # Show the group names
    print(mod.groupsunique)

    # Generate a plot of the pairwise mean differences with error bars
    xvals = np.arange(3)
    plt.plot(xvals, res2.meandiffs, 'o')
    errors = np.ravel(np.diff(res2.confint) / 2)
    # 'o' is a marker, not a line style, so it is passed as the format.
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, fmt='o')

    xlim = -0.5, 2.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    pair_labels = mod.groupsunique[np.column_stack(
        res2._multicomp.pairindices)]
    plt.xticks(xvals, pair_labels)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '\n Pairwise Mean Differences')

    # Save to outfile
    outFile = 'MultComp.png'
    plt.savefig('MultComp.png', dpi=200)
    print('Figure written to {0}'.format(outFile))
    plt.show()

    # Instead of the Tukey's test, we can do pairwise t-test
    # First, with the "Holm" correction
    rtp = mod.allpairtest(stats.ttest_rel, method='Holm')
    print(rtp[0])

    # and then with the Bonferroni correction
    print(mod.allpairtest(stats.ttest_rel, method='b')[0])

    # Done this way, the variance is calculated at each comparison.
    # If you want the joint variance across all samples, you have to
    # use a few tricks:(http://jpktd.blogspot.co.at/2013/03/multiple-comparison-and-tukey-hsd-or_25.html)
    res2 = pairwise_tukeyhsd(dta2['StressReduction'], dta2['Treatment'])
    # The t statistic is the mean difference divided by the pairwise
    # standard deviation of the studentized range (std_pairs) and sqrt(2);
    # the scalar pooled variance (res2.variance) is not the right divisor.
    t_stat = (res2.meandiffs / res2.std_pairs) / np.sqrt(2)
    dof = len(dta2) - len(mod.groupsunique)
    my_pvalues = stats.t.sf(np.abs(t_stat), dof) * 2  # two-sided

    # Now with the Bonferroni correction
    res_b = multipletests(my_pvalues, method='b')

    return res2.variance