def pg_ttest(data, group_col, group1, group2, fdr=0.05, value_col='MS signal [Log2]'):
    """Run a two-sample t-test per protein and apply FDR correction.

    Args:
        data: long-format DataFrame with ProteinID as index, one column of
            protein levels (``value_col``) and other columns for grouping.
        group_col: name of the column holding the group labels.
        group1, group2: the two group labels to compare.
        fdr: alpha level for the Benjamini-Hochberg FDR correction
            (previously accepted but ignored — 0.05 was hard-coded).
        value_col: name of the column holding the measured values.

    Returns:
        DataFrame indexed by protein with pingouin's t-test columns plus
        'difference', '-Log pvalue', 'qvalue' and 'rejected'.
    """
    df = data.copy()
    proteins = data.index.unique()

    results = []
    for protein in proteins:
        df_ttest = df.loc[protein]
        x = df_ttest[df_ttest[group_col] == group1][value_col]
        y = df_ttest[df_ttest[group_col] == group2][value_col]
        result = pg.ttest(x=x, y=y)
        result['protein'] = protein
        result['difference'] = y.mean() - x.mean()
        results.append(result)

    # Accumulate once with pd.concat — DataFrame.append is deprecated/removed
    # in modern pandas, and this also drops the dummy pg.ttest([1,2],[3,4])
    # call that existed only to pre-create the column schema.
    scores = pd.concat(results, ignore_index=True)
    scores['-Log pvalue'] = -np.log10(scores['p-val'])

    # FDR correction — honour the ``fdr`` argument instead of hard-coding 0.05.
    reject, qvalue = multi.fdrcorrection(scores['p-val'], alpha=fdr, method='indep')
    scores['qvalue'] = qvalue
    scores['rejected'] = reject
    return scores.set_index('protein')
def two_sample_ttests(group_var, DVs, data, paired=False, tails='two-sided', test_name='Mean Difference', **correction_args):
    """Run a two-sample t-test per dependent variable and tabulate the results.

    Args:
        group_var (str): column used to split the dataset; must have exactly
            two levels.
        DVs (list-like): dependent-variable names; one t-test per element.
        data (pandas.DataFrame): raw data, variables as columns and rows as
            observations.

    Optional Args:
        paired (bool): if True, run paired-sample t-tests.
        tails (str): "two-sided" or "one-sided" t-tests.
        test_name (str): label for the test contrast rows.
        **correction_args: keyword arguments forwarded to adjust_pvals().

    Returns:
        pandas.DataFrame of per-DV statistics with renamed columns, the
        confidence interval unpacked into CI_lower/CI_upper, and p-values
        adjusted via adjust_pvals().
    """
    from pingouin import ttest

    split = [subset for _, subset in data.groupby(group_var)]
    assert len(split) == 2

    conf_level = 1 - corrected_alpha_from(**correction_args)
    per_dv = []
    for dv in DVs:
        res = ttest(split[0][dv], split[1][dv], confidence=conf_level,
                    paired=paired, tail=tails)
        res.index.names = ['contrast']
        res['value'] = split[0][dv].mean() - split[1][dv].mean()
        per_dv.append(res)

    table = pd.concat(per_dv, names=['score'], keys=DVs)
    table = table.rename(columns={'p-val': 'p', 'T': 'tstat', 'dof': 'df', 'value': 'diff'},
                         index={'T-test': test_name})

    # pingouin packs the CI bounds into a single tuple-valued column
    # (e.g. 'CI95%'); unpack them into separate lower/upper columns.
    ci_col = next(c for c in table.columns if c[0:2] == 'CI')
    table = table.assign(CI_lower=table[ci_col].apply(lambda ci: ci[0]))
    table = table.assign(CI_upper=table[ci_col].apply(lambda ci: ci[1]))
    table = table.rename(columns={ci_col: 'CI'})

    return adjust_pvals(table, **correction_args)
def perform_kappa_t_tests(self):
    """Run unpaired t-tests comparing Cohen's kappa distributions.

    Compares the all-subject ankle-wrist kappas against the subset
    ankle-wrist kappas, and likewise for wrist-HR; stores pingouin's
    result frames on the instance and prints them.
    """
    # correction='auto' (lowercase) lets pingouin decide whether to apply
    # Welch's correction. The previous 'Auto' string did not match the
    # 'auto' sentinel and, being truthy, forced the correction
    # unconditionally — the sibling perform_kappa_ttest already uses 'auto'.
    self.anklewrist_t = pg.ttest(
        x=self.df_kappa["All_AnkleWrist"].dropna(),
        # NOTE(review): y is not dropna()'d here while both sides are
        # dropped in the Wrist-HR test below — confirm this is intentional.
        y=self.df_kappa["AnkleWrist"],
        paired=False,
        correction="auto")
    print("\nAnkle-Wrist Comparison")
    print(self.anklewrist_t)

    self.wristhr_t = pg.ttest(x=self.df_kappa["All_WristHR"].dropna(),
                              y=self.df_kappa["WristHR"].dropna(),
                              paired=False,
                              correction="auto")
    print("\nWrist-HR Comparison")
    print(self.wristhr_t)
def generate_quality_report(self, write_report=True):
    """Calculates how much of the data was usable. Returns values in dictionary.

    Summarises epoch validity (counts, percentage, hours lost), the mean
    accelerometer counts per validity/wear category when accelerometer data
    were loaded, and an unpaired t-test of valid vs. invalid epoch activity
    counts. Optionally writes the report to CSV.

    Args:
        write_report (bool): if True, save the report as
            "<output_dir><filename>_QualityReport.csv".

    Returns:
        dict: quality metrics keyed by label.
    """
    valid_epochs = self.epoch_validity.count(0)  # number of valid epochs
    invalid_epochs = self.epoch_validity.count(1)  # number of invalid epochs

    hours_lost = round(invalid_epochs / (60 / self.epoch_len) / 60, 2)  # hours of invalid data
    perc_valid = round(valid_epochs / len(self.epoch_validity) * 100, 1)  # percent of valid data
    perc_invalid = round(invalid_epochs / len(self.epoch_validity) * 100, 1)  # percent of invalid data

    # Average Bittium accelerometer counts during invalid, valid, and non-wear epochs ----------------------------
    df_valid = self.output_df.groupby("Valid").get_group("Valid")
    df_invalid = self.output_df.groupby("Valid").get_group("Invalid")
    df_invalid = df_invalid.loc[df_invalid["Wear"] == "Wear"]  # invalid epochs during wear only
    df_nonwear = self.output_df.groupby("Wear").get_group("Nonwear")

    if self.load_accel:
        valid_counts = df_valid.describe()["AccelCounts"]['mean']
        invalid_counts = df_invalid.describe()["AccelCounts"]['mean']
        nonwear_counts = df_nonwear.describe()["AccelCounts"]['mean']

        ttest = pg.ttest(df_valid["AccelCounts"], df_invalid["AccelCounts"], paired=False)
        print("\nUnpaired T-test results: valid vs. invalid ECG epochs' activity counts:")
        print("t({}) = {}, p = {}, Cohen's d = {}.".format(round(ttest["dof"].iloc[0], 1),
                                                           round(ttest["T"].iloc[0], 2),
                                                           round(ttest["p-val"].iloc[0], 3),
                                                           round(ttest["cohen-d"].iloc[0], 3)))
        t = round(ttest["T"].iloc[0], 3)
        p = round(ttest["p-val"].iloc[0], 5)

    if not self.load_accel:
        # No accelerometer data: zero out all count-based metrics.
        invalid_counts = 0
        valid_counts = 0
        nonwear_counts = 0
        t = 0
        p = 0

    quality_report = {"Invalid epochs": invalid_epochs,
                      "Hours lost": hours_lost,
                      "Percent valid": perc_valid,
                      "Percent invalid": perc_invalid,
                      "Valid counts": round(valid_counts, 1),
                      "Invalid counts": round(invalid_counts, 1),
                      "Nonwear counts": round(nonwear_counts, 1),
                      "Counts T": t,
                      "Counts p": p}

    # Bug fix: the precision argument belongs inside round(); previously it
    # was passed as a stray (ignored) second argument to str.format().
    print("{}% of the data is valid.".format(round(100 - perc_invalid, 3)))

    if write_report:
        df = pd.DataFrame(list(zip([i for i in quality_report.keys()],
                                   [i for i in quality_report.values()])),
                          columns=["Variable", "Value"])
        df.to_csv(path_or_buf=self.output_dir + self.filename + "_QualityReport.csv",
                  sep=",", index=False)

    return quality_report
def analyse(baseline, modified):
    """Compare two benchmark runs with a paired one-sided t-test.

    Args:
        baseline, modified: objects exposing a ``builds`` sequence of
            benchmark measurements, paired element-wise.

    Returns:
        AnalysisResults flagging whether ``modified`` is a statistically
        significant improvement over ``baseline`` (p < SIGNIFICANCE_LEVEL).

    Raises:
        Exception: wrapping (and chaining) any error raised by the analysis.
    """
    try:
        stats = ttest(baseline.builds, modified.builds, paired=True,
                      tail="greater").round(3)
        pvalue = stats.loc["T-test", "p-val"]
        improvement_detected = pvalue < SIGNIFICANCE_LEVEL
        details = PairedTTestDetails(pvalue, SIGNIFICANCE_LEVEL)
        return AnalysisResults(baseline, modified, details, improvement_detected)
    except Exception as err:
        # Narrowed from a bare ``except:`` (which also caught
        # KeyboardInterrupt/SystemExit); chain the original cause so the
        # root error is preserved in the traceback.
        logging.exception("Error when running analyser")
        raise Exception("Failed when running analyser for benchmarks") from err
def test_independence(self, column_name, test_stat, theor=False, direction='both', **kwargs): """ Permutation based independence test, desired test stat should be provided Args: test_stat (str or list of string): np.mean, np.median, np.std, np.percentile or several theor (str): provide theoretical t_stat results nsamples(int): number of samples for simulation direction (str): column_name (str): column name in dataframe for testing Returns: empirical test statitistics test_stat sample simulation based p_val dataframe with ttest output if theor = True """ self.test_type = 'independence' self.direction = direction self.theor = theor def permutation_sample(data1, data2): data = np.concatenate((data1, data2)) permuted_data = np.random.permutation(data) return permuted_data[:len(data1)], permuted_data[len(data1):] data1 = self.data[self.data['male'] == 0][column_name] data2 = self.data[self.data['male'] == 1][column_name] p1, p2 = permutation_sample(data1, data2) # empirical test statitistics t_diff = test_stat(data2) - test_stat(data1) # test_stat sample sample_test = [] # simulation based p_val diffs = np.squeeze(np.diff([list(map(test_stat, p1, p2)) for i in range(self.nsamples)])) p_val = test_stat(diffs > t_diff) if self.theor: ttest = pg.ttest(x=data1, y=data2).round(2) return t_diff, sample_test, p_val, ttest else: return t_diff, sample_test, p_val
def perform_kappa_ttest(self):
    """Unpaired t-test (plus Wilcoxon) of Cohen's kappa between activity groups."""
    # MIXED ANOVA ------------------------------------------------------------------------------------------------
    print(
        "\nPerforming unpaired T-test between activity groups on Cohen's Kappa values."
    )

    kappa_by_group = self.df_kappa_long.groupby("Group")
    high = kappa_by_group.get_group("HIGH")["Kappa"]
    low = kappa_by_group.get_group("LOW")["Kappa"]

    self.kappa_ttest = pg.ttest(high, low, paired=False, correction='auto')

    # Approximates hedges g using d x (1 - (3 / (4*(n1 + n2) - 9)).
    # NOTE(review): n1 + n2 is taken as 2 * len(high), i.e. this assumes the
    # two groups are the same size — confirm against the data.
    small_sample_factor = 1 - (3 / (4 * 2 * high.shape[0] - 9))
    self.kappa_ttest["hedges-g"] = self.kappa_ttest["cohen-d"] * small_sample_factor
    print(self.kappa_ttest)

    self.kappa_wilcoxon = pg.wilcoxon(high, low)
    print(self.kappa_wilcoxon)
def describe_clusters(df: pd.DataFrame, labs: np.array, dec: int) -> None:
    '''Build a comparison table of the variables between the 2 clusters.

    :df: original data dataframe
    :labs: label sequence assigning each row to one of the 2 clusters
    :dec: number of decimal places for rounding

    Produces a descriptive-statistics table (mean ± std per cluster and
    overall) with t-tests, prints it, and saves it as an Excel file in the
    Output folder.
    '''
    # NOTE(review): mutates the caller's dataframe in place by adding a
    # 'C' (cluster) column — confirm callers do not rely on df staying clean.
    df['C'] = labs
    # "mean ± std" strings per cluster, variables as rows.
    res_df = df.groupby('C').agg(lambda x: f"{np.round(np.mean(x),dec)} ± {np.round(np.std(x),dec)}").T
    res_df.columns = ['Cụm 1', 'Cụm 2']
    # Same summary over the whole sample ("Toàn thể" = overall).
    res_df['Toàn thể'] = df.agg(lambda x: f"{np.round(np.mean(x),dec)} ± {np.round(np.std(x),dec)}").T
    res_df = res_df[['Toàn thể', 'Cụm 1', 'Cụm 2']]
    p_val = []
    for v in res_df.index:
        # Per-variable t-test between clusters 0 and 1.
        # NOTE(review): ``tail='one-sided'`` is the legacy pingouin keyword;
        # recent releases use ``alternative=`` — confirm the pinned version.
        p = pg.ttest(df[df['C'] == 0][v], df[df['C']==1][v], tail='one-sided')['p-val'][0]
        p_val.append(p)
    res_df['Giá trị p'] = p_val  # p-value column
    # Map raw column names to display names (col_names defined elsewhere).
    res_df.index = res_df.index.map(col_names)
    print('Kết quả thống kê mô tả 2 phân cụm:')
    print('='*30)
    print(res_df.to_string())
    csv_name = os.path.join(output_folder, f"Table.xlsx")
    # NOTE(review): the ``encoding`` argument was removed from
    # DataFrame.to_excel in pandas >= 1.2 — confirm the pandas version.
    res_df.to_excel(csv_name, index = True, encoding='utf-8')
plt.ylim(0, 100) plt.yticks(range(0, 130, 20)) plt.ylabel('Time in Arena Periphery (%)') peri_ax.get_legend().remove() plt.tight_layout() #%% OF_plot.savefig('/Users/felipeantoniomendezsalcido/Desktop/OF_analysis2.png', dpi=300) OF_data # Area Timm Analysis timm_df = pd.read_csv('/Users/felipeantoniomendezsalcido/Desktop/Data/Timm Area.csv') timm_df.columns timm_df['Group'] = timm_df['Subject'].astype('str').str[0:3] timm_df['Mean Area'][timm_df['Genotype'] == 'WT'] pg.ttest(x=timm_df['Mean Area'][timm_df['Genotype'] == 'WT'], y=timm_df['Mean Area'][timm_df['Genotype'] == 'KO'], paired=False) #%% timm_fig, (a0, a1) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [3, 1]}, figsize=(7, 3)) timm_point = sns.pointplot(x='Level', y='Mean Area', hue='Genotype', data=timm_df, palette=[ 'b', 'g'], capsize=.05, scale=.7, errorwidth=.05, ci=68, ax=a0, label=['a', 'b']) a0.set_ylabel(r'Mean Area ($\mu$m$^2$)') a0.set_xlabel('from Bregma (mm)') a0.invert_xaxis() sns.despine() timm_point.get_legend().remove() timm_fig.legend(loc='upper right', bbox_to_anchor=(.29, .93), ncol=1) timm_total = sns.barplot(x='Genotype', y='Mean Area', data=timm_df, palette=['b', 'g'], ax=a1, ci=68, capsize=0.05, errwidth=1.5) a1.set_ylabel(r'Mean Area ($\mu$m$^2$)') a1.annotate('***', xy=(0.5, .98), xytext=(0.5, .96), xycoords='axes fraction', fontsize=18, ha='center',
def graph_gPPI(): from fg_config import lgroup import matplotlib.pyplot as plt import pingouin as pg import seaborn as sns # ROIS = ['rACC','sgACC','lh_hpc','rh_hpc','lh_amyg','rh_amyg'] ROIS = ['rh_hpc'] # COPES = ['acq_ext','ext_acq'] COPES = ['ext_acq'] groups = ['healthy', 'ptsd'] df = pd.read_csv('extracted_mem_gPPI.csv') df = df.groupby(['seed', 'cope', 'target', 'subject']).mean().reset_index() df['group'] = df.subject.apply(lgroup) df = df.set_index(['cope', 'group', 'seed', 'target']).sort_index() stats = pd.DataFrame(columns=['t', 'p'], index=pd.MultiIndex.from_product( [groups, ROIS, ROIS], names=['group', 'seed', 'target'])) gstats = pd.DataFrame(columns=['t', 'p'], index=pd.MultiIndex.from_product( [ROIS, ROIS], names=['seed', 'target'])) for seed in ROIS: for group in groups: for target in ROIS: tres = pg.ttest(df.loc[('ext_acq', group, seed, target), 'conn'].values, 0, tail='two-sided') stats.loc[(group, seed, target)][['t', 'p']] = tres.loc['T-test'][['T', 'p-val']] # gres = pg.ttest(df.loc[('ext_acq','healthy',seed,target),'conn'].values,df.loc[('ext_acq','ptsd',seed,target),'conn'].values,paired=False) # gstats.loc[(seed,target)][['t','p']] = gres.loc['T-test'][['T','p-val']] # mask = np.zeros([len(ROIS),len(ROIS)]) # mask[np.diag_indices_from(mask)] = True # fig, (gax, gcbar) = plt.subplots(2,2,gridspec_kw={'height_ratios':(.9,.05),'hspace':.5}) # for j, cope in enumerate(COPES): # gt = gstats.loc[(cope),'t'].unstack(level=-1).astype(float).loc[ROIS][ROIS] # gp = gstats.loc[(cope),'p'].apply(pconvert).unstack(level=-1).astype(str).loc[ROIS][ROIS] # sns.heatmap(gt,mask=mask,ax=gax[j],square=True, # annot=gp,fmt='',cmap='PRGn',center=0,vmin=-3,vmax=3, # cbar_ax=gcbar[j],cbar_kws={'orientation':'horizontal'}) # gax[j].set_title(cope + '_group_comp') fig1, (ax1, cbar1) = plt.subplots(2, 2, gridspec_kw={ 'height_ratios': (.9, .05), 'hspace': .5 }) fig2, (ax2, cbar2) = plt.subplots(2, 2, gridspec_kw={ 'height_ratios': (.9, .05), 'hspace': .5 }) for i, 
group in enumerate(groups): t = stats.loc[(group), 't'].unstack(level=-1).astype(float).loc[ROIS][ROIS] p = stats.loc[(group), 'p'].apply(pconvert).unstack( level=-1).astype(str).loc[ROIS][ROIS] # sns.heatmap(t,mask=mask,ax=ax[i],square=True, # annot=p,fmt='',cmap='PRGn',center=0,vmin=-3,vmax=3, # cbar_ax=cbar[i],cbar_kws={'orientation':'horizontal'}) # # ax[i].set_title(group + '_' + cope) # pfc_targ_t = t.loc[('rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg'),['rACC','sgACC']].T # pfc_targ_p = p.loc[('rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg'),['rACC','sgACC']].T pfc_targ_t = t.loc[('rh_hpc'), ['rACC', 'sgACC']].T pfc_targ_p = p.loc[('rh_hpc'), ['rACC', 'sgACC']].T sns.heatmap(pfc_targ_t, ax=ax1[i], annot=pfc_targ_p, square=True, fmt='', cmap='PRGn', center=0, vmin=-3, vmax=3, cbar_ax=cbar1[i], cbar_kws={'orientation': 'horizontal'}) ax1[i].set_title(group + ' ext vs. acq') pfc_seed_t = t.loc[('rACC', 'sgACC'), ['rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg']].T pfc_seed_p = p.loc[('rACC', 'sgACC'), ['rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg']].T sns.heatmap(pfc_seed_t, ax=ax2[i], annot=pfc_seed_p, square=True, fmt='', cmap='PRGn', center=0, vmin=-3, vmax=3, cbar_ax=cbar2[i], cbar_kws={'orientation': 'horizontal'}) ax2[i].set_title(group + ' ext vs. 
acq') ####encoding!#### edf = pd.read_csv('extracted_encode_gPPI.csv') edf = edf.set_index(['cope', 'phase', 'seed', 'target', 'subject']) edf = (edf.loc['csp', 'extinction'] - edf.loc['csp', 'acquisition']).reset_index() edf['group'] = edf.subject.apply(lgroup) edf = edf.set_index(['group', 'seed', 'target']) estats = pd.DataFrame(columns=['t', 'p'], index=pd.MultiIndex.from_product( [groups, ROIS, ROIS], names=['group', 'seed', 'target'])) for seed in ROIS: for group in groups: for target in ROIS: etres = pg.ttest(edf.loc[(group, seed, target), 'conn'].values, 0, tail='two-sided') estats.loc[(group, seed, target)][['t', 'p' ]] = etres.loc['T-test'][['T', 'p-val']] fig3, (ax3, cbar3) = plt.subplots(2, 2, gridspec_kw={ 'height_ratios': (.9, .05), 'hspace': .5 }) fig4, (ax4, cbar4) = plt.subplots(2, 2, gridspec_kw={ 'height_ratios': (.9, .05), 'hspace': .5 }) for i, group in enumerate(groups): t = estats.loc[(group), 't'].unstack(level=-1).astype(float).loc[ROIS][ROIS] p = estats.loc[(group), 'p'].apply(pconvert).unstack( level=-1).astype(str).loc[ROIS][ROIS] # sns.heatmap(t,mask=mask,ax=ax[i],square=True, # annot=p,fmt='',cmap='PRGn',center=0,vmin=-3,vmax=3, # cbar_ax=cbar[i],cbar_kws={'orientation':'horizontal'}) # # ax[i].set_title(group + '_' + cope) pfc_targ_t = t.loc[('rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg'), ['rACC', 'sgACC']].T pfc_targ_p = p.loc[('rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg'), ['rACC', 'sgACC']].T sns.heatmap(pfc_targ_t, ax=ax3[i], annot=pfc_targ_p, square=True, fmt='', cmap='PRGn', center=0, vmin=-3, vmax=3, cbar_ax=cbar3[i], cbar_kws={'orientation': 'horizontal'}) ax3[i].set_title(group + ' ext vs. 
acq') pfc_seed_t = t.loc[('rACC', 'sgACC'), ['rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg']].T pfc_seed_p = p.loc[('rACC', 'sgACC'), ['rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg']].T sns.heatmap(pfc_seed_t, ax=ax4[i], annot=pfc_seed_p, square=True, fmt='', cmap='PRGn', center=0, vmin=-3, vmax=3, cbar_ax=cbar4[i], cbar_kws={'orientation': 'horizontal'}) ax4[i].set_title(group + ' ext vs. acq')
def graph_gPPI_better(): from fg_config import lgroup from pysurfer import bnsurf import matplotlib.pyplot as plt import pingouin as pg import seaborn as sns # sns.set_context('talk') df = pd.read_csv('extracted_mem_gPPI.csv') df['group'] = df.subject.apply(lgroup) vm = (df.target == 'sgACC') d = (df.target == 'rACC') pfc = df[d | vm] pfc = pfc[pfc.seed != 'rh_hpc'] pfc.target = pfc.target.apply(lambda x: 'vmPFC' if x == 'sgACC' else 'dACC') for cope in pfc.cope.unique(): g = sns.catplot(data=pfc[pfc.cope == cope], x='target', y='conn', hue='seed', col='group', kind='bar', palette='mako', hue_order=[ 'amyg_cem', 'amyg_bla', 'hc_head', 'hc_body', 'hc_tail' ], sharey=False, height=10, aspect=1.2) plt.subplots_adjust(top=0.9) g.fig.suptitle(cope) plt.savefig('plots/roi_conn/%s.png' % (cope), fmt='png') # seeds = ['rh_hpc','hc_tail','hc_body','hc_head','amyg_bla','amyg_cem'] seeds = ['hc_tail', 'hc_body', 'hc_head', 'amyg_bla', 'amyg_cem'] targets = [ 'A32sg', 'A32p', 'A24cd', 'A24rv', 'A14m', 'A11m', 'A13', 'A10m', 'A9m', 'A8m', 'A6m' ] # targets = ['rh_hpc','hc_tail','hc_body','hc_head','amyg_bla','amyg_cem','sgACC','rACC','A32sg','A32p','A24cd','A24rv','A14m','A11m','A13','A10m','A9m','A8m','A6m'] copes = ['ext_acq', 'ext_csp_csm', 'acq_csp_csm'] groups = ['healthy', 'ptsd'] df = df.set_index(['cope', 'group', 'seed', 'target', 'subject']) stats = pd.DataFrame(columns=['t', 'p'], index=pd.MultiIndex.from_product( [groups, seeds, copes, targets], names=['group', 'seed', 'cope', 'target'])) for seed in seeds: for group in groups: for cope in copes: for target in targets: tres = pg.ttest(df.loc[(cope, group, seed, target), 'conn'].values, 0, tail='two-sided') stats.loc[(group, seed, cope, target)][[ 't', 'p' ]] = tres.loc['T-test'][['T', 'p-val']] # stats.loc[(group,seed,cope),'p'] = pg.multicomp(list(stats.loc[(group,seed,cope),'p'].values),method='fdr_bh')[1] stats['p_mask'] = stats.p.apply(lambda x: 0 if x > .05 else 1) stats['t_disp'] = stats.t * stats.p_mask for 
group in groups: for seed in seeds: for cope in copes: disp = stats.loc[group, seed, cope] if disp.t_disp.min() == 0 and disp.t_disp.max() == 0: pass else: if disp.t_disp.max() > 0: cmap = 'Reds' tail = 'greater' else: cmap = 'Blues_r' tail = 'less' bnsurf(disp, 't_disp', cmap, tail=tail, out='conn/%s_%s_%s' % (group, seed, cope)) #bnsurf(data,val,cmap,tail='greater',out=None): stats = stats.reset_index() stats.loc[np.where(stats.p < 0.05)[0]]
def stats(model, quantity, data, targets, tw, rm, nd): if model == 'absolute': data = data.drop(['NormQuant'], axis=1) data['NormMean'] = data['NormMean'].astype(float) mean = 'NormMean' else: data = data.drop(['rq'], axis=1) data['rqMean'] = data['rqMean'].astype(float) mean = 'rqMean' # prepare data from intermediate dataframe data = data[data['Outliers'].eq(False)] data = data.drop_duplicates(keep='first') # t-test and anova for normally distributed data if nd == 'True': if quantity == 2: # T-Test between 2 groups stats_dfs = pandas.DataFrame() posthoc_dfs = pandas.DataFrame() group = data['Group'].dropna() group = group.drop_duplicates(keep='first').values.tolist() for item in targets: df = data[data['Target Name'].eq(item)] group1 = df[df['Group'].eq(group[0])][mean] group2 = df[df['Group'].eq(group[1])][mean] t_test = ttest(group1, group2, paired=bool(rm)) if rm == 'True': t_test['paired'] = 'TRUE' else: t_test['paired'] = 'FALSE' t_test['Target Name'] = item if stats_dfs is None: stats_dfs = t_test else: stats_dfs = stats_dfs.append(t_test, ignore_index=True) # reformat output table stats_dfs = stats_dfs.rename(columns={ 'cohen-d': 'effect size', 'BF10': 'Bayes factor', 'dof': 'DF' }) cols = [ 'Target Name', 'DF', 'T', 'tail', 'paired', 'p-val', 'effect size', 'power', 'Bayes factor' ] stats_dfs = stats_dfs.reindex(columns=cols) elif quantity >= 3: # ANOVA test stats_dfs = pandas.DataFrame() posthoc_dfs = pandas.DataFrame() # tukey_dfs = pandas.DataFrame() pvals = [] for item in targets: if rm == 'True': # one-way if tw == 'False': # repeated measure anova aov = pg.rm_anova( dv=mean, data=data[data['Target Name'].eq(item)], within='Group', subject='Sample Name', detailed=True) pvals.append(aov['p-unc'][0]) aov = aov.drop([1]) aov['measures'] = ['dependent'] aov['Target Name'] = item # two-way else: aov = pg.rm_anova( dv=mean, data=data[data['Target Name'].eq(item)], within=['Group1', 'Group2'], subject='Sample Name', detailed=True) reject_tw, pval_corr_tw = 
pg.multicomp(list( aov['p-unc']), alpha=0.05, method='bonf') aov['p-value corrected'] = pval_corr_tw aov['measures'] = ['dependent'] * 3 aov['Target Name'] = [item] * 3 aov.drop(['eps'], axis=1) ph = pairwise_ttests( data=data[data['Target Name'].eq(item)], dv=mean, within='Group', subject='Sample Name', padjust='fdr_bh') ph['Target Name'] = item ph['Test'] = 'T-Test' else: # one-way if tw == 'False': aov = pg.anova(dv=mean, between='Group', data=data[data['Target Name'].eq(item)], detailed=True) pvals.append(aov['p-unc'][0]) aov = aov.drop([1]) aov['measures'] = ['independent'] aov['Target Name'] = item ph = pairwise_ttests( data=data[data['Target Name'].eq(item)], dv=mean, between='Group', padjust='fdr_bh') ph['Test'] = 'T-Test' # two-way else: aov = pg.anova(dv=mean, between=['Group1', 'Group2'], data=data[data['Target Name'].eq(item)], detailed=False) aov = aov.drop([3]) reject_tw, pval_corr_tw = pg.multicomp(list( aov['p-unc']), alpha=0.05, method='bonf') aov['p-value corrected'] = pval_corr_tw aov['measures'] = ['independent'] * 3 aov['Target Name'] = [item] * 3 ph = pairwise_ttests( data=data[data['Target Name'].eq(item)], dv=mean, between=['Group1', 'Group2'], padjust='fdr_bh') ph['Test'] = 'T-Test' ph['Target Name'] = item if stats_dfs is None: stats_dfs = aov else: stats_dfs = stats_dfs.append(aov, ignore_index=True) if posthoc_dfs is None: posthoc_dfs = ph else: posthoc_dfs = posthoc_dfs.append(ph, ignore_index=True) reject, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf') # reformat output tables stats_dfs = stats_dfs.rename(columns={ 'p-unc': 'p-value', 'np2': 'effect size' }) if tw == 'False': stats_dfs['p-value corrected'] = pvals_corr stats_dfs['distribution'] = ['parametric'] * len(targets) stats_dfs['test'] = ['ANOVA'] * len(targets) stats_dfs['statistic'] = ['NA'] * len(targets) else: stats_dfs['distribution'] = ['parametric'] * (len(targets) * 3) stats_dfs['test'] = ['ANOVA'] * (len(targets) * 3) stats_dfs['statistic'] = ['NA'] * 
(len(targets) * 3) cols = [ 'Target Name', 'Source', 'DF', 'F', 'MS', 'SS', 'p-value', 'p-value corrected', 'measures', 'distribution', 'test', 'statistic', 'effect size' ] stats_dfs = stats_dfs.reindex(columns=cols) if tw == 'False': posthoc_dfs = posthoc_dfs.drop(['Contrast', 'T'], axis=1) else: posthoc_dfs = posthoc_dfs.drop(['T'], axis=1) posthoc_dfs = posthoc_dfs.rename( columns={ 'hedges': 'effect size', 'p-corr': 'p-value corrected', 'p-unc': 'p-value', 'p-adjust': 'correction method', 'BF10': 'Bayes factor', 'dof': 'DF' }) if tw == 'False': cols2 = [ 'Target Name', 'A', 'B', 'DF', 'p-value corrected', 'p-value', 'correction method', 'Paired', 'Parametric', 'Test', 'effect size', 'Bayes factor' ] else: cols2 = [ 'Target Name', 'Contrast', 'Group1', 'A', 'B', 'DF', 'p-value corrected', 'p-value', 'correction method', 'Paired', 'Parametric', 'Test', 'effect size', 'Bayes factor' ] posthoc_dfs = posthoc_dfs.reindex(columns=cols2) # nonparametric tests for not normally distributed data else: if quantity == 2: stats_dfs = pandas.DataFrame() posthoc_dfs = pandas.DataFrame() group = data['Group'].dropna() group = group.drop_duplicates(keep='first').values.tolist() for item in targets: df = data[data['Target Name'].eq(item)] group1 = df[df['Group'].eq(group[0])][mean] group2 = df[df['Group'].eq(group[1])][mean] if rm == 'True': # Mann-Whitney U test test = mannwhitneyu(group1, group2) test = pandas.DataFrame( { 'Target Name': item, 'pvalue': test.pvalue, 'statistic': test.statistic }, index=[0]) else: # Wilcoxon test = wilcoxon(group1, group2) test = pandas.DataFrame( { 'Target Name': item, 'pvalue': test.pvalue, 'statistic': test.statistic }, index=[0]) if stats_dfs is None: stats_dfs = test else: stats_dfs = stats_dfs.append(test, ignore_index=True) elif quantity >= 3: stats_dfs = pandas.DataFrame() posthoc_dfs = pandas.DataFrame() pvals = [] for item in targets: if rm == 'True': # friedman test for repeated measurements df = pg.friedman(dv=mean, within='Group', 
subject='Sample Name', data=data[data['Target Name'].eq(item)]) pvals.append(df['p-unc'][0]) df['test'] = ['Friedman Q'] df['measures'] = ['dependent'] df = df.rename(columns={'Q': 'statistic'}) df['Target Name'] = item df['DF'] = 'NA' ph = pairwise_ttests( data=data[data['Target Name'].eq(item)], dv=mean, within='Group', subject='Sample Name', padjust='fdr_bh', parametric=False) ph['Target Name'] = item ph['DF'] = 'NA' ph['Bayes factor'] = 'NA' ph['Test'] = 'Wilcoxon' else: # Kruskal-Wallis H test df = pg.kruskal(dv=mean, between='Group', data=data[data['Target Name'].eq(item)]) pvals.append(df['p-unc'][0]) df['test'] = ['Kruskal-Wallis H'] df['measures'] = ['independent'] df = df.rename(columns={'H': 'statistic'}) df['Target Name'] = item df['DF'] = 'NA' ph = pairwise_ttests( data=data[data['Target Name'].eq(item)], dv=mean, between='Group', padjust='fdr_bh', parametric=False) ph['Target Name'] = item ph['DF'] = 'NA' ph['Bayes factor'] = 'NA' ph['Test'] = 'Mann-Whitney U' if stats_dfs is None: stats_dfs = df else: stats_dfs = stats_dfs.append(df, ignore_index=True) if posthoc_dfs is None: posthoc_dfs = ph else: posthoc_dfs = posthoc_dfs.append(ph, ignore_index=True) reject, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf') # reformat output tables stats_dfs = stats_dfs.rename(columns={ 'dof': 'DF', 'p-unc': 'p-value' }) stats_dfs['p-value corrected'] = pvals_corr stats_dfs['distribution'] = ['non-parametric'] * len(targets) stats_dfs['MS'] = ['NA'] * len(targets) stats_dfs['SS'] = ['NA'] * len(targets) stats_dfs['effect size'] = ['NA'] * len(targets) cols = [ 'Target Name', 'DF', 'MS', 'SS', 'p-value', 'p-value corrected', 'measures', 'distribution', 'test', 'statistic', 'effect size' ] stats_dfs = stats_dfs.reindex(columns=cols) posthoc_dfs = posthoc_dfs.drop(['Contrast'], axis=1) posthoc_dfs = posthoc_dfs.rename( columns={ 'hedges': 'effect size', 'p-corr': 'p-value corrected', 'p-unc': 'p-value', 'p-adjust': 'correction method', 'BF10': 'Bayes 
factor' }) cols2 = [ 'Target Name', 'A', 'B', 'DF', 'p-value corrected', 'p-value', 'correction method', 'Paired', 'Parametric', 'Test', 'effect size', 'Bayes factor' ] posthoc_dfs = posthoc_dfs.reindex(columns=cols2) return stats_dfs, posthoc_dfs
def _sig_prefix(p):
    """Significance stars for a p-value: '***' p<.001, '** ' p<.01, '* ' p<.05."""
    if p < .001:
        return '***'
    if p < .01:
        return '** '
    if p < .05:
        return '* '
    return ' '


def test_posthoc(df, dep_var, ind_vars, is_non_normal=None):
    """Pairwise post-hoc tests between condition levels.

    Runs Wilcoxon signed-rank tests when the data are non-normal, otherwise
    t-tests (paired when the two samples have equal length). Prints one line
    per comparison, prefixed by significance stars.

    Args:
        df (pandas.DataFrame): data with a 'Condition number' column and
            ``dep_var``.
        dep_var (str): dependent-variable column name.
        ind_vars (list-like): condition levels to compare pairwise.
        is_non_normal (bool | None): force the test family; when None it is
            decided by test_normality() (any p < 0.01 means non-normal).
    """
    # print(f'\n{dep_var}')
    ind_vars = sorted(ind_vars)
    if is_non_normal is None:  # identity check (was ``== None``)
        normality_p = test_normality(df, dep_var, ind_vars)
        significants = [p for p in normality_p if p < 0.01]
        is_non_normal = len(significants) > 0

    # All unordered pairs of condition levels.
    iv_combinations = []
    for iv in ind_vars:
        for iv1 in ind_vars:
            if (iv != iv1) and ((iv, iv1) not in iv_combinations) and (
                    (iv1, iv) not in iv_combinations):
                iv_combinations.append((iv, iv1))

    for comb in iv_combinations:
        x = df.loc[df['Condition number'] == comb[0]][dep_var]
        y = df.loc[df['Condition number'] == comb[1]][dep_var]
        try:
            if is_non_normal:
                results = pg.wilcoxon(x, y, alternative='two-sided').round(4)
                t = list(results['W-val'])[0]
                p = list(results['p-val'])[0]
                prefix = _sig_prefix(p)
                print(
                    f'{prefix}{comb} Wilco: W={round(t, 2)}, p={round(p, 3)}')
            else:
                # NOTE(review): pairing inferred from equal sample sizes is
                # fragile — confirm the design is actually within-subjects.
                paired = len(x) == len(y)
                results = pg.ttest(x, y, paired=paired,
                                   alternative='two-sided').round(4)
                t = list(results['T'])[0]
                p = list(results['p-val'])[0]
                prefix = _sig_prefix(p)
                print(
                    f'{prefix}{comb} Ttest: t={round(t, 2)}, p={round(p, 3)}')
        except Exception as e:
            # Best-effort: report the failing pair and continue with the rest.
            print(f'Error in {comb}: {e}')
    return
ax2 = pg.qqplot(x2, ax=ax2) st.pyplot(fig) st.success("Levene test for homoscedasticity of variances") homoscedasticity = pg.homoscedasticity(df, dv=x_var, group=y_var) st.write(homoscedasticity) if param_vs_nonparam == "Parametric tests (Student, Welch)": if homoscedasticity.loc["levene", "pval"] < 0.05: test_message = "Welch test results:" else: test_message = "Student t-test results:" st.success(test_message) t = pg.ttest(x1, x2) st.write(t) else: test_message = "Mann-Whitney test results:" st.success(test_message) mw = pg.mwu(x1, x2) st.write(mw) md = markdown.Markdown() ipsum_path = Path('Md/student_help.md') data = ipsum_path.read_text(encoding='utf-8') html = md.convert(data) # help_markdown = util.read_markdown_file("help.md")
fig, axes = plt.subplots(2, 2, figsize=(9, 4)) metric_types = ['magnitude', 'n_spindles', 'amplitude', 'duration'] p_all = np.zeros((4, 4)) for j_metric_type, metric_type in enumerate(metric_types): df_metric_type = stats_df_all.query( 'metric_type=="{}"'.format(metric_type)) for j_fb_type, fb_type in enumerate(fb_types): ax = axes[j_metric_type // 2, j_metric_type % 2] df = df_metric_type.query('fb_type=="{}"'.format(fb_type)) pd.set_option('display.max_columns', 500) res = ttest(df.query('baseline=="After"')['metric'], df.query('baseline=="Before"')['metric'], paired=True) # res = pairwise_ttests(df, dv='metric', within='baseline', subject='subj_id') p = res['p-val'].values[0] p_all[j_fb_type, j_metric_type] = p res_str = '$p_u$={:.3f}\n'.format( p) + r'$Diff_{CI95}$=' + '[{}, {}]'.format(*res['CI95%'].values[0]) x_before = df.query('baseline=="Before"')['metric'].values x_after = df.query('baseline=="After"')['metric'].values for j in range(len(x_before)): pair = np.array([x_before[j], x_after[j]]) ax.plot(np.array([0, 2]) + 3 * j_fb_type, pair, '--o', color='C3' if p < 0.05 else 'k',
'Error': rmaPTER, 'AbsError': rmaAbsPTER, 'RT': rmaRT } df_PT = pd.DataFrame(data=d2) df_PT = df_PT[df_PT.AbsError.notnull()] # no nan assert (df_PT.PT.isnull().sum() == 0) df_mean_nn = df_mean[df_mean.AbsError.notnull()] # dropping null values #################################### # # Running Statistical Tests ###################################### # t-test for comparison to Sven's analysis ttestSNR = pingouin.ttest(loSNR, hiSNR, paired=True) # correction='auto' # rm_anova for SNR on Error rm_SNR = pingouin.rm_anova(data=df_mean_nn, dv='AbsError', within=['SNR'], subject='Sub') print(rm_SNR) # # MLM for SNR on Error # mlm_SNR = smf.mixedlm("AbsError ~ SNR", df_mean_nn, groups=df_mean_nn["Sub"]) # mdf_SNR = mlm_SNR.fit() # print(mdf_SNR.summary()) # # A = np.identity(len(mdf_SNR.params)) # A = A[1:,:]
fa_IM_B = 1 - Data_IM_B[(Data_IM_B['SameDifferent'] == 'D')].groupby( ['Participant'])['isCorrect'].mean() d_IM_B = SDT(hit_IM_B.tolist(), fa_IM_B.tolist()) ## one-sample t test, whether d prime is different from zero # in all conditions, the d prime was significant larger than zero, except p8!!! #t1, p1 = stats.ttest_1samp(d_CA_T,0.0) #t2, p2 = stats.ttest_1samp(d_CM_T,0.0) #t3, p3 = stats.ttest_1samp(d_IA_T,0.0) #t4, p4 = stats.ttest_1samp(d_IM_T,0.0) #t5, p5 = stats.ttest_1samp(d_CA_B,0.0) #t6, p6 = stats.ttest_1samp(d_CM_B,0.0) #t7, p7 = stats.ttest_1samp(d_IA_B,0.0) #t8, p8 = stats.ttest_1samp(d_IM_B,0.0) e1 = ttest(d_CA_T, 0.0) e2 = ttest(d_CM_T, 0.0) e3 = ttest(d_IA_T, 0.0) e4 = ttest(d_IM_T, 0.0) [e1['p-val'], e2['p-val'], e3['p-val'], e4['p-val']] [e1['cohen-d'], e2['cohen-d'], e3['cohen-d'], e4['cohen-d']] e5 = ttest(d_CA_B, 0.0) e6 = ttest(d_CM_B, 0.0) e7 = ttest(d_IA_B, 0.0) e8 = ttest(d_IM_B, 0.0) [e5['p-val'], e6['p-val'], e7['p-val'], e8['p-val']] [e5['cohen-d'], e6['cohen-d'], e7['cohen-d'], e8['cohen-d']] dprime = pd.DataFrame({ 'Congruent_Aligned_Top': d_CA_T,
def gen_histograms(plot_type="histogram"):
    """Plot validity summaries from the module-level ``df_stats`` table.

    Runs a paired t-test between the valid-wake and valid-sleep proportions
    and returns that result; the requested plot is drawn as a side effect.

    Args:
        plot_type (str): one of "histogram", "barplot", "boxplot", "scatter".

    Returns:
        pandas.DataFrame: pingouin paired t-test output with a 'Variable'
        label column.
    """
    # Paired comparison of valid-epoch proportions during wake vs. sleep.
    df_pg = pg.ttest(df_stats["valid_wake"], df_stats['valid_sleep'], paired=True)
    df_pg["Variable"] = ["Valid-Invalid"]

    if plot_type == "histogram":
        # 2x3 grid: left column all data, middle/right wake and sleep;
        # valid proportions in green, invalid in red.
        fig, axes = plt.subplots(2, 3, figsize=(10, 6))
        plt.subplots_adjust(left=.05, top=.95, hspace=.25)
        bins = np.arange(0, 1.05, .1)  # proportions binned in steps of .1
        axes[0][0].hist(df_stats["n_valid"], color='green', alpha=.5, edgecolor='black', bins=bins)
        axes[0][0].set_title("% valid (all)")
        axes[1][0].hist(df_stats["n_invalid"], color='red', alpha=.5, edgecolor='black', bins=bins)
        axes[1][0].set_title("% invalid (all)")
        axes[0][1].hist(df_stats["valid_wake"], color='green', alpha=.5, edgecolor='black', bins=bins)
        axes[0][1].set_title("% valid wake")
        axes[0][2].hist(df_stats["invalid_wake"], color='red', alpha=.5, edgecolor='black', bins=bins)
        axes[0][2].set_title("% invalid wake")
        axes[1][1].hist(df_stats["valid_sleep"], color='green', alpha=.5, edgecolor='black', bins=bins)
        axes[1][1].set_title("% valid sleep")
        axes[1][2].hist(df_stats["invalid_sleep"], color='red', alpha=.5, edgecolor='black', bins=bins)
        axes[1][2].set_title("% invalid sleep")

    if plot_type == 'barplot':
        # Mean ± SD bars for every column of df_stats.
        df_desc = df_stats.describe()
        fig, ax = plt.subplots(1, figsize=(10, 6))
        ax.bar(x=df_desc.columns, height=df_desc.loc['mean'], yerr=df_desc.loc["std"],
               capsize=4, color=['green', 'red'], edgecolor='black', alpha=.5)
        ax.set_title("Mean ± SD")

    if plot_type == 'boxplot':
        fig, ax = plt.subplots(1, figsize=(10, 6))
        df_stats.boxplot(grid=False, ax=ax)

    if plot_type == "scatter":
        # valid-wake vs valid-sleep with a dashed identity line for reference.
        plt.scatter(df_stats["valid_wake"], df_stats["valid_sleep"],
                    edgecolors='black', color='red')
        plt.ylabel("valid_sleep")
        plt.xlabel("valid_wake")
        plt.plot(np.arange(0, 1.1, .1), np.arange(0, 1.1, .1),
                 color='black', linestyle='dashed')

    return df_pg
def quantUnpaired(imgDir, sheetName, sheetDf, showDf=False, silent=True):
    """Run an omnibus ANOVA plus all pairwise unpaired t-tests on the
    columns of sheetDf, then draw a box plot annotated with the stats.

    Args:
        imgDir (str): directory where the output figure is written.
        sheetName (str): sheet label, used for the banner, title and filename.
        sheetDf (pd.DataFrame): wide-format data, one condition per column.
        showDf (bool): if True, print the (sub-)dataframes being tested.
        silent (bool): if False, print the banner, summaries and results.

    Side effects:
        Writes '<imgDir>/<sheetName>.png' via BoxPlotter.BoxPlotter.
    """
    if not silent:
        print("######################################## ", sheetName,
              " ########################################")
        print(sheetDf.describe())

    result_columns = ['COMPARISON', 'TEST', 'STATISTICS', 'P-VALUE',
                      'EFFECT SIZE']
    # Collect result rows in a list and build the DataFrame once at the end:
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
    rows = []

    if len(sheetDf.columns) > 2:
        if showDf:
            print(sheetDf)
        # NOTE(review): pg.rm_anova is a repeated-measures (paired) ANOVA;
        # confirm that is intended here given the function is "Unpaired".
        aov = pg.rm_anova(sheetDf)
        statistic = aov['F'].values[0]
        # Prefer the Greenhouse-Geisser corrected p-value when pingouin
        # provides it (sphericity violated); fall back to the uncorrected one.
        pvalue = (aov['p-GG-corr'].values[0]
                  if 'p-GG-corr' in aov.columns.values
                  else aov['p-unc'].values[0])
        effsize = aov['np2'].values[0]
        if not silent:
            print(sheetDf.columns.str.cat(sep=' | '),
                  " -> ANOVA (statistic:", statistic,
                  " p-value: ", pvalue, ")")
        rows.append({'COMPARISON': 'ALL', 'TEST': "ANOVA",
                     'STATISTICS': statistic, 'P-VALUE': pvalue,
                     'EFFECT SIZE': effsize})

    # Pairwise unpaired Student t-tests over every column pair.
    for i in range(len(sheetDf.columns.values)):
        for j in range(i + 1, len(sheetDf.columns.values)):
            col_i = sheetDf.columns.values[i]
            col_j = sheetDf.columns.values[j]
            try:
                df = sheetDf[[col_i, col_j]]
                if showDf:
                    print(df)
                # pingouin drops NaNs itself, so the former redundant
                # scipy.stats.ttest_ind pre-computation (whose results were
                # immediately overwritten) has been removed.
                ttest_stats = pg.ttest(df[df.columns[0]], df[df.columns[1]],
                                       paired=False)
                statistic = ttest_stats['T'].values[0]
                pvalue = ttest_stats['p-val'].values[0]
                effsize = ttest_stats['cohen-d'].values[0]
                if not silent:
                    print(col_i, '|', col_j, " -> Student (statistic: ",
                          statistic, ", p-value: ", pvalue, ")")
                rows.append({'COMPARISON': col_i + '|' + col_j,
                             'TEST': "Student", 'STATISTICS': statistic,
                             'P-VALUE': pvalue, 'EFFECT SIZE': effsize})
            except ValueError as StudentError:
                # Record sentinel values so the failed comparison still
                # appears in the output table / plot annotations.
                if not silent:
                    print(col_i, '|', col_j, " -> Student (",
                          StudentError, ")")
                rows.append({'COMPARISON': col_i + '|' + col_j,
                             'TEST': "Student", 'STATISTICS': -1,
                             'P-VALUE': -1, 'EFFECT SIZE': -1})

    statDf = pd.DataFrame(rows, columns=result_columns)
    BoxPlotter.BoxPlotter(filename=imgDir + '/' + sheetName + '.png',
                          title=sheetName, sheetDf=sheetDf, statDf=statDf)
# statDF['sem'] = df.groupby(['participantsType', 'decisionSteps'])["avoidCommitPercent"].apply(calculateSE)
# Restrict the analysis to the human participants.
statDF = statDF[statDF['participantsType'] == 'Human']
# statDF = statDF[statDF['participantsType'] == 'RL Agent']
# statDF = statDF[statDF['decisionSteps'] == 1]
# # print(statDF)
# dfExpTrail.to_csv('dfExpTrail.csv')

# Compute the two-way mixed-design ANOVA
calAnova = 1
if calAnova:
    import pingouin as pg
    # Full option name: the abbreviated 'max_columns' key is deprecated.
    pd.set_option('display.max_columns', 8)
    # One-sample t-test of ShowCommitmentPercent against chance (0.5).
    # Renamed from 'stats' — the old name was shadowed by the
    # 'from scipy import stats' import below.
    ttest_result = pg.ttest(statDF['ShowCommitmentPercent'], 0.5)
    print(ttest_result)
    print('mean:', np.mean(statDF['ShowCommitmentPercent']))
    # print(ttest_result['p-val'])
    # print(ttest_result['CI95%'])

    # Cross-check with scipy's one-sample t-test against the same mean.
    from scipy import stats
    pop_mean = 0.5
    t, p_twotail = stats.ttest_1samp(statDF['ShowCommitmentPercent'],
                                     pop_mean)
    print('t=', t, 'p=', p_twotail)

# from scipy import stats
# a = stats.ttest_1samp(statDF['ShowCommitmentPercent'], 0.5)
# print(a)
y='model_uncertainty', subject='image') print(dataset + "/" + model, float(corr_res['r']), float(corr_res['pval'])) print('\nged, p-values, each pair ged(model1) < ged(model2)') for dataset in data_images['dataset'].unique(): data_set = data_images[data_images['dataset'] == dataset] for modeli in data_set['model'].unique(): model_datai = data_set[data_set['model'] == modeli] for modelj in data_set['model'].unique(): if modeli == modelj: continue model_dataj = data_set[data_set['model'] == modelj] ged_res = pg.ttest(model_datai['ged'], model_dataj['ged'], tail='less') print(dataset + "/" + modeli + "/" + modelj, float(ged_res['p-val'])) print('\ncorrelation uncertainty, agreement') for dataset in data_samples['dataset'].unique(): data_set = data_samples[data_samples['dataset'] == dataset] for model in data_set['model'].unique(): model_data = data_set[data_set['model'] == model] corr_res = pg.rm_corr(model_data, x='annot_agreement', y='model_uncertainty', subject='image') print(dataset + "/" + model, float(corr_res['r']),
print("AL Runs Scored Variance: {}".format(al_rs.var())) # box plot fig, ax = plt.subplots(figsize=(8, 8)) sns.boxplot(x="League", y="RS", data=batting_df, palette="Set1", boxprops=dict(alpha=0.5)) ax.set(title="Runs Scored Distribution by League") plt.show() # Pooled two-sample t-test test_result = pg.ttest(al_rs, nl_rs, paired=False, alternative='greater', correction=False).round(3) print("------- Pooled two-sample t-test result -------") print(test_result.to_string()) # given the p-value is approximately 0, # we reject H0 and have a strong evidence that AL teams scored more than NL teams on average # 'RS' histogram and QQ plot fig, axes = plt.subplots(1, 2, figsize=(20, 8)) sns.histplot(batting_df['RS'], kde=True, ax=axes[0], color="navy") axes[0].set_title('Team RS Histogram') axes[1] = stats.probplot(batting_df['RS'], plot=plt) plt.title('Team RS QQ Plot') plt.show()
import matplotlib.patheffects as mpatheffects
from matplotlib import rcParams

# Global matplotlib styling for the exported grant figure.
rcParams['savefig.dpi'] = 300
rcParams['interactive'] = True
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = 'Arial'
rcParams['axes.spines.top'] = False
rcParams['axes.spines.right'] = False

# Output path for the final plot.
EXPORT_FNAME = '../results/sri-phenoll2_plot-GRANT.png'

# from excel sheet
# Hard-coded per-subject values for the two conditions (n=12 each);
# presumably copied from the source spreadsheet — TODO confirm provenance.
lo = [13, 16, 14, 11, 31, 23, 25, 13, 12, 10, 11, 19]
hi = [30, 40, 35, 15, 35, 31, 36, 11, 10, 23, 17, 19]

# Paired t-test (same subjects in both conditions) via pingouin.
ttest = pg.ttest(lo, hi, paired=True)

# Bar heights (condition means) and error bars (SEM) for lo then hi.
yvals, yerr = zip(*[(np.mean(vals), sem(vals)) for vals in [lo, hi]])
xvals = [0, 1]
COLORS = dict(lo='gainsboro', hi='cornflowerblue')
color_seq = [COLORS[c] for c in ['lo', 'hi']]

# legend
# Layout constants used by the plotting code further down.
STROKE_WIDTH = .6
FONT_SIZE = 15
LEFT_PAD = .02
# FONT_PAD = .055
BARWIDTH = .7
# Notebook-style bare expressions: these only render their values in an
# IPython/Jupyter session (variance / SD summaries computed earlier).
vara_alle
vara_musik
vara_sound
stda_alle
stda_music
stda_sound

# Plot of one group (translated from German: "plot einer Gruppe").
get_ipython().magic(u'matplotlib inline')
plt.plot(mean_w)
plt.xlabel('Zeitpunkte')
plt.ylabel('Cortisol nmol/L')
plt.title("Mittelwerte der Cortisolmessungen in der Sound Gruppe")

# Plot the two groups against each other
# (translated from German: "plot zwei Gruppen gegeneinander").
get_ipython().magic(u'matplotlib inline')
fig, ax = plt.subplots()
ax.plot(mean_m, label='musik')
ax.plot(mean_w, label='sound')
plt.xlabel('Zeitpunkte')
plt.ylabel('Cortisol nmol/L')
plt.title("Mittelwerte Cortisol beide Gruppen")
plt.legend()

# The bare string below is an author note (German: "Normally
# distributed? - No if p is below alpha"); Shapiro-Wilk tests the
# normality of the group means.
''' Normalvertteilt?-Nein wenn p unter alpha'''
stats.shapiro(mean_w)

# Unpaired t-test between the two groups.
ttest(mean_w, mean_m, paired =False)

# Author note (German: "Mann-Whitney U test, assuming non-parametric"):
# non-parametric alternative used when normality is rejected.
'''Man Whitney U Test , angenommen nicht parametrisch'''
pg.mwu(mean_w, mean_m)
color="blue", label="AM", linestyle="none") plt.xlabel("sujet") plt.ylabel("différence de cadence de modulation (%)") plt.xticks(subject) plt.legend(loc=0) plt.savefig(os.path.join(path_fig, "seuils_discrimination.png")) plt.show() #%% anova des seuils adaptatifs import pingouin as pg data_adapt = pd.read_csv("seuils_adaptatifs.txt", index_col=0) data_discr = pd.read_csv("seuils_discrimination.txt", index_col=0) adapt_am = data_adapt[data_adapt.modulation_type == "AM"] adapt_fm = data_adapt[data_adapt.modulation_type == "FM"] aov_adapt_am = pg.anova(data=adapt_am, dv="seuil", between="subject") aov_adapt_fm = pg.anova(data=adapt_fm, dv="seuil", between="subject") pg.print_table(aov_adapt_am) pg.print_table(aov_adapt_fm) #%% t-test des seuils de discrimination discr_t_test = pg.ttest(x=am_discr, y=fm_discr, paired=True, tail="one-sided") pg.print_table(discr_t_test) #discr_t_test.to_excel("t_test_seuils_discrimination.xlsx")
fig2 = alt.Chart(data_line).mark_line().encode( x='x', y=alt.Y('y', scale=alt.Scale(domain=(-30, 30)))) #%% defining reference line on y=0 vline = pd.DataFrame([{"x": 0}]) fig3 = (alt.Chart(vline).mark_rule(color="black", opacity=1.0, strokeDash=[3, 5]).encode(x="x:Q")) fig4 = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color="black", opacity=1.0, strokeDash=[3, 5]).encode(y='y') #%% drawing graphs fig5 = fig1 + fig2 + fig3 + fig4 with col4: st.write("####") st.altair_chart(fig5, use_container_width=True) #%% t test with col4: eq1 = r"y = b_{0} + b_{1}x" st.latex(eq1) eq2 = r"y = b_{0} + b_{1}x" eq2 = eq2.replace("b_{0}", f"{mean}").replace("b_{1}x", "0") st.latex(eq2) st.write("T-test results") ttest = pg.ttest(points, 0).round(2).drop(['tail', 'CI95%'], axis=1) st.write(ttest)
# Notebook-style EDA: group summaries, normality and variance checks,
# then a two-sample t-test of 'percent' between the two groups.
df.groupby('grouping').describe()
display(df.describe())
print('---')

# normal
# Shapiro-Wilk normality check per group. The bare calls only render in
# a notebook; the display() calls below actually show the results.
stats.shapiro(male)
stats.shapiro(female)
display(stats.shapiro(male))
display(stats.shapiro(female))
# (Swedish print below: "If p < 0.05 --> non-normally distributed data.")
print('Om p < 0.05 --> icke-normalfördelat material.')
print('---')

# boxplot
sns.boxplot(x='grouping', y='percent', data=df)
plt.savefig('boxplot.png')
# plt.show()

# homogeneity of variance
stats.levene(male, female)
display(stats.levene(male, female))
# (Swedish comment translated): "the null hypothesis assumes homogeneity
# - which does not hold with that p-value."
# NOTE(review): the printed Swedish hint below ("If p < 0.05 -->
# homogeneous variance") looks inverted — for Levene, p < 0.05 means
# the variances are NOT homogeneous. Runtime string left unchanged.
print('Om p < 0.05 --> homogen varians.')
print('---')

# two-samples t-test
# correction=False forces the classic pooled Student t-test instead of
# Welch's unequal-variance correction.
res = pg.ttest(male, female, correction=False)
display(res)
# obs! hårdkodat  (Swedish: "note! hard-coded")
# print("Nollhypotesen för Shapiro-Wilks säger normalfördelad grupp. Gör Mann-Whitney!")