# In[18]:

# Independent two-sample t-test on every phenotype, comparing the two sex
# groups (sex: 1=male, 2=female). One row per phenotype is written to `stats`.
stats = pd.DataFrame(index=phenos, columns=['test_stat', 'pval'])

for i, pheno in enumerate(phenos):
    x = df.loc[df['sex'] == 1, pheno]
    # training-set-only alternative:
    # x = df.loc[np.logical_and(df[train_test_str] == 1,df['sex'] == 1),pheno]
    y = df.loc[df['sex'] == 2, pheno]
    # training-set-only alternative:
    # y = df.loc[np.logical_and(df[train_test_str] == 1,df['sex'] == 2),pheno]

    # element 0 is the test statistic, element 1 the p-value
    test_output = sp.stats.ttest_ind(x, y)
    stats.loc[pheno, 'test_stat'] = test_output[0]
    stats.loc[pheno, 'pval'] = test_output[1]

# get_fdr_p is defined elsewhere in this file; presumably it returns
# multiple-comparison-corrected p-values -- TODO confirm.
stats['pval_corr'] = get_fdr_p(stats['pval'])
stats['sig'] = stats['pval_corr'] < 0.05

# notebook display: the table rounded to two decimals
np.round(stats.astype(float), 2)

# In[19]:

# One narrow panel per phenotype, laid out in a single row.
n_panels = len(phenos)
f, ax = plt.subplots(1, n_panels)
f.set_figwidth(n_panels * 1.4)
f.set_figheight(1.25)

# sex: 1=male, 2=female
for i, pheno in enumerate(phenos):
    x = df.loc[df['sex'] == 1, pheno]
    # training-set-only alternative:
    # x = df.loc[np.logical_and(df[train_test_str] == 1,df['sex'] == 1),pheno]
    sns.kdeplot(x, ax=ax[i], label='male', color='b')
assign_p=assign_p, nulldir=nulldir) elif assign_p == 'parametric': df_pheno_z = run_pheno_correlations(df.loc[:, phenos], df_z, method=method, assign_p=assign_p) # In[22]: # correct multiple comparisons. We do this across brain regions and phenotypes (e.g., 400*6 = 2400 tests) df_p_corr = pd.DataFrame(index=df_pheno_z.index, columns=['p-corr']) # output dataframe for metric in metrics: p_corr = get_fdr_p(df_pheno_z.loc[:, 'p'].filter( regex=metric)) # correct p-values for metric p_corr_tmp = pd.DataFrame( index=df_pheno_z.loc[:, 'p'].filter(regex=metric).index, columns=['p-corr'], data=p_corr) # set to dataframe with correct indices df_pheno_z.loc[p_corr_tmp.index, 'p-corr'] = p_corr_tmp # store using index matching # In[23]: for pheno in phenos: for metric in metrics: print( pheno, metric, np.sum( df_pheno_z.filter(regex=metric, axis=0).filter(
# # 1) Regions where there is a significant relationship between age and the regional brain features in the training set
# # 2) Regions where the normative model was able to perform out-of-sample predictions (as indexed by standardized mean squared error < 1)
# # 3) Regions where extreme deviations occur

# ### 1) Age effects

# In[26]:

# Pearson correlation between the primary covariate and each regional feature,
# computed on the training set only.
age_alpha = 0.05
df_age_effect = run_corr(df_train[primary_covariate], df_node_train, typ='pearsonr')
df_age_effect['p_fdr'] = get_fdr_p(df_age_effect['p'])

# NOTE(review): p_fdr is computed before the rows in my_list are dropped, so
# those rows still take part in the correction -- confirm this is intended.
if parc_str == 'lausanne':
    df_age_effect.drop(index=my_list, inplace=True)

# boolean mask: True where the corrected p-value is below age_alpha
age_filter = df_age_effect['p_fdr'].values < age_alpha
age_filter.sum()

# ### 2) Normative model performance

# In[27]:

# boolean mask, flattened to 1-D: True where SMSE is below the threshold
smse_thresh = 1
smse_filter = (df_smse.values < smse_thresh).reshape(-1)
smse_filter.sum()
nulldir=nulldir) elif assign_p == 'parametric': df_pheno = run_pheno_correlations(df.loc[:, phenos], df_node, method=method, assign_p=assign_p) # In[12]: # correct multiple comparisons. We do this across brain regions and phenotypes (e.g., 400*6 = 2400 tests) df_p_corr = pd.DataFrame(index=df_pheno.index, columns=['p-corr']) # output dataframe for metric in metrics: p_corr = get_fdr_p( df_pheno.loc[:, 'p'].filter(regex=metric)) # correct p-values for metric p_corr_tmp = pd.DataFrame( index=df_pheno.loc[:, 'p'].filter(regex=metric).index, columns=['p-corr'], data=p_corr) # set to dataframe with correct indices df_pheno.loc[p_corr_tmp.index, 'p-corr'] = p_corr_tmp # store using index matching # In[13]: alpha = 0.05 print(alpha) # In[14]: