df_v = scores_as_df(results, 'valid', em_col=em_col)
df_t = scores_as_df(results, 'test', em_col=em_col)
# AUPRC of the embedding, reconstruction, and composite scores for replicate i
v_auprc[i, 0] = average_precision_score(df_v.label, df_v.scores_em)
t_auprc[i, 0] = average_precision_score(df_t.label, df_t.scores_em)
v_auprc[i, 1] = average_precision_score(df_v.label, df_v.scores_rec)
t_auprc[i, 1] = average_precision_score(df_t.label, df_t.scores_rec)
v_auprc[i, 2] = average_precision_score(df_v.label, combined_scores_valid[i])
t_auprc[i, 2] = average_precision_score(df_t.label, combined_scores_test[i])

# barplot of the validation and test AURC / AUPRC for each score
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
metric_barplot([v_auc, t_auc, v_auprc, t_auprc],
               ['Validation AURC', 'Test AURC', 'Validation AUPRC', 'Test AUPRC'],
               [name.title() for name in scores_names + ['composite']],
               colors=['tomato', 'coral', 'dodgerblue', 'cornflowerblue'],
               w=None, ax=ax, fontsize=fontsize, jitter=False,
               jitter_color='lightcoral')
ax.set_ylabel('AURC ; AUPRC [-]', fontsize=fontsize)
ax.set_title('Validation and Test AUCs for various scores', fontsize=fontsize)
fig.tight_layout()
fig.savefig(SAVE_PATH + 'AUCs_barplot.pdf', dpi=FIG_RES, bbox_inches='tight')

# save the AUCs as CSV: per-replicate values plus mean and 95% CI half-width
for auc_data, name in zip([v_auc, t_auc], ['Validation', 'Test']):
    auc_df = pd.DataFrame(data=auc_data.transpose(), index=scores_names + ['composite'])
    auc_df['mean'] = auc_df.mean(axis=1)
    auc_df['1.96std'] = 1.96 * auc_df.std(axis=1)
    auc_df.to_csv(SAVE_PATH + 'AUC_tables/' + name + '_AURC.csv')
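# Hedged sketch (assumption, not the repo's actual implementation): the
# `scores_as_df` helper used above is not shown in this excerpt. A minimal
# version consistent with these calls could look like `_scores_as_df_sketch`
# below, where the raw per-sample results are assumed to be rows of
# [index, label, embedding score, reconstruction score] and `em_col` names
# the embedding-score column:
def _scores_as_df_sketch(results, set_name, em_col='scores_em'):
    # build a DataFrame indexed by sample index, with one column per score
    df = pd.DataFrame(data=np.array(results[set_name]['scores']),
                      columns=['index', 'label', em_col, 'scores_rec'])
    return df.rename(columns={em_col: 'scores_em'}).set_index('index')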
# `pre` iterates over the pretraining strategies (SimCLR vs the AE baseline)
# in the enclosing loop
for mod in ['DSAD', 'DMSAD']:
    # group labels: 'CDSAD'/'CDMSAD' for contrastive pretraining, 'AE DSAD'/'AE DMSAD' otherwise
    pretrain_name = 'C' if pre == 'SimCLR' else 'AE '
    # skip the contrastive DSAD combination
    if pre != 'SimCLR' or mod != 'DSAD':
        group_name.append(pretrain_name + mod)
        auc_valid.append(np.array(AUC_valid[pre][mod]))
        auc_test.append(np.array(AUC_test[pre][mod]))

auc = [pd.DataFrame(auc_valid).T.values, pd.DataFrame(auc_test).T.values]

fig, ax = plt.subplots(1, 1, figsize=(6, 4))
metric_barplot(auc, ['Validation', 'Test'], group_name,
               colors=['lightcoral', 'lightsalmon'],
               legend_kwargs=dict(loc='upper center', ncol=2, frameon=False,
                                  framealpha=0.0, fontsize=12,
                                  bbox_to_anchor=(0.5, -0.1),
                                  bbox_transform=ax.transAxes))

# pairs = [(('AE\nDSAD', 'Validation'), ('AE\nDMSAD', 'Validation')),
#          (('Contrastive\nDSAD', 'Validation'), ('Contrastive\nDMSAD', 'Validation')),
#          (('AE\nDSAD', 'Validation'), ('Contrastive\nDSAD', 'Validation')),
#          (('AE\nDMSAD', 'Validation'), ('Contrastive\nDMSAD', 'Validation'))]
# pairs = [(('CDSAD', 'Validation'), ('CDMSAD', 'Validation')),
#          (('AE DSAD', 'Validation'), ('CDSAD', 'Validation'))]
# pairs = [(('CDSAD', 'Test'), ('CDMSAD', 'Test')),
#          (('AE DSAD', 'Test'), ('CDSAD', 'Test'))]
pairs = [(('AE DSAD', 'Test'), ('AE DMSAD', 'Test')),
         (('AE DMSAD', 'Test'), ('CDMSAD', 'Test')),
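# Hedged sketch (assumption): `pairs` is presumably consumed by a
# significance-annotation helper elsewhere in the repo. A plain Welch t-test
# over the per-replicate AUCs of each pair would look like this, where
# `auc_by_group` is a hypothetical dict mapping (group name, set name) to a
# 1D array of per-replicate AUCs:
from scipy import stats

def _pairwise_welch_sketch(pairs, auc_by_group):
    for g1, g2 in pairs:
        # Welch's t-test (unequal variances) between the two AUC samples
        t, p = stats.ttest_ind(auc_by_group[g1], auc_by_group[g2], equal_var=False)
        print(f'{g1} vs {g2}: t = {t:.3f}, p = {p:.4f}')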
# (within the loop over body parts: `df_part` holds the rows of one body part)
auc.append([
    roc_auc_score(df_part['abnormal_XR'], df_part[f'AD_scores_{i+1}'])
    for i in range(N_score)
])
auprc.append([
    average_precision_score(df_part['abnormal_XR'], df_part[f'AD_scores_{i+1}'])
    for i in range(N_score)
])

auc = np.array(auc).T
auprc = np.array(auprc).T

fig, axs = plt.subplots(1, 2, figsize=(20, 8))
if transparent:
    fig.set_alpha(0.0)
metric_barplot([auc], serie_names=['AUC'], group_names=df.body_part.unique(),
               colors=['coral'], ax=axs[0])
axs[0].set_ylabel('AUC [-]')
metric_barplot([auprc], serie_names=['AUPRC'], group_names=df.body_part.unique(),
               colors=['lightskyblue'], ax=axs[1])
axs[1].set_ylabel('AUPRC [-]')
fig.savefig(FIGURE_PATH + 'body_part_auc.pdf', dpi=dpi, bbox_inches='tight')
plt.show()
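# Hedged sketch (assumption, not the repo's actual helper): `metric_barplot`
# is the shared plotting utility used throughout these scripts. A minimal
# version consistent with the calls above -- grouped bars of column means with
# 1.96*SD error bars over replicates -- could look like `_metric_barplot_sketch`
# below (the real helper also supports `fontsize`, `gap`, `jitter`, and
# `legend_kwargs`):
def _metric_barplot_sketch(series, serie_names, group_names, colors=None, w=None, ax=None):
    if ax is None:
        ax = plt.gca()
    if w is None:
        w = 0.8
    x = np.arange(len(group_names))
    width = w / len(series)
    for k, (data, label) in enumerate(zip(series, serie_names)):
        data = np.asarray(data, dtype=float)   # shape: (n_replicates, n_groups)
        mean, err = np.nanmean(data, axis=0), 1.96 * np.nanstd(data, axis=0)
        pos = x - w / 2 + (k + 0.5) * width    # center bar k within each group
        ax.bar(pos, mean, width=width, yerr=err,
               color=colors[k] if colors else None, label=label)
    ax.set_xticks(x)
    ax.set_xticklabels(group_names)
    ax.legend()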
def main(expe_folder, rep, data_info_path, pretrain, model, frac_abnormal):
    """
    Generate a summary figure for the results of the provided SimCLR-AD or
    AE-AD experiment.
    """
    if not os.path.isdir(expe_folder + 'analysis/'):
        os.makedirs(expe_folder + 'analysis/', exist_ok=True)

    ################################# LOAD DATA ################################
    # load data_info
    df_info = pd.read_csv(data_info_path)
    df_info = df_info.drop(df_info.columns[0], axis=1)
    df_info = df_info[df_info.low_contrast == 0]

    # get the valid and test sets
    splitter = MURA_TrainValidTestSplitter(df_info, train_frac=0.5,
                                           ratio_known_normal=0.05,
                                           ratio_known_abnormal=frac_abnormal,
                                           random_state=42)
    splitter.split_data(verbose=False)
    valid_df = splitter.get_subset('valid')
    test_df = splitter.get_subset('test')

    # load results
    with open(expe_folder + f'results/results_{rep}.json', 'r') as f:
        results = json.load(f)

    # concat valid and test 512- and 128-dimensional embeddings of SimCLR
    df = []
    for set_name, df_set in zip(['valid', 'test'], [valid_df, test_df]):
        df_tmp = df_set.copy() \
                       .drop(columns=['patient_any_abnormal', 'body_part_abnormal',
                                      'low_contrast', 'semi_label'])
        if pretrain == 'SimCLR':
            cols = ['idx', '512_embed', '128_embed']
        else:
            cols = ['idx', 'label', 'AE_score', '512_embed', '128_embed']
        df_scores = pd.DataFrame(data=results[pretrain][set_name]['embedding'], columns=cols) \
                      .set_index('idx')
        df_scores['set'] = set_name
        df.append(pd.merge(df_tmp, df_scores, how='inner', left_index=True, right_index=True))
    # concat valid and test
    df_sim = pd.concat(df, axis=0)

    # concat valid and test scores and embeddings of DSAD
    df = []
    for set_name, df_set in zip(['valid', 'test'], [valid_df, test_df]):
        df_tmp = df_set.copy() \
                       .drop(columns=['patient_any_abnormal', 'body_part_abnormal',
                                      'low_contrast', 'semi_label'])
        if model == 'DMSAD':
            cols = ['idx', 'label', 'ad_score', 'sphere_idx', '128_embed']
        else:
            cols = ['idx', 'label', 'ad_score', '128_embed']
        df_scores = pd.DataFrame(data=results['AD'][set_name]['scores'], columns=cols) \
                      .set_index('idx') \
                      .drop(columns=['label'])
        df_scores['set'] = set_name
        df.append(pd.merge(df_tmp, df_scores, how='inner', left_index=True, right_index=True))
    # concat valid and test
    df_ad = pd.concat(df, axis=0)

    ############################# INITIALIZE FIGURE ############################
    if model == 'DSAD':
        fig = plt.figure(figsize=(24, 33))
        gs = fig.add_gridspec(nrows=5, ncols=24, hspace=0.4, wspace=5,
                              height_ratios=[2/15, 4/15, 4/15, 2/15, 3/15])
    else:
        fig = plt.figure(figsize=(24, 40))
        gs = fig.add_gridspec(nrows=6, ncols=24, hspace=0.4, wspace=5,
                              height_ratios=[2/19, 4/19, 4/19, 4/19, 2/19, 3/19])

    ################################ PLOT LOSS #################################
    pretrain_name = 'Contrastive' if pretrain == 'SimCLR' else pretrain

    ax_loss_sim = fig.add_subplot(gs[0, :12])
    epoch_loss = np.array(results[pretrain]['train']['loss'])
    plot_loss(epoch_loss, ax_loss_sim, title=f'{pretrain_name} Loss Evolution')

    ax_loss_ad = fig.add_subplot(gs[0, 12:])
    epoch_loss = np.array(results['AD']['train']['loss'])
    plot_loss(epoch_loss, ax_loss_ad, title=f'{model} Loss Evolution')

    ############################### PLOT T-SNE ################################
    df_sim_val = df_sim[df_sim.set == 'valid']
    embed2D = np.stack(df_sim_val['512_embed'].values, axis=0)
    labels = df_sim_val.abnormal_XR.values
    body_part = df_sim_val.body_part.values
    # by body part
    ax_repr_sim512 = fig.add_subplot(gs[1, :8])
    plot_tSNE_bodypart(embed2D, body_part, ax_repr_sim512,
                       title=f't-SNE Representation of {pretrain_name} 512-Dimensional Space\nBy Body Part',
                       legend=False)
    # by labels
    ax_repr_sim512 = fig.add_subplot(gs[2, :8])
    plot_tSNE_label(embed2D, labels, ax_repr_sim512,
                    title=f't-SNE Representation of {pretrain_name} 512-Dimensional Space\nBy Labels',
                    legend=False)

    embed2D = np.stack(df_sim_val['128_embed'].values, axis=0)
    labels = df_sim_val.abnormal_XR.values
    body_part = df_sim_val.body_part.values
    # by body part
    ax_repr_sim128 = fig.add_subplot(gs[1, 8:16])
    plot_tSNE_bodypart(embed2D, body_part, ax_repr_sim128,
                       title=f't-SNE Representation of {pretrain_name} 128-Dimensional Space\nBy Body Part',
                       legend=True)
    # by labels
    ax_repr_sim128 = fig.add_subplot(gs[2, 8:16])
    plot_tSNE_label(embed2D, labels, ax_repr_sim128,
                    title=f't-SNE Representation of {pretrain_name} 128-Dimensional Space\nBy Labels',
                    legend=True)

    df_ad_val = df_ad[df_ad.set == 'valid']
    embed2D = np.stack(df_ad_val['128_embed'].values, axis=0)
    labels = df_ad_val.abnormal_XR.values
    body_part = df_ad_val.body_part.values
    # by body part
    ax_repr_AD128 = fig.add_subplot(gs[1, 16:])
    plot_tSNE_bodypart(embed2D, body_part, ax_repr_AD128,
                       title=f't-SNE Representation of {model} 128-Dimensional Space\nBy Body Part',
                       legend=False)
    # by labels
    ax_repr_AD128 = fig.add_subplot(gs[2, 16:])
    plot_tSNE_label(embed2D, labels, ax_repr_AD128,
                    title=f't-SNE Representation of {model} 128-Dimensional Space\nBy Labels',
                    legend=False)

    ########################## PLOT SPHERE DIAGNOSTIC ##########################
    if model == 'DMSAD':
        # distribution by sphere
        ax_sphere_dist = fig.add_subplot(gs[3, :16])
        plot_sphere_dist(df_ad_val[df_ad_val.abnormal_XR == 0], ax_sphere_dist,
                         title='Body Part Distribution by Sphere')
        # t-SNE by sphere
        embed2D = np.stack(df_ad_val['128_embed'].values, axis=0)
        sphere_index = df_ad_val.sphere_idx.values
        ax_sphere_tSNE = fig.add_subplot(gs[3, 16:])
        plot_tSNE_sphere(embed2D, sphere_index, ax_sphere_tSNE,
                         title=f't-SNE Representation of {model} 128-Dimensional Space\nBy Sphere',
                         legend=True)

    ######################### PLOT SCORE DISTRIBUTION #########################
    df_ad_val = df_ad[df_ad.set == 'valid']
    ad_scores = df_ad_val.ad_score.values
    labels = df_ad_val.abnormal_XR.values
    body_part = df_ad_val.body_part.values
    # all
    ax_score_all = fig.add_subplot(gs[-2, :3])
    plot_score_dist(ad_scores, labels, ax_score_all, title='All Scores', legend=False)
    ax_score_all.set_ylabel('Count [-]', fontsize=12)
    # by body part
    for i, bp in enumerate(np.unique(body_part), start=1):
        ax_score = fig.add_subplot(gs[-2, 3*i:3*(i+1)], sharey=ax_score_all, sharex=ax_score_all)
        plot_score_dist(ad_scores[body_part == bp], labels[body_part == bp],
                        ax_score, title=f'{bp.title()} Scores', legend=False)

    ########################## PLOT AUC AND ROC CURVE #########################
    # ROC
    ax_roc = fig.add_subplot(gs[-1, :6])
    ad_scores = df_ad.ad_score.values
    labels = df_ad.abnormal_XR.values
    sets = df_ad.set.values
    plot_ROC(ad_scores, labels, sets, ax_roc, title='ROC curve')

    # AUC barplot
    ax_auc = fig.add_subplot(gs[-1, 6:])
    ad_scores = df_ad.ad_score.values
    labels = df_ad.abnormal_XR.values
    body_part = df_ad.body_part.values
    sets = df_ad.set.values
    valid_auc, names = get_AUC_list(ad_scores[sets == 'valid'], labels[sets == 'valid'],
                                    body_part[sets == 'valid'])
    test_auc, names = get_AUC_list(ad_scores[sets == 'test'], labels[sets == 'test'],
                                   body_part[sets == 'test'])
    metric_barplot([valid_auc, test_auc], ['Validation', 'Test'], names,
                   ['lightsalmon', 'peachpuff'], gap=1, ax=ax_auc, fontsize=12)
    ax_auc.set_title('Overall AUC scores and AUC by Body Part', fontsize=12, fontweight='bold')

    ################################ SAVE FIGURE ###############################
    fig.savefig(expe_folder + f'analysis/summary_{rep}.pdf', dpi=200, bbox_inches='tight')
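# Hedged sketch (assumption): the script's entry point is not shown in this
# excerpt; a plain argparse wrapper matching main()'s signature could look like:
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Summary figure for a SimCLR-AD / AE-AD experiment.')
    parser.add_argument('expe_folder', type=str, help='experiment output folder')
    parser.add_argument('rep', type=int, help='replicate number')
    parser.add_argument('data_info_path', type=str, help='path to the data info CSV')
    parser.add_argument('--pretrain', type=str, default='SimCLR', choices=['SimCLR', 'AE'])
    parser.add_argument('--model', type=str, default='DSAD', choices=['DSAD', 'DMSAD'])
    parser.add_argument('--frac_abnormal', type=float, default=0.05)
    args = parser.parse_args()
    main(args.expe_folder, args.rep, args.data_info_path,
         args.pretrain, args.model, args.frac_abnormal)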
data_auc.append(pd.read_csv(OUTPUT_PATH + folder[0] + '/analysis/AUC_Tables/Test_AURC.csv',
                            index_col=0).loc[folder[1]][:-2])
data_auprc.append(pd.read_csv(OUTPUT_PATH + folder[0] + '/analysis/AUC_Tables/Test_AUPRC.csv',
                              index_col=0).loc[folder[1]][:-2])

# keep the per-replicate values only: [:-2] above drops the 'mean' and
# '1.96std' summary columns written by the analysis scripts
data_auc = pd.concat(data_auc, axis=1).values
data_auprc = pd.concat(data_auprc, axis=1).values

#%%#############################################################################
#                                   Figures                                    #
################################################################################

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
if transparent:
    fig.set_alpha(0)
metric_barplot([data_auc, data_auprc], ['AUC', 'AUPRC'], names, fontsize=14,
               colors=['salmon', 'skyblue'], w=0.45, ax=ax,
               gap=len(expe_semisupervised),
               legend_kwargs=dict(loc='upper center', ncol=2, frameon=True,
                                  framealpha=1.0, fontsize=13,
                                  bbox_to_anchor=(0.5, -0.15),
                                  bbox_transform=ax.transAxes))
#ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')
ax.set_ylabel('AUC [-] , AUPRC [-]')
ax.annotate('Semi-Supervised', xy=(0.215, 1.02), xytext=(0.215, 1.1),
            xycoords='axes fraction', fontsize=13, ha='center', va='center',
            rotation=0, bbox=dict(boxstyle='square', fc='white', lw=0),
            arrowprops=dict(arrowstyle='-[, widthB=15, lengthB=0.5', lw=1))
ax.annotate('Unsupervised', xy=(0.69, 1.02), xytext=(0.69, 1.1),
            xycoords='axes fraction', fontsize=13, ha='center', va='center',
            rotation=0, bbox=dict(boxstyle='square', fc='white', lw=0),
            arrowprops=dict(arrowstyle='-[, widthB=22, lengthB=0.5', lw=1))
fig.tight_layout()
if save_fig:
    fig.savefig(FIGURE_PATH + 'results_barplot.pdf', dpi=FIG_RES, bbox_inches='tight')
plt.show()
t_auc[i] = results['test']['auc']
df_v = pd.DataFrame(data=np.array(results['valid']['scores']),
                    columns=['index', 'label', 'scores'])
df_t = pd.DataFrame(data=np.array(results['test']['scores']),
                    columns=['index', 'label', 'scores'])
v_auprc[i] = average_precision_score(df_v.label, df_v.scores)
t_auprc[i] = average_precision_score(df_t.label, df_t.scores)

fig, ax = plt.subplots(1, 1, figsize=(5, 7))
metric_barplot([v_auc, t_auc, v_auprc, t_auprc],
               ['Validation AURC', 'Test AURC', 'Validation AUPRC', 'Test AUPRC'],
               [f'{exp_name} anomaly score'],
               colors=['tomato', 'coral', 'dodgerblue', 'cornflowerblue'],
               w=None, ax=ax, fontsize=fontsize, jitter=False,
               jitter_color='lightcoral')
ax.set_ylabel('AURC ; AUPRC [-]', fontsize=fontsize)
ax.set_title('Validation and Test AUCs for the anomaly scores', fontsize=fontsize)
fig.tight_layout()
fig.savefig(SAVE_PATH + 'AUCs_barplot.pdf', dpi=FIG_RES, bbox_inches='tight')

# save AUC in csv
for auc_data, name in zip([v_auc, t_auc], ['Validation', 'Test']):
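    # Hedged completion (assumption): the excerpt ends mid-loop; by analogy
    # with the multi-score script above, the body presumably builds a one-row
    # summary per set and writes it to CSV:
    auc_df = pd.DataFrame(data=auc_data[None, :], index=[f'{exp_name} anomaly score'])
    auc_df['mean'] = auc_df.mean(axis=1)
    auc_df['1.96std'] = 1.96 * auc_df.std(axis=1)
    auc_df.to_csv(SAVE_PATH + 'AUC_tables/' + name + '_AURC.csv')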