def plotBinsSurfacePercentage(data, rootdir, flag_all_ranges=False):
    """Plot pooled boxplots of tumor-surface coverage per distance range.

    One box is drawn per entry of ``data``; the mean of each box is marked
    with a dark-red diamond. The figure is written into ``rootdir`` through
    ``gh.save``.

    :param data: sequence of samples handed unchanged to ``plt.boxplot``.
        The tick labels assume 42 samples when ``flag_all_ranges`` is True
        (21 negative and 21 positive 1 mm ranges) and 3 samples otherwise
        -- presumably; confirm against the caller.
    :param rootdir: directory in which the PNG is saved.
    :param flag_all_ranges: when exactly ``True`` label every 1 mm range,
        otherwise label the three coarse ranges (x < 0, 0..5, x > 5).
    :return: None
    """
    _, axes = plt.subplots(figsize=(12, 16))
    bplot = plt.boxplot(data,
                        vert=True,  # vertical box alignment
                        patch_artist=True,  # fill the boxes with color
                        showmeans=True)
    plt.setp(bplot['medians'], color='black', linewidth=1.5)
    plt.setp(bplot['means'], marker='D', markeredgecolor='darkred',
             markerfacecolor='darkred', label='Mean')

    if flag_all_ranges is True:
        # Every label is written "(lower:upper)". The negative ranges used to
        # be emitted upper bound first ("(-20:-21)"), inconsistent with the
        # hand-patched "(-1:0)" and with the positive ranges.
        labels_neg = ['(-' + str(x) + ':-' + str(x - 1) + ')'
                      for x in range(21, 1, -1)]
        labels_neg.append('(-1:0)')
        labels_pos = ['(' + str(x - 1) + ':' + str(x) + ')'
                      for x in range(1, 22)]
        xticklabels = labels_neg + labels_pos
        xtickNames = plt.setp(axes, xticklabels=xticklabels)
        plt.setp(xtickNames, rotation=45, fontsize=6)
    else:
        # Mathtext needs a balanced pair of '$'; the last label previously
        # had a single '$' and was therefore rendered as literal text. The
        # first label was also missing the minus sign of the lower bound.
        xticklabels = [r"$(-\infty < x < 0)$",
                       r"$(0 \leqslant x \leqslant 5)$",
                       r"$(x > 5)$"]
        xtickNames = plt.setp(axes, xticklabels=xticklabels)
        plt.setp(xtickNames, fontsize=14, color='black')

    axes.tick_params(colors='black')

    # Collapse duplicated legend entries (one 'Mean' handle per box).
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = OrderedDict(zip(labels, handles))
    plt.legend(by_label.values(), by_label.keys(), fontsize=14)

    plt.xlabel('Range of Distances [mm]', fontsize=14, color='black')
    plt.ylabel('Tumor Surface covered by Ablation [%]', fontsize=14,
               color='black')
    plt.title('Boxplots for Percentage of Tumor Surface covered by Ablation. '
              '10 Cases (pooled)', fontsize=14)

    # save figure
    if flag_all_ranges is True:
        figName_hist = 'Boxplots_TheHistogramDistances_AllRanges.png'
    else:
        figName_hist = 'Boxplots_TheHistogramDistances.png'
    figpathHist = os.path.join(rootdir, figName_hist)
    gh.save(figpathHist, width=12, height=10)
def interpolation_fct(df_ablation, df_radiomics, title, fontsize=24, flag=None,
                      flag_energy_axis=False, flag_lin_regr=False):
    """Interpolate brochure ablation volumes and plot them against measured ones.

    The brochure volume is linearly interpolated (``scipy`` ``griddata``) at
    the (power, time) pairs found in ``df_radiomics``. A PAV/EAV boxplot is
    delegated to ``boxplots_PAV_EAV.plot_boxplots_volumes`` and a scatter plot
    of predicted versus measured volume is saved under ``figures/``.

    NOTE: rows of ``df_radiomics`` lacking 'Power' or 'Time_Duration_Applied'
    are dropped **in place**; the returned array has one entry per remaining
    row, so callers see the same filtered frame.

    :param df_ablation: brochure table with the columns 'Power',
        'Time_Duration_Applied' and 'Ablation Volume [ml]_brochure'.
    :param df_radiomics: measured cases; needs 'Power',
        'Time_Duration_Applied', 'Ablation Volume [ml]' plus the column used
        by the selected ``flag`` / ``flag_energy_axis`` mode.
    :param title: text shown in the annotation box and used in the file name.
    :param fontsize: base font size for labels, ticks and legends.
    :param flag: 'Tumour Volume [ml]' or 'No. chemo cycles' to scale the
        markers by that quantity; any other value falls through to the
        energy-axis / plain scatter. When not None it is also appended to the
        file name.
    :param flag_energy_axis: add a second (top) x axis showing 'Energy [kj]'.
    :param flag_lin_regr: when exactly ``True`` overlay a linear regression
        and report r and R^2 in the legend.
    :return: 1-D array of interpolated brochure volumes (NaN where the
        interpolation yields no value).
    """
    # --- brochure samples: (power, time) -> volume -------------------------
    n_brochure = len(df_ablation)
    points_power = np.asarray(df_ablation['Power']).reshape((n_brochure, 1))
    points_time = np.asarray(
        df_ablation['Time_Duration_Applied']).reshape((n_brochure, 1))
    power_and_time_brochure = np.hstack((points_power, points_time))
    ablation_vol_brochure = np.asarray(
        df_ablation['Ablation Volume [ml]_brochure']).reshape((n_brochure, 1))

    # --- query points: (power, time) actually applied ----------------------
    df_radiomics.dropna(subset=['Power', 'Time_Duration_Applied'],
                        inplace=True)
    n_cases = len(df_radiomics)
    grid_x = np.array(pd.to_numeric(df_radiomics['Power'].to_numpy(),
                                    errors='coerce')).reshape(n_cases, 1)
    grid_y = np.array(
        pd.to_numeric(df_radiomics['Time_Duration_Applied'].to_numpy(),
                      errors='coerce')).reshape(n_cases, 1)
    power_and_time_effective = np.hstack((grid_x, grid_y))

    ablation_vol_interpolated_brochure = griddata(
        power_and_time_brochure, ablation_vol_brochure,
        power_and_time_effective, method='linear').reshape(n_cases, )
    ablation_vol_measured = np.asarray(
        df_radiomics['Ablation Volume [ml]']).reshape(n_cases, )

    # %% PLOT BOXPLOTS
    boxplots_PAV_EAV.plot_boxplots_volumes(ablation_vol_interpolated_brochure,
                                           ablation_vol_measured,
                                           flag_subcapsular='all')

    # %% PLOT SCATTER PLOTS
    fig, ax = plt.subplots()
    if flag == 'Tumour Volume [ml]':
        size_values = np.asarray(
            df_radiomics['Tumour Volume [ml]']).reshape(n_cases, )
        df = pd.DataFrame(data=dict(x=ablation_vol_interpolated_brochure,
                                    y=ablation_vol_measured,
                                    sizes=size_values))
        df.dropna(inplace=True)
        bins = np.arange(start=0, stop=30, step=6)
        # Legend labels are derived from the real bin edges. The former
        # hand-written list ('0-5', '5-10', '15-20', ...) did not match the
        # 6 ml wide bins and skipped a range.
        edges = [int(edge) for edge in bins]
        labels = ['< %d' % edges[0]]
        labels += ['%d-%d' % (low, high)
                   for low, high in zip(edges[:-1], edges[1:])]
        labels.append('>= %d' % edges[-1])
        for name, group in df.groupby(np.digitize(df.sizes, bins)):
            # Size and label are keyed on the bin id (not on the enumeration
            # position), so a bin keeps its marker size even when another
            # bin is empty.
            plt.scatter(group.x, group.y, s=150. * max(name, 1), alpha=0.5,
                        label=labels[name])
        legend1 = ax.legend(labelspacing=1, borderpad=0.75,
                            title='Tumor Volume [ml]', handletextpad=1.5,
                            loc='upper right', fontsize=fontsize - 2,
                            title_fontsize=fontsize - 2)
        ax.add_artist(legend1)
        ax.tick_params(axis='y', labelsize=fontsize)
        ax.tick_params(axis='x', labelsize=fontsize)
        plt.tick_params(labelsize=fontsize, color='k')
    elif flag == 'No. chemo cycles':
        size_values = np.asarray(
            df_radiomics['no_chemo_cycle']).reshape(n_cases, )
        df = pd.DataFrame()
        df['x'] = ablation_vol_interpolated_brochure
        df['y'] = ablation_vol_measured
        df['sizes'] = size_values
        df.dropna(inplace=True)
        # right=True: bin 0 is "<= 0 cycles", then (0, 4], (4, 8], > 8.
        bins = np.arange(start=0, stop=12, step=4)
        labels = ['0', '1-4', '5-8', '9-12']
        bin_ids = np.digitize(df.sizes, bins, right=True)
        for name, group in df.groupby(bin_ids):
            plt.scatter(group.x, group.y, s=150. * (name + 1), alpha=0.5,
                        label=labels[name])
        legend1 = ax.legend(labelspacing=1, borderpad=0.75,
                            title='Chemotherapy Cycles', handletextpad=1.5,
                            loc='upper right', fontsize=fontsize - 2,
                            title_fontsize=fontsize - 2)
        ax.add_artist(legend1)
    elif flag_energy_axis:
        energy = df_radiomics['Energy [kj]']
        df = pd.DataFrame(data=dict(x=ablation_vol_interpolated_brochure,
                                    energy=energy,
                                    y=ablation_vol_measured))
        df.dropna(inplace=True)
        ax2 = ax.twiny()  # second x axis (energy) sharing the y axis
        ax.scatter(df.x, df.y, color='steelblue', marker='o', s=100,
                   alpha=0.8)
        ax.set_ylabel('Effective Ablation Volume [ml]', fontsize=fontsize)
        ax.set_xlabel('Predicted Ablation Volume Brochure [ml]',
                      fontsize=fontsize, color='steelblue')
        ax.tick_params(axis='x', labelcolor='steelblue')
        ax.set_ylim([0, 100])
        ax.set_xlim([0, 100])
        ax2.scatter(df.energy, df.y, color='purple', marker='*', s=100,
                    alpha=0.5)
        ax2.set_xlabel('Energy [kj]', color='purple', fontsize=fontsize)
        ax2.tick_params(axis='x', colors='purple')
        ax2.set_ylim([0, 100])
        ax2.set_xlim([0, 100])
    else:
        df = pd.DataFrame(data=dict(x=ablation_vol_interpolated_brochure,
                                    y=ablation_vol_measured))
        df.dropna(inplace=True)
        plt.scatter(df.x, df.y, color='steelblue', marker='o', s=100)

    nr_samples = len(df)
    plt.ylabel('Effective Ablation Volume [ml]', fontsize=fontsize)
    if flag_energy_axis is False:
        plt.xlabel('Predicted Ablation Volume Brochure [ml]',
                   fontsize=fontsize)
    plt.ylim([0, 100])
    plt.xlim([0, 100])

    if flag_lin_regr is True:
        # get the data ready for linear regression
        X = np.asarray(df.x).reshape(len(df.x), 1)
        Y = np.asarray(df.y).reshape(len(df.y), 1)
        regr = linear_model.LinearRegression()
        regr.fit(X, Y)
        SS_tot = np.sum((Y - np.mean(Y))**2)
        residuals = Y - regr.predict(X)
        SS_res = np.sum(residuals**2)
        r_squared = 1 - (SS_res / SS_tot)
        correlation_coef = np.corrcoef(X[:, 0], Y[:, 0])[0, 1]
        label_r2 = r'$R^2:{0:.2f}$'.format(r_squared)
        label_r = r'$r: {0:.2f}$'.format(correlation_coef)
        ax.tick_params(axis='y', labelsize=fontsize)
        ax.tick_params(axis='x', labelsize=fontsize)
        plt.tick_params(labelsize=fontsize, color='k')
        plt.plot(X, regr.predict(X), color='orange', linewidth=2,
                 label='Linear Regression')
        # Invisible artists: they only carry the statistics into the legend.
        plt.plot([], [], ' ', label=label_r)
        plt.plot([], [], ' ', label=label_r2)
        plt.legend(fontsize=fontsize, loc='upper right', labelspacing=1)

    # Annotation box with the title and the number of plotted samples.
    props = dict(boxstyle='round', facecolor='white', edgecolor='gray')
    textstr = title + ' (n = ' + str(nr_samples) + ' )'
    ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=20,
            verticalalignment='top', bbox=props)
    if flag is not None:
        figpathHist = os.path.join(
            "figures", title + flag + '_ablation_vol_interpolated')
    else:
        plt.legend(fontsize=fontsize, loc='upper right', labelspacing=1)
        figpathHist = os.path.join("figures",
                                   title + '_ablation_vol_interpolated')
    gh.save(figpathHist, width=12, height=12, ext=["png"], close=True,
            tight=True, dpi=600)
    return ablation_vol_interpolated_brochure
def connected_mev_miv(
        df_radiomics,
        folder_path=r"C:\develop\segmentation-eval\figures\MIV_MEV_ellipsoids"):
    """Plot the four volume estimates per case, joined by a vertical line.

    For every case (rows with 'Outer Ellipsoid Volume' >= 150 or with any
    missing value are discarded) the predicted volume, the effective volume
    and the inscribed / enclosing ellipsoid volumes are drawn at the same x
    position; a black line connects the inscribed and enclosing values.

    :param df_radiomics: DataFrame with the columns
        'Predicted_Ablation_Volume', 'Ablation Volume [ml]' and
        'Outer Ellipsoid Volume'.
    :param folder_path: directory receiving the PNG. The default keeps the
        historical, machine-specific location; pass another directory to
        save elsewhere.
    :return: None. The plot is saved as ``<folder_path>/<HHMMSS-YYYYMMDD>``
        through ``gh.save``.
    """
    df = pd.DataFrame()
    df['PAV'] = df_radiomics['Predicted_Ablation_Volume']
    df['EAV'] = df_radiomics['Ablation Volume [ml]']
    # NOTE(review): the inscribed volume is approximated as a third of the
    # enclosing one; the real column is left disabled below -- confirm.
    # df['MIV'] = df_radiomics['Inner Ellipsoid Volume']
    df['MIV'] = df_radiomics['Outer Ellipsoid Volume'] / 3
    df['MEV'] = df_radiomics['Outer Ellipsoid Volume']
    # Filter and drop in one expression: an in-place dropna on the filtered
    # slice triggers pandas' chained-assignment warning.
    df = df[df['MEV'] < 150].dropna()

    # plot scatter plots on the same y axis then connect them with a
    # vertical line
    fig, ax = plt.subplots()
    MIV = np.asarray(df['MIV'])
    MEV = np.asarray(df['MEV'])
    PAV = np.asarray(df['PAV'])
    EAV = np.asarray(df['EAV'])
    x = np.arange(1, len(MEV) + 1)  # 1-based case index
    ax.scatter(x, MIV, marker='o', color='green',
               label='Maximum Inscribed Ellipsoid')
    ax.scatter(x, EAV, marker='o', color='orange',
               label='Effective Ablation Volume')
    ax.scatter(x, PAV, marker='o', color='blue',
               label='Predicted Ablation Volume')
    ax.scatter(x, MEV, marker='o', color='red',
               label='Minimum Enclosing Ellipsoid')
    plt.legend(loc='upper left')
    plt.ylabel('Volume (mL)')
    for position, inscribed, enclosing in zip(x, MIV, MEV):
        plt.plot([position, position], [inscribed, enclosing], 'k-')

    timestr = time.strftime("%H%M%S-%Y%m%d")
    fig_path = os.path.join(folder_path, timestr)
    gh.save(fig_path, width=12, height=12, ext=["png"], close=True,
            tight=True, dpi=600)
def edit_save_plot(ax=None, p=None, flag_hue=None, xlabel='PAV', ylabel='EAV',
                   device='', r_1=None, r_2=None, label_1=None, label_2=None,
                   ratio_flag=False):
    """Finish the legend, limits and labels of the current plot and save it.

    :param ax: axes whose legend is styled when ``flag_hue`` is not one of
        the grouped modes.
    :param p: grid object exposing ``axes[0, 0]``; used instead of ``ax`` for
        the grouped modes.
    :param flag_hue: grouping mode; 'vessels', 'subcapsular', 'chemotherapy'
        and 'Tumor_Vol' rewrite the first four legend texts.
    :param xlabel: x-axis label.
    :param ylabel: y-axis label.
    :param device: legend title and file-name prefix.
    :param r_1: R^2 value written into the first legend entry.
    :param r_2: R^2 value written into the second legend entry.
    :param label_1: text of the third legend entry.
    :param label_2: text of the fourth legend entry.
    :param ratio_flag: unless exactly ``False`` the 0..100 axis limits are
        not applied.
    :return: None. The figure is saved under ``figures/`` through ``gh.save``.
    """
    fontsize = 20
    grouped_modes = ('vessels', 'subcapsular', 'chemotherapy', 'Tumor_Vol')

    if flag_hue not in grouped_modes:
        ax.legend(fontsize=fontsize, title_fontsize=fontsize, title=device,
                  loc='upper right')
    else:
        ax = p.axes[0, 0]
        ax.legend(fontsize=fontsize, title_fontsize=fontsize, title=device,
                  loc='upper left')
        legend_texts = ax.get_legend().get_texts()
        # Two R^2 lines followed by the two group labels, in legend order.
        new_texts = (
            r'$R^2:{0:.2f}$'.format(r_1),
            r'$R^2:{0:.2f}$'.format(r_2),
            label_1,
            label_2,
        )
        for position, text in enumerate(new_texts):
            legend_texts[position].set_text(text)

    if ratio_flag is False:
        plt.xlim([0, 100])
        plt.ylim([0, 100])
    plt.xlabel(xlabel, fontsize=fontsize)
    plt.ylabel(ylabel, fontsize=fontsize)

    stamp = time.strftime("%H%M%S-%Y%m%d")
    file_stem = (device + '__EAV_parametrized_PAV_groups_' + str(flag_hue)
                 + '-' + stamp)
    gh.save(os.path.join("figures", file_stem), width=12, height=12,
            ext=["png"], close=True, tight=True, dpi=600)
# Decorate and save the hold-out prediction plot of the random forest model.
# `ax`, `min_samples_leaf` and `n_estimators` come from the preceding cell.
plt.title(
    'Prediction for Random Forest Model on Hold-Out Train/Test Sample: 70/30. '
    'Min samples leaf:' + str(min_samples_leaf) + '. No. estimators: ' +
    str(n_estimators),
    fontsize=10)
ax.set_xlabel('Measured Ablation Volume [ml]', fontsize=10, color='k')
ax.set_ylabel('Predicted Ablation Volume [ml]', fontsize=10, color='k')
plt.legend(fontsize=10)
plt.tick_params(labelsize=10, color='black')
ax.tick_params(colors='black', labelsize=10)
# NOTE(review): with a blocking GUI backend the figure may already be closed
# when the save below runs -- confirm how gh.save picks its figure.
plt.show()
figpathHist = os.path.join(
    "figures", "Random_Forest_Model_Accuracy_Hold_OutTrain_Test_" +
    'Min_samples_leaf_' + str(min_samples_leaf) + '_No_estimators_' +
    str(n_estimators))
gh.save(figpathHist, ext=['png'], close=True)

# %% refit on the full data set with an out-of-bag score
n_estimators = 100
min_samples_leaf = 2
min_sample_split = 2
clf = RandomForestRegressor(n_estimators=n_estimators,
                            random_state=0,
                            min_samples_leaf=min_samples_leaf,
                            # was fed min_samples_leaf by mistake; the two
                            # hyper-parameters are independent
                            min_samples_split=min_sample_split,
                            oob_score=True)
clf.fit(X, y)
# print("Score of the training dataset obtained using an out-of-bag estimate: %0.2f" % clf.oob_score_)
# print("Prediction computed with out-of-bag estimate on the training set: " % clf.oob_prediction_)
importances = list(clf.feature_importances_)
feature_list = X.columns.to_list()
def interpolation_fct(df_ablation, df_radiomics, title, fontsize=24,
                      flag_tumor=None, lin_regr=False):
    """Plot tumour volume against the interpolated brochure ablation volume.

    The brochure volume is linearly interpolated (``griddata``) at the
    (power, time) pairs of ``df_radiomics``. Tumour volume (log-scaled x
    axis) is then scattered against that predicted volume, one colour per
    'Proximity_to_surface' group, and the figure is saved under ``figures/``.

    NOTE: rows of ``df_radiomics`` lacking 'Power' or 'Time_Duration_Applied'
    are dropped **in place**.

    :param df_ablation: brochure table with the columns 'Power',
        'Time_Duration_Applied' and 'Ablation Volume [ml]_brochure'.
    :param df_radiomics: measured cases; needs 'Power',
        'Time_Duration_Applied', 'Ablation Volume [ml]',
        'Proximity_to_surface' and the column named by ``flag_tumor``.
    :param title: legend title and file-name prefix.
    :param fontsize: font size for labels, ticks and legends.
    :param flag_tumor: column of ``df_radiomics`` shown on the x axis, e.g.
        'Tumour Volume [ml]' or 'Tumour Volume + 10mm margin [ml]'.
        ``None`` selects 'Tumour Volume [ml]'.
    :param lin_regr: when exactly ``True`` overlay a linear regression and
        report r and R^2 in the legend.
    :return: None
    :raises ValueError: if ``flag_tumor`` is not a column of
        ``df_radiomics``.
    """
    # The former default (None) and any unlisted value left `tumor_volume`
    # unbound and crashed halfway through; validate before touching anything.
    if flag_tumor is None:
        flag_tumor = 'Tumour Volume [ml]'
    if flag_tumor not in df_radiomics.columns:
        raise ValueError(
            'flag_tumor must name a column of df_radiomics, got: '
            + repr(flag_tumor))

    # perform interpolation as a function of power and time
    # (multivariate interpolation)
    n_brochure = len(df_ablation)
    points_power = np.asarray(df_ablation['Power']).reshape((n_brochure, 1))
    points_time = np.asarray(
        df_ablation['Time_Duration_Applied']).reshape((n_brochure, 1))
    power_and_time_brochure = np.hstack((points_power, points_time))
    ablation_vol_brochure = np.asarray(
        df_ablation['Ablation Volume [ml]_brochure']).reshape((n_brochure, 1))

    df_radiomics.dropna(subset=['Power', 'Time_Duration_Applied'],
                        inplace=True)
    n_cases = len(df_radiomics)
    grid_x = np.array(pd.to_numeric(df_radiomics['Power'].to_numpy(),
                                    errors='coerce')).reshape(n_cases, 1)
    grid_y = np.array(
        pd.to_numeric(df_radiomics['Time_Duration_Applied'].to_numpy(),
                      errors='coerce')).reshape(n_cases, 1)
    power_and_time_effective = np.hstack((grid_x, grid_y))
    ablation_vol_interpolated_brochure = griddata(
        power_and_time_brochure, ablation_vol_brochure,
        power_and_time_effective, method='linear').reshape(n_cases, )
    # Computed for parity with the sibling plots; not drawn in this figure.
    ablation_vol_measured = np.asarray(
        df_radiomics['Ablation Volume [ml]']).reshape(n_cases, )

    # %% PLOT Tumor Volume vs PAV (per colours subcapsular vs. non-subcapsular)
    fig, ax = plt.subplots()
    df = pd.DataFrame(data=dict(
        x=df_radiomics[flag_tumor],
        y=ablation_vol_interpolated_brochure,
        subcapsular=df_radiomics['Proximity_to_surface']))
    df.dropna(inplace=True)
    # Group on the cleaned frame's own column rather than on the external,
    # un-dropped Series.
    # NOTE(review): labels are assigned by sorted group position; presumably
    # the first group is the deep (non-subcapsular) one -- confirm.
    labels = ['Deep Tumors', 'Subcapsular Tumors']
    for i, (_, group) in enumerate(df.groupby('subcapsular')):
        plt.scatter(group.x, group.y, alpha=0.5, label=labels[i], s=200)
    plt.legend(title=title + '(n = ' + str(len(df)) + ' )',
               title_fontsize=fontsize, fontsize=fontsize)
    plt.ylabel('Predicted Ablation Volume [ml]', fontsize=fontsize)
    plt.xlabel(flag_tumor, fontsize=fontsize)
    ax.tick_params(axis='y', labelsize=fontsize, color='k')
    ax.tick_params(axis='x', labelsize=fontsize, color='k')
    ax.set_xscale('log')
    plt.ylim([-1, 100])
    plt.xlim([0.03251566067053182, 684.15])
    plt.tick_params(labelsize=fontsize, color='black')

    if lin_regr is True:
        X = np.asarray(df.x).reshape(len(df), 1)
        Y = np.asarray(df.y).reshape(len(df), 1)
        regr = linear_model.LinearRegression()
        regr.fit(X, Y)
        SS_tot = np.sum((Y - np.mean(Y))**2)
        residuals = Y - regr.predict(X)
        SS_res = np.sum(residuals**2)
        r_squared = 1 - (SS_res / SS_tot)
        correlation_coef = np.corrcoef(X[:, 0], Y[:, 0])[0, 1]
        label_r2 = r'$R^2:{0:.2f}$'.format(r_squared)
        label_r = r'$r: {0:.2f}$'.format(correlation_coef)
        ax.tick_params(axis='y', labelsize=fontsize, color='k')
        ax.tick_params(axis='x', labelsize=fontsize, color='k')
        plt.tick_params(labelsize=fontsize, color='black')
        # Invisible artists: they only carry the statistics into the legend.
        plt.plot([], [], ' ', label=label_r)
        plt.plot([], [], ' ', label=label_r2)
        plt.plot(X, regr.predict(X), color='black', linewidth=1.5,
                 label='Linear Regression')
        plt.legend(title=title, title_fontsize=fontsize, fontsize=fontsize,
                   loc='best')

    # NOTE(review): with a blocking GUI backend the figure may already be
    # closed when the save below runs -- confirm how gh.save picks its figure.
    plt.show()
    figpath = os.path.join("figures", title + '_PAV_vs_' + flag_tumor)
    gh.save(figpath, width=12, height=12, ext=["png"], close=True,
            tight=False, dpi=600)
def _shapiro_verdict(sample, sample_name, alpha=0.05):
    """Run a Shapiro-Wilk test, print the verdict and return the p-value."""
    _, p_value = shapiro(sample)
    if p_value > alpha:
        msg = 'Sample ' + sample_name + ' looks Gaussian (fail to reject H0)'
    else:
        msg = 'Sample ' + sample_name + ' does not look Gaussian (reject H0)'
    print(msg)
    return p_value


def _compare_two_samples(sample_a, sample_b, p_a, p_b, alpha=0.05):
    """Print a Mann-Whitney U or t-test, chosen from the normality p-values."""
    # The t-test assumes that BOTH samples are Gaussian, so a single
    # non-Gaussian sample is enough to switch to the rank-based test.
    if p_a < alpha or p_b < alpha:
        t, p = stats.mannwhitneyu(sample_a, sample_b)
        print(
            'mann withney u test applied for samples coming from a non Gaussian distribution:'
        )
    else:
        t, p = stats.ttest_ind(sample_a, sample_b)
        print('ttest applied for samples coming from a Gaussian distribution:')
    print("t = " + str(t))
    print("p = " + str(p))


def plot_boxplots(df):
    """Compare ablation volumes by chemotherapy status and by estimation method.

    Saves under ``figures/``: one histogram of the ablation volume per
    chemotherapy group, a notched boxplot of the volume by chemotherapy
    status, and a boxplot comparing the voxel-based, ellipsoid-formula and
    brochure volumes. Normality (Shapiro-Wilk) and two-sample test results
    are printed.

    NOTE: ``df`` is modified in place: rows without 'Ablation Volume [ml]'
    or 'chemo_before_ablation' are dropped and the 'No' / 'Yes' values of
    'chemo_before_ablation' are replaced by ``False`` / ``True``.

    :param df: DataFrame with the columns 'Ablation Volume [ml]',
        'Energy [kj]', 'chemo_before_ablation',
        'Ablation Volume [ml] (parametrized_formula)' and
        'Ablation Volume [ml] (manufacturers)'.
    :return: None
    """
    chemo_map = {'No': False, 'Yes': True}

    # %% boxplot chemotherapy
    df_chemo = df.copy()
    df_chemo['Ablation Volume [ml] / Energy [kJ]'] = df_chemo[
        'Ablation Volume [ml]'] / df_chemo['Energy [kj]']
    df_chemo.dropna(subset=['Ablation Volume [ml] / Energy [kJ]',
                            'chemo_before_ablation'],
                    inplace=True)
    # Assign the replaced column back: `df[col].replace(..., inplace=True)`
    # acts on a temporary object under pandas Copy-on-Write and would leave
    # the frame untouched.
    df_chemo['chemo_before_ablation'] = df_chemo[
        'chemo_before_ablation'].replace(chemo_map)

    df.dropna(subset=['Ablation Volume [ml]', 'chemo_before_ablation'],
              inplace=True)
    df['chemo_before_ablation'] = df['chemo_before_ablation'].replace(
        chemo_map)

    # samples for the statistical tests
    no_chemo_df = df_chemo[df_chemo['chemo_before_ablation'].eq(False)]
    no_chemo = no_chemo_df['Ablation Volume [ml]'].tolist()
    chemo_df = df_chemo[df_chemo['chemo_before_ablation'].eq(True)]
    chemo = chemo_df['Ablation Volume [ml]'].tolist()

    # The histogram puts the volumes on the x axis and the counts on the y
    # axis; the volume label used to be placed on the y axis.
    fig, ax = plt.subplots(figsize=(12, 10))
    plt.hist(no_chemo)
    plt.title('No Chemotherapy')
    plt.xlabel('Ablation Volume [ml]')
    plt.ylabel('Frequency')
    figpathHist = os.path.join("figures",
                               "histogram ablation volumes no chemo")
    gh.save(figpathHist, ext=['png'], close=True)

    fig1, ax = plt.subplots(figsize=(12, 10))
    plt.hist(chemo)
    plt.title('Chemotherapy')
    plt.xlabel('Ablation Volume [ml]')
    plt.ylabel('Frequency')
    figpathHist = os.path.join("figures", "histogram ablation volumes chemo")
    gh.save(figpathHist, ext=['png'], close=True)

    print('no of tumors with chemo:', str(len(chemo)))
    print('no of tumors with no chemo:', str(len(no_chemo)))

    p_chemo = _shapiro_verdict(chemo, 'Chemo')
    p_no_chemo = _shapiro_verdict(no_chemo, 'No Chemo')
    _compare_two_samples(chemo, no_chemo, p_chemo, p_no_chemo)

    fig, ax = plt.subplots(figsize=(12, 10))
    bp_dict = df.boxplot(column=['Ablation Volume [ml]'],
                         ax=ax,
                         notch=True,
                         by='chemo_before_ablation',
                         patch_artist=True,
                         return_type='both')
    ax.set_xlabel('')
    # NOTE(review): with a blocking GUI backend the figure may be closed
    # before the styling and the save below -- confirm the intended order.
    plt.show()
    # `Series.iteritems` was removed in pandas 2.0; `items` is the
    # long-standing equivalent.
    for _, (ax, row) in bp_dict.items():
        for flier in row['fliers']:
            flier.set_marker('o')
        for i, box in enumerate(row['boxes']):
            if i == 0:
                box.set_facecolor('Purple')
                box.set_edgecolor('DarkMagenta')
            else:
                box.set_facecolor('LightPink')
                box.set_edgecolor('HotPink')
        for median in row['medians']:
            median.set_color(color='Black')
            median.set_linewidth(2)
        for whisker in row['whiskers']:
            whisker.set_color(color='Black')
            whisker.set_linewidth(2)

    xticklabels = [
        'No Chemotherapy before Ablation',
        'Chemotherapy Administered before Ablation'
    ]
    xtickNames = plt.setp(ax, xticklabels=xticklabels)
    plt.setp(xtickNames, fontsize=10, color='black')
    plt.ylim([-2, 120])
    plt.ylabel('Ablation Volume [ml]', fontsize=12, color='k')
    plt.tick_params(labelsize=10, color='black')
    ax.tick_params(colors='black', labelsize=10, color='k')
    ax.set_ylim([-2, 120])
    plt.xlabel('')
    fig.suptitle('')  # remove the automatic "grouped by" super-title
    plt.title(
        'Comparison of Ablation Volumes [ml] from MAVERRIC Dataset by Chemotherapy',
        fontsize=12)
    figpathHist = os.path.join(
        "figures", "boxplot ablation volumes by chemo before ablation")
    gh.save(figpathHist, ext=['png'], close=True)

    # %% BOXPLOTS ABLATION VOLUMES
    df_volumes = df.copy()
    df_volumes.dropna(subset=['Ablation Volume [ml]',
                              'Ablation Volume [ml] (manufacturers)'],
                      inplace=True)
    ablation_vol = df_volumes['Ablation Volume [ml]'].tolist()
    ablation_vol_brochure = df_volumes[
        'Ablation Volume [ml] (manufacturers)'].tolist()

    p_brochure = _shapiro_verdict(ablation_vol_brochure,
                                  'Ablation Volume Brochure')
    p_voxel = _shapiro_verdict(ablation_vol, 'Ablation Volume')
    _compare_two_samples(ablation_vol, ablation_vol_brochure, p_voxel,
                         p_brochure)

    fig, ax = plt.subplots(figsize=(12, 10))
    bp_dict = df.boxplot(column=[
        'Ablation Volume [ml]', 'Ablation Volume [ml] (parametrized_formula)',
        'Ablation Volume [ml] (manufacturers)'
    ],
                         ax=ax,
                         notch=True,
                         patch_artist=True,
                         return_type='both')
    ax.set_xlabel('')
    row = bp_dict.lines
    for flier in row['fliers']:
        flier.set_marker('o')
    box_colors = [('Blue', 'MediumBlue'), ('BlueViolet', 'BlueViolet'),
                  ('DeepSkyBlue', 'DodgerBlue')]
    for box, (facecolor, edgecolor) in zip(row['boxes'], box_colors):
        box.set_facecolor(facecolor)
        box.set_edgecolor(edgecolor)
    for median in row['medians']:
        median.set_color(color='Black')
        median.set_linewidth(2)
    for whisker in row['whiskers']:
        whisker.set_color(color='Black')
        whisker.set_linewidth(2)

    xticklabels = [
        'Ablation Volume [ml] (Voxel-Based)',
        'Ablation Volume [ml] (Ellipsoid Formula)',
        'Ablation Volume [ml] (Manufacturers Brochure)'
    ]
    xtickNames = plt.setp(ax, xticklabels=xticklabels)
    plt.setp(xtickNames, fontsize=10, color='black')
    plt.ylim([-2, 150])
    plt.ylabel('Ablation Volume [ml]', fontsize=14, color='k')
    plt.tick_params(labelsize=10, color='black')
    ax.tick_params(colors='black', labelsize=10, color='k')
    ax.set_ylim([-2, 150])
    plt.title('Comparison of Ablation Volumes [ml] from MAVERRIC Dataset',
              fontsize=16)
    figpathHist = os.path.join("figures", "boxplot volumes")
    gh.save(figpathHist, ext=['png'], close=True)
# Decorate and save the overlaid surface-margin histogram of all devices.
plt.xlabel(
    'Percentage of Surface Margin Covered for different ablation margins ranges',
    fontsize=20,
    color='black')
plt.ylabel('Frequency', fontsize=20, color='black')
plt.title(
    'Ablation Surface Margin Coverages [%] Histogram for all MWA device models.'
)
plt.legend(fontsize=20)
plt.xticks(range(0, 101, 10))
figpathHist = os.path.join("figures",
                           "surface margin frequency percentages overlaid")
plt.tick_params(labelsize=20, color='black')
ax.tick_params(colors='black', labelsize=20)
gh.save(figpathHist, ext=['png'], close=True, width=18, height=16)

# %% percentage distances histograms for angyodinamics
fig, ax = plt.subplots()
# Zeros mean "no surface in this range" and are blanked out. The result is
# assigned back because `df[col].replace(..., inplace=True)` acts on a
# temporary object under pandas Copy-on-Write and would not change the frame.
margin_columns = [
    "safety_margin_distribution_0", "safety_margin_distribution_5",
    "safety_margin_distribution_10"
]
df_angyodinamics[margin_columns] = df_angyodinamics[margin_columns].replace(
    0, np.nan)
# The three margin columns are taken positionally, starting at the 0 mm one.
idx_margins = df_angyodinamics.columns.get_loc('safety_margin_distribution_0')
df_margins = df_angyodinamics.iloc[:, idx_margins:idx_margins + 3].copy()
df_margins.reset_index(drop=True, inplace=True)
def plot_boxplots_volumes(ablation_vol_brochure,
                          ablation_vol_measured,
                          flag_subcapsular=None,
                          device_name=None):
    """Boxplot of predicted (PAV) versus effective (EAV) ablation volumes.

    NaNs are removed from each sample independently. Shapiro-Wilk normality
    verdicts and a two-sample test are printed, then the two-box figure is
    saved under ``figures/``.

    :param ablation_vol_brochure: predicted volumes (array-like of numbers).
    :param ablation_vol_measured: measured volumes (array-like of numbers).
    :param flag_subcapsular: ``False`` labels the EAV box 'Deep Tumors',
        ``True`` labels it 'Subcapsular Tumors', anything else leaves it
        plain. Its string form is appended to the file name.
    :param device_name: optional text shown as the only legend entry.
    :return: None
    """
    alpha = 0.05

    # drop the nans; asarray lets lists and Series through as well
    measured = np.asarray(ablation_vol_measured, dtype=float)
    brochure = np.asarray(ablation_vol_brochure, dtype=float)
    effective_ablation_vol = measured[~np.isnan(measured)]
    predicted_ablation_vol = brochure[~np.isnan(brochure)]

    _, p_brochure = shapiro(predicted_ablation_vol)
    if p_brochure > alpha:
        msg = 'Sample Ablation Volume Brochure looks Gaussian (fail to reject H0)'
    else:
        msg = 'Sample Ablation Volume Brochure does not look Gaussian (reject H0)'
    print(msg)

    _, p_voxel = shapiro(effective_ablation_vol)
    if p_voxel > alpha:
        msg = 'Sample Ablation Volume looks Gaussian (fail to reject H0)'
    else:
        msg = 'Sample Ablation Volume does not look Gaussian (reject H0)'
    print(msg)

    # The t-test assumes that BOTH samples are Gaussian, so a single
    # non-Gaussian sample is enough to switch to the rank-based test.
    if p_voxel < alpha or p_brochure < alpha:
        t, p = stats.mannwhitneyu(effective_ablation_vol,
                                  predicted_ablation_vol)
        print(
            'mann withney u test applied for samples coming from a non Gaussian distribution:'
        )
    else:
        t, p = stats.ttest_ind(effective_ablation_vol, predicted_ablation_vol)
        print('ttest applied for samples coming from a Gaussian distribution:')
    print("t = " + str(t))
    print("p = " + str(p))

    fig, ax = plt.subplots(figsize=(12, 10))
    bplot = plt.boxplot(x=[predicted_ablation_vol, effective_ablation_vol],
                        notch=False,
                        patch_artist=True,
                        widths=0.4)
    for element in ['medians', 'fliers', 'whiskers', 'caps']:
        plt.setp(bplot[element], color='black', linewidth=2.5)
    boxes = bplot['boxes']
    plt.setp(boxes[1], color='seagreen')
    plt.setp(boxes[0], color='sandybrown')

    if flag_subcapsular is False:
        xticklabels = ['PAV', 'EAV (Deep Tumors)']
    elif flag_subcapsular is True:
        xticklabels = ['PAV', 'EAV (Subcapsular Tumors)']
    else:
        xticklabels = ['PAV', 'EAV']
    xtickNames = plt.setp(ax, xticklabels=xticklabels)
    plt.setp(xtickNames, fontsize=10, color='black')
    plt.ylim([-2, 100])
    plt.ylabel('Ablation Volume (mL)', fontsize=24, color='k')
    plt.tick_params(labelsize=24, color='black')
    ax.tick_params(colors='black', labelsize=24, color='k')
    ax.set_ylim([-2, 100])

    # The device name is carried by an invisible artist. Without a name there
    # is nothing to list, so the (empty, warning-raising) legend is skipped.
    if device_name is not None:
        plt.plot([], [], ' ', label=device_name)
        plt.legend(fontsize=24, loc='upper right', labelspacing=1,
                   frameon=False)

    figpathHist = os.path.join(
        "figures", "boxplot volumes EAV vs PAV Solero. Subcapsular - " +
        str(flag_subcapsular))
    gh.save(figpathHist, ext=['png'], close=True, tight=True)
def plotHistDistances(pat_name, lesion_id, rootdir, distanceMap, num_voxels,
                      title, ablation_date):
    """Plot the histogram of surface distances and return the coverage shares.

    The distances are binned in 1 mm steps and coloured by ablation-margin
    range (x < 0, 0 <= x <= 5, x > 5 mm). The y ticks are relabelled as a
    percentage of ``num_voxels`` and the figure is saved into ``rootdir``.

    :param pat_name: patient identifier (title and file name).
    :param lesion_id: lesion identifier; only the part before the first '.'
        of its string form is used.
    :param rootdir: directory in which the PNG is saved.
    :param distanceMap: 1-D collection of signed distances in mm.
    :param num_voxels: number of surface voxels that represents 100 %.
    :param title: prefix of the figure title.
    :param ablation_date: string appended to the file name.
    :return: tuple ``(x < 0, 0 <= x <= 5, x > 5)`` with the share of
        ``num_voxels`` in each range, in percent.
    """
    # PLOT THE HISTOGRAM FOR THE MAUERER EUCLIDIAN DISTANCES
    lesion_id = str(lesion_id).split('.')[0]
    figName_hist = 'Pat_' + str(pat_name) + '_Lesion' + str(
        lesion_id) + '_AblationDate_' + ablation_date + '_histogram'

    distances = np.asarray(distanceMap).ravel()
    min_val = int(np.floor(distances.min()))
    max_val = int(np.ceil(distances.max()))
    fig, ax = plt.subplots(figsize=(18, 16))
    col_height, bins, patches = ax.hist(distances,
                                        ec='darkgrey',
                                        bins=range(min_val - 1, max_val + 1))

    # %%
    # Share of the surface per margin range, counted on the distances
    # themselves so that it matches the legend text exactly. The shares used
    # to be summed per histogram bin with `0 <= left_edge <= 5`, which booked
    # the whole [5, 6) mm bin as "0 <= x <= 5".
    sum_perc_nonablated = 100.0 * np.count_nonzero(
        distances < 0) / num_voxels
    sum_perc_insuffablated = 100.0 * np.count_nonzero(
        (distances >= 0) & (distances <= 5)) / num_voxels
    sum_perc_ablated = 100.0 * np.count_nonzero(distances > 5) / num_voxels

    # %%
    # Colour every bar by the range its bin belongs to. `bins` holds the
    # edges, so zipping pairs each patch with its left edge.
    for left_edge, patch in zip(bins, patches):
        if left_edge < 0:
            plt.setp(patch,
                     label='Ablation Surface Margin ' + r'$x < 0$' + 'mm :' +
                     " %.2f" % sum_perc_nonablated + '%')
        elif left_edge < 5:
            plt.setp(patch,
                     'facecolor',
                     'orange',
                     label='Ablation Surface Margin ' +
                     r'$0 \leq x \leq 5$' + 'mm: ' +
                     "%.2f" % sum_perc_insuffablated + '%')
        else:
            plt.setp(patch,
                     'facecolor',
                     'darkgreen',
                     label='Ablation Surface Margin ' + r'$x > 5$' + 'mm: ' +
                     " %.2f" % sum_perc_ablated + '%')

    # %%
    # edit the axes limits and labels
    plt.xlabel('Euclidean Distances (mm)', fontsize=30, color='black')
    plt.tick_params(labelsize=30, color='black')
    ax.tick_params(colors='black', labelsize=30)
    plt.grid(True)
    # TODO: set equal axis limits
    ax.set_xlim([-15, 15])

    # edit the y-ticks: change to percentage of surface
    tick_counts, _ = plt.yticks()
    percentage_surface_rounded = np.round((tick_counts / num_voxels) * 100)
    yticks_percent = [str(x) + '%' for x in percentage_surface_rounded]
    # Voxel count matching each rounded percentage. Same value as the former
    # `rounded * ticks / percent`, without the 0 / 0 at the zero tick.
    new_yticks = percentage_surface_rounded * num_voxels / 100.0
    new_yticks[0] = 0
    plt.yticks(new_yticks, yticks_percent)
    plt.ylabel('Frequency of percentage of tumor surface voxels',
               fontsize=30,
               color='black')

    # One legend entry per range instead of one per bar.
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = OrderedDict(zip(labels, handles))
    plt.legend(by_label.values(), by_label.keys(), fontsize=30, loc='best')

    plt.title(title + '. Patient ' + str(pat_name) + '. Lesion ' +
              str(lesion_id),
              fontsize=30)
    figpathHist = os.path.join(rootdir, figName_hist)
    gh.save(figpathHist, width=18, height=16, ext=['png'])

    # return the percentages
    return sum_perc_nonablated, sum_perc_insuffablated, sum_perc_ablated
def interpolation_fct(df_ablation,
                      df_radiomics,
                      title=None,
                      flag_needle_error=False,
                      flag_overlap='Dice'):
    """Scatter one ablation-margin pie chart per case.

    The brochure volume is linearly interpolated (``griddata``) at the
    (power, time) pairs of ``df_radiomics``. Each case is drawn with
    ``draw_pie`` as a pie of its three 'safety_margin_distribution_*'
    shares, placed either at (predicted volume, measured volume) or at
    (needle error, overlap metric). The figure is saved under ``figures/``.

    NOTE: rows of ``df_radiomics`` lacking 'Power' or 'Time_Duration_Applied'
    are dropped **in place**.

    :param df_ablation: brochure table with the columns 'Power',
        'Time_Duration_Applied' and 'Predicted Ablation Volume (ml)'.
    :param df_radiomics: measured cases; needs 'Power',
        'Time_Duration_Applied', 'Ablation Volume [ml]', the three
        'safety_margin_distribution_0/5/10' columns and, with
        ``flag_needle_error``, 'needle_error' plus the ``flag_overlap``
        column.
    :param title: legend title and file-name prefix; ``None`` means no
        title and an empty prefix.
    :param flag_needle_error: unless exactly ``False`` plot the metric
        chosen by ``flag_overlap`` against the lateral needle error.
    :param flag_overlap: 'Dice', 'Volume Overlap Error' or
        'Tumour residual volume [ml]'; any other value plots the EAV:PAV
        ratio.
    :return: None
    """
    fontsize = 20
    # One colour per margin range, shared by the pies and the legend (the
    # legend used to show 'darkgreen' for the 'green' wedge).
    pie_colors = ['red', 'orange', 'green']
    # `title + "..."` used to fail for the default title=None.
    title_prefix = '' if title is None else title

    # perform interpolation as a function of power and time
    # EXTRACT VALUES FROM THE MWA BROCHURE
    n_brochure = len(df_ablation)
    points_power = np.asarray(df_ablation['Power']).reshape((n_brochure, 1))
    points_time = np.asarray(
        df_ablation['Time_Duration_Applied']).reshape((n_brochure, 1))
    points = np.hstack((points_power, points_time))
    values = np.asarray(
        df_ablation['Predicted Ablation Volume (ml)']).reshape(
            (n_brochure, 1))

    # EXTRACT VALUES FROM RADIOMICS (Effective measured values)
    df_radiomics.dropna(subset=['Power', 'Time_Duration_Applied'],
                        inplace=True)
    n_cases = len(df_radiomics)
    grid_x = np.asarray(df_radiomics['Power']).reshape((n_cases, 1))
    grid_y = np.asarray(
        df_radiomics['Time_Duration_Applied']).reshape((n_cases, 1))
    xi = np.hstack((grid_x, grid_y))
    ablation_vol_interpolated_brochure = griddata(points, values, xi,
                                                  method='linear')

    # PREDICTED VS MEASURED SCATTER PIE CHART
    ablation_vol_measured = np.asarray(
        df_radiomics['Ablation Volume [ml]']).reshape(n_cases, 1)
    ratios_0 = df_radiomics.safety_margin_distribution_0.tolist()
    ratios_5 = df_radiomics.safety_margin_distribution_5.tolist()
    ratios_10 = df_radiomics.safety_margin_distribution_10.tolist()

    # Select the coordinates and the axis decoration for the chosen mode.
    if flag_needle_error is False:
        x_values = ablation_vol_interpolated_brochure
        y_values = ablation_vol_measured
        ylabel_text = 'Effective Ablation Volume (mL)'
        xlabel_text = 'Predicted Ablation Volume Brochure (mL)'
        label_fontsize = fontsize
        x_limits, y_limits = [0, 80], [0, 80]
    else:
        # default metric: ratio EAV/PAV
        y_values = ablation_vol_measured / ablation_vol_interpolated_brochure
        fig_title = '_ratio_EAV_PAV'
        ylabel_text = 'R(EAV:PAV)'
        x_values = np.asarray(df_radiomics['needle_error']).reshape(
            n_cases, 1)
        if flag_overlap == 'Dice':
            y_values = np.asarray(df_radiomics['Dice']).reshape(n_cases, 1)
            fig_title = '_Dice_'
            ylabel_text = 'Dice Score'
        if flag_overlap == 'Volume Overlap Error':
            y_values = np.asarray(
                df_radiomics['Volume Overlap Error']).reshape(n_cases, 1)
            fig_title = '_Volume Overlap Error'
            ylabel_text = 'Volume Overlap Error'
        if flag_overlap == 'Tumour residual volume [ml]':
            y_values = np.asarray(
                df_radiomics['Tumour residual volume [ml]']).reshape(
                    n_cases, 1)
            fig_title = '_Tumour residual volume [ml]'
            ylabel_text = 'Tumour residual volume (mL)'
        xlabel_text = 'Lateral Error (mm)'
        label_fontsize = fontsize + 2
        x_limits, y_limits = [-0.2, 6], [-0.2, 3]

    # ACTUALLY PLOT STUFF
    fig, ax = plt.subplots()
    for idx in range(len(ablation_vol_interpolated_brochure)):
        xs = x_values[idx]
        ys = y_values[idx]
        # Skip cases without coordinates. An explicit `not ... any()`
        # replaces the bitwise `~`, which is only a logical not for numpy
        # booleans.
        if np.isnan(xs).any() or np.isnan(ys).any():
            continue
        shares = [ratios_0[idx] / 100, ratios_5[idx] / 100,
                  ratios_10[idx] / 100]
        draw_pie(shares, xs, ys, 500, colors=pie_colors, ax=ax)

    plt.ylabel(ylabel_text, fontsize=label_fontsize)
    plt.xlabel(xlabel_text, fontsize=label_fontsize)
    plt.xlim(x_limits)
    plt.ylim(y_limits)

    red_patch = mpatches.Patch(color=pie_colors[0],
                               label='Ablation Margin ' + r'$x < 0$' + 'mm')
    orange_patch = mpatches.Patch(color=pie_colors[1],
                                  label='Ablation Margin ' +
                                  r'$0 \leq x \leq 5$' + 'mm')
    green_patch = mpatches.Patch(color=pie_colors[2],
                                 label='Ablation Margin ' + r'$x > 5$' + 'mm')
    plt.legend(handles=[red_patch, orange_patch, green_patch],
               fontsize=fontsize,
               loc='best',
               title=title,
               title_fontsize=21)
    ax.tick_params(axis='y', labelsize=fontsize, color='k')
    ax.tick_params(axis='x', labelsize=fontsize, color='k')
    plt.tick_params(labelsize=fontsize, color='black')

    if flag_needle_error is True:
        figpath = os.path.join(
            "figures",
            title_prefix + "_pie_charts_euclidean_error" + fig_title)
    else:
        figpath = os.path.join("figures", title_prefix + "_pie_charts")
    gh.save(figpath, width=14, height=10, close=True, dpi=600, tight=True)
    plt.show()
def scatter_plot(df1, **kwargs):
    """
    Scatter two columns of a dataframe and hand the figure to ``gh.save``.

    All options are passed as keyword arguments:

    :param df1: dataframe holding the data; it is not modified
    :param x_data: column plotted on the x axis (required, otherwise nothing is drawn)
    :param y_data: column plotted on the y axis (required, otherwise nothing is drawn)
    :param title: file name of the figure inside the ``figures`` folder (required)
    :param x_label: x axis label; the ``x_data`` column name when omitted
    :param y_label: y axis label; the ``y_data`` column name when omitted
    :param colormap: column used to colour the markers; a colorbar is added
    :param size: column used to scale the markers; drawn on a fresh figure with a size legend
    :param lin_reg: any value other than None adds a least-squares line plus n, r and R^2 legend entries
    :param legend_title: optional title for the legend
    :param x_lim: upper x limit, applied only when ``y_lim`` is given as well
    :param y_lim: upper y limit, applied only when ``x_lim`` is given as well
    :return: None
    :raises KeyError: if ``title`` is missing
    """
    x_col = kwargs.get('x_data')
    y_col = kwargs.get('y_data')
    # validate before touching matplotlib so a bad call leaves no empty figure behind
    if x_col is None:
        print('No X input data to plot')
        return
    if y_col is None:
        print('No Y input data to plot')
        return
    title = kwargs['title']

    fontsize = 24
    fig, ax = plt.subplots()
    # rows lacking either coordinate can neither be drawn nor used in the regression
    df_to_plot = df1.dropna(subset=[y_col, x_col])

    colormap_col = kwargs.get('colormap')
    if colormap_col is not None:
        plt.scatter(df_to_plot[x_col], df_to_plot[y_col],
                    c=df_to_plot[colormap_col], cmap='viridis', s=20)
        cbar = plt.colorbar()
        cbar.ax.set_title(colormap_col, fontsize=8)
    else:
        df_to_plot.plot.scatter(x=x_col, y=y_col, s=100, alpha=0.7,
                                color='purple', marker='*')

    size_col = kwargs.get('size')
    if size_col is not None:
        marker_area = np.asarray(50 * (df_to_plot[size_col] + 1), dtype=float)
        has_size = ~np.isnan(marker_area)
        fig, ax = plt.subplots()
        # x, y and s must have the same length, so rows without a size are
        # dropped from all three rather than from the sizes alone
        sc = ax.scatter(df_to_plot[x_col].to_numpy()[has_size],
                        df_to_plot[y_col].to_numpy()[has_size],
                        s=marker_area[has_size], alpha=0.6, marker='*',
                        color='purple')
        # func undoes the 50 * (value + 1) scaling so the legend shows column values
        size_legend = ax.legend(*sc.legend_elements("sizes", num=6,
                                                    func=lambda area: area / 50 - 1,
                                                    color='steelblue'),
                                title=size_col,
                                labelspacing=1.5,
                                borderpad=1.5,
                                handletextpad=3.5,
                                fontsize=fontsize,
                                loc='upper right')
        size_legend.get_title().set_fontsize(str(fontsize))
        # kept as a plain artist so the legend created further down does not replace it
        ax.add_artist(size_legend)

    if kwargs.get('lin_reg') is not None:
        X = np.asarray(df_to_plot[x_col]).reshape(-1, 1)
        Y = np.asarray(df_to_plot[y_col]).reshape(-1, 1)
        regr = linear_model.LinearRegression()
        regr.fit(X, Y)
        fitted = regr.predict(X)
        SS_tot = np.sum((Y - np.mean(Y))**2)
        SS_res = np.sum((Y - fitted)**2)
        r_squared = 1 - (SS_res / SS_tot)
        correlation_coef = np.corrcoef(X[:, 0], Y[:, 0])[0, 1]
        plt.plot(X, fitted, color='orange', linewidth=1.5,
                 label='Linear Regression')
        # invisible artists: they only carry text into the legend.
        # n is the number of plotted samples, not the length of the column name.
        plt.plot([], [], ' ', label='n = ' + str(len(X)))
        plt.plot([], [], ' ', label=r'$r: {0:.2f}$'.format(correlation_coef))
        plt.plot([], [], ' ', label=r'$R^2:{0:.2f}$'.format(r_squared))

    # a legend without labelled artists would only trigger a matplotlib warning
    if plt.gca().get_legend_handles_labels()[0]:
        if kwargs.get('legend_title'):
            plt.legend(fontsize=fontsize, loc='best',
                       title=kwargs['legend_title'], title_fontsize=fontsize)
        else:
            plt.legend(fontsize=fontsize, loc='upper left')

    if kwargs.get('x_lim') is not None and kwargs.get('y_lim') is not None:
        plt.xlim([0, kwargs['x_lim']])
        plt.ylim([0, kwargs['y_lim']])

    # each axis falls back to its column name independently, so passing both
    # x_label and y_label labels both axes
    x_label = kwargs.get('x_label')
    y_label = kwargs.get('y_label')
    plt.xlabel(x_col if x_label is None else x_label, fontsize=fontsize, color='k')
    plt.ylabel(y_col if y_label is None else y_label, fontsize=fontsize, color='k')

    figpathHist = os.path.join("figures", title)
    gh.save(figpathHist, width=12, height=12, ext=["png"], tight=True,
            close=True, dpi=600)
    plt.close('all')
def _plot_volume_vs_energy_by_group(df, group_column, title, figure_name,
                                    title_color=None):
    """Plot ablation volume against energy with one marker series per value of ``group_column`` and save it."""
    fig, ax = plt.subplots()
    for name, group in df.groupby(group_column):
        # the sample count goes straight into the label: editing legend texts
        # afterwards is lost as soon as the legend is created again
        ax.plot(group["Energy [kj]"],
                group["Ablation Volume [ml]"],
                marker='o',
                linestyle='',
                ms=14,
                label=str(name) + ' N=' + str(len(group)))
    title_kwargs = {'fontsize': 20}
    if title_color is not None:
        title_kwargs['color'] = title_color
    plt.title(title, **title_kwargs)
    plt.xlabel('Energy [kJ]', fontsize=20, color='black')
    plt.ylabel('Ablation Volume [ml]', fontsize=20, color='black')
    plt.tick_params(labelsize=20, color='black')
    ax.legend(title_fontsize=20)
    ax.tick_params(colors='black', labelsize=20)
    figpathHist = os.path.join("figures", figure_name)
    gh.save(figpathHist, width=18, height=16, ext=['png'], close=True)


def scatter_plot_groups(df1_no_outliers):
    """
    Save three "ablation volume vs energy" scatter plots, grouped by vessel
    proximity, by device name and by chemotherapy before ablation.

    Each legend entry shows the group value followed by its number of lesions.

    :param df1_no_outliers: dataframe with the columns 'Energy [kj]',
        'Ablation Volume [ml]', 'Proximity_to_vessels', 'Device_name' and
        'chemo_before_ablation'
    :return: None
    """
    _plot_volume_vs_energy_by_group(
        df1_no_outliers,
        'Proximity_to_vessels',
        "Ablation Volume vs Energy Grouped by Proximity to Vessels",
        "Ablation Volume vs Energy Grouped by Proximity to Vessels.",
        title_color='black')
    _plot_volume_vs_energy_by_group(
        df1_no_outliers,
        'Device_name',
        'Ablation Volume [ml] vs Energy [kJ] per MWA Device Type.',
        "Ablation Volume vs Energy per MWA Device Category.")
    _plot_volume_vs_energy_by_group(
        df1_no_outliers,
        'chemo_before_ablation',
        'Ablation Volume [ml] vs Energy [kJ] grouped by Chemotherapy Treatment before Ablation.',
        "Ablation Volume vs Energy grouped by chemotherapy.")
def plot_boxplots_chemo(df):
    """
    Compare ablation volumes of lesions with and without chemotherapy before ablation.

    Saves one histogram per group, checks both groups for normality with the
    Shapiro-Wilk test, compares them with a Mann-Whitney U test or an
    independent t-test, and saves a notched boxplot of the volumes per group.

    The statistical tests use the rows that have both an ablation volume and
    an energy value; the boxplot uses every row that has an ablation volume.

    :param df: dataframe with the columns 'Ablation Volume [ml]', 'Energy [kj]'
        and 'chemo_before_ablation' ('Yes' / 'No'); it is not modified
    :return: tuple (test statistic, p-value) of the group comparison
    """
    chemo_flags = {'No': False, 'Yes': True}
    alpha = 0.05

    # samples for the statistical tests (work on copies, never on the caller's frame)
    df_chemo = df.copy()
    df_chemo['Ablation Volume [ml] / Energy [kJ]'] = df_chemo[
        'Ablation Volume [ml]'] / df_chemo['Energy [kj]']
    df_chemo['chemo_before_ablation'] = df_chemo[
        'chemo_before_ablation'].replace(chemo_flags)
    df_chemo = df_chemo.dropna(subset=[
        'Ablation Volume [ml] / Energy [kJ]', 'chemo_before_ablation'
    ])

    # samples for the boxplot
    df_box = df.copy()
    df_box['chemo_before_ablation'] = df_box['chemo_before_ablation'].replace(
        chemo_flags)
    df_box = df_box.dropna(
        subset=['Ablation Volume [ml]', 'chemo_before_ablation'])

    no_chemo = df_chemo.loc[df_chemo['chemo_before_ablation'] == False,
                            'Ablation Volume [ml]'].tolist()
    chemo = df_chemo.loc[df_chemo['chemo_before_ablation'] == True,
                         'Ablation Volume [ml]'].tolist()

    histograms = (
        (no_chemo, 'No Chemotherapy', 'Ablation Volume [ml]',
         "histogram ablation volumes no chemo"),
        (chemo, 'Chemotherapy', 'Ablation Volume [ml] ',
         "histogram ablation volumes chemo"),
    )
    for sample, hist_title, hist_ylabel, figure_name in histograms:
        plt.subplots(figsize=(12, 10))
        plt.hist(sample)
        plt.title(hist_title)
        plt.ylabel(hist_ylabel)
        gh.save(os.path.join("figures", figure_name), ext=['png'], close=True)

    print('no of tumors with chemo:', str(len(chemo)))
    print('no of tumors with no chemo:', str(len(no_chemo)))

    # normality check of both samples
    stat, p_chemo = shapiro(chemo)
    if p_chemo > alpha:
        msg = 'Sample Chemo looks Gaussian (fail to reject H0)'
    else:
        msg = 'Sample Chemo does not look Gaussian (reject H0)'
    print(msg)
    stat, p_no_chemo = shapiro(no_chemo)
    if p_no_chemo > alpha:
        msg = 'Sample No Chemo looks Gaussian (fail to reject H0)'
    else:
        msg = 'Sample No Chemo does not look Gaussian (reject H0)'
    print(msg)

    # the t-test assumes that BOTH samples are Gaussian, so a single
    # non-Gaussian sample is enough to switch to the rank-based test
    if p_no_chemo < alpha or p_chemo < alpha:
        t, p = stats.mannwhitneyu(chemo, no_chemo)
        print(
            'mann withney u test applied for samples coming from a non Gaussian distribution:'
        )
    else:
        t, p = stats.ttest_ind(chemo, no_chemo)
        print('ttest applied for samples coming from a Gaussian distribution:')
    print("t = " + str(t))
    print("p = " + str(p))

    fig, ax = plt.subplots(figsize=(12, 10))
    bp_dict = df_box.boxplot(column=['Ablation Volume [ml]'],
                             ax=ax,
                             notch=True,
                             by='chemo_before_ablation',
                             patch_artist=True,
                             return_type='both')
    ax.set_xlabel('')
    # no plt.show() here: displaying the figure before it is styled and saved
    # can leave the following calls working on an already closed figure
    for row_key, (ax, row) in bp_dict.items():
        for flier in row['fliers']:
            flier.set_marker('o')
        for i, box in enumerate(row['boxes']):
            # groups are ordered False, True: first box = no chemotherapy
            if i == 0:
                box.set_facecolor('Purple')
                box.set_edgecolor('DarkMagenta')
            else:
                box.set_facecolor('LightPink')
                box.set_edgecolor('HotPink')
        for line in row['medians'] + row['whiskers']:
            line.set_color(color='Black')
            line.set_linewidth(2)

    xticklabels = [
        'No Chemotherapy before Ablation',
        'Chemotherapy Administered before Ablation'
    ]
    xtickNames = plt.setp(ax, xticklabels=xticklabels)
    plt.setp(xtickNames, fontsize=10, color='black')
    plt.ylim([-2, 120])
    plt.ylabel('Ablation Volume [ml]', fontsize=12, color='k')
    plt.tick_params(labelsize=10, color='black')
    ax.tick_params(colors='black', labelsize=10, color='k')
    ax.set_ylim([-2, 120])
    plt.xlabel('')
    # pandas adds its own "grouped by" super title; blank it out
    fig.suptitle('')
    plt.title(
        'Comparison of Ablation Volumes [ml] from MAVERRIC Dataset by Chemotherapy',
        fontsize=12)
    figpathHist = os.path.join(
        "figures", "boxplot ablation volumes by chemo before ablation")
    gh.save(figpathHist, ext=['png'], close=True)
    return t, p
def plot_subplots(df_radiomics):
    """
    Plot predicted (PAV) against effective (EAV) ablation volume in three panels.

    Panel 1 shows all lesions, panel 2 splits them by subcapsular location and
    panel 3 by chemotherapy before ablation. Every panel carries a regression
    line whose legend entry shows the Pearson r of PAV vs EAV for that
    subgroup. Slope and p-value of the overall regression and the Wilcoxon
    signed-rank p-values of the subgroups are printed.

    :param df_radiomics: dataframe with the columns 'Predicted_Ablation_Volume',
        'Ablation Volume [ml]', 'Energy [kj]', 'Device_name',
        'Proximity_to_surface' (compared against True / False) and
        'chemo_before_ablation' (compared against 'Yes' / 'No')
    :return: None; the figure is handed to ``gh.save`` with a time-stamped name
    """
    fontsize = 10
    fontsize_legend = 9
    scatter_style = {"s": 11, "alpha": 0.6}
    f, axes = plt.subplots(1, 3, figsize=(20, 20))

    df = pd.DataFrame()
    df['PAV'] = df_radiomics['Predicted_Ablation_Volume']
    df['EAV'] = df_radiomics['Ablation Volume [ml]']
    df['Energy (kJ)'] = df_radiomics['Energy [kj]']
    df['MWA Systems'] = df_radiomics['Device_name']
    df['Proximity_to_surface'] = df_radiomics['Proximity_to_surface']
    df['Chemotherapy'] = df_radiomics['chemo_before_ablation']
    # one EAV column per subgroup; rows of the opposite subgroup are blanked
    # so that regplot only draws the members of that subgroup
    for subgroup in ('Chemo_yes', 'Chemo_no', 'Subcapsular', 'Non-Subcapsular'):
        df[subgroup] = df['EAV']
    df.loc[df.Proximity_to_surface == False, 'Subcapsular'] = np.nan
    df.loc[df.Proximity_to_surface == True, 'Non-Subcapsular'] = np.nan
    df.loc[df.Chemotherapy == 'No', 'Chemo_yes'] = np.nan
    df.loc[df.Chemotherapy == 'Yes', 'Chemo_no'] = np.nan
    print('Nr Samples used:', str(len(df)))

    # 1st panel: PAV vs EAV for all lesions.
    # x = PAV, y = EAV like the plotted line and the subgroup regressions
    # below, so the printed slope belongs to the line that is drawn.
    slope, intercept, r_all, p_value, std_err = stats.linregress(
        df['PAV'], df['EAV'])
    print('slope value PAV vs. EAV:', slope)
    print('p-value PAV vs EAV:', p_value)
    sns.regplot(x="PAV",
                y="EAV",
                data=df,
                scatter_kws=dict(scatter_style),
                color=sns.xkcd_rgb["violet"],
                line_kws={'label': r'$r:{0:.2f}$'.format(r_all)},
                ax=axes[0])
    axes[0].legend(fontsize=fontsize_legend, loc='upper left')
    axes[0].set_ylabel('EAV (mL)', fontsize=fontsize)
    axes[0].set_xlabel('PAV (mL)', fontsize=fontsize)

    # 2nd panel: subcapsular vs non-subcapsular
    subcapsular_false = df[df['Proximity_to_surface'] == False]
    subcapsular_true = df[df['Proximity_to_surface'] == True]
    r_1 = stats.linregress(subcapsular_false['PAV'],
                           subcapsular_false['EAV'])[2]
    r_2 = stats.linregress(subcapsular_true['PAV'],
                           subcapsular_true['EAV'])[2]
    # Wilcoxon paired signed rank test
    w, p = stats.wilcoxon(subcapsular_true['PAV'], subcapsular_true['EAV'])
    print('p-val wilcoxon subcapsular true:', p)
    w, p = stats.wilcoxon(subcapsular_false['PAV'], subcapsular_false['EAV'])
    print('p-val wilcoxon subcapsular false:', p)
    sns.regplot(y="Non-Subcapsular",
                x="PAV",
                data=df,
                scatter_kws=dict(scatter_style),
                line_kws={'label': r'No: $r = {0:.2f}$'.format(r_1)},
                ax=axes[1])
    sns.regplot(y="Subcapsular",
                x="PAV",
                data=df,
                scatter_kws=dict(scatter_style),
                color=sns.xkcd_rgb["orange"],
                line_kws={'label': r'Yes: $r = {0:.2f}$'.format(r_2)},
                ax=axes[1])
    axes[1].legend(fontsize=fontsize_legend,
                   loc='best',
                   title='Subcapsular',
                   title_fontsize=fontsize_legend)
    axes[1].set_yticklabels([])
    axes[1].set_ylabel('')
    axes[1].set_xlabel('PAV (mL)', fontsize=fontsize)
    # the middle panel carries the title of the whole figure
    axes[1].set_title(
        'Predicted (PAV) vs Effective Ablation Volume (EAV) for 3 MWA Devices',
        fontsize=fontsize,
        pad=20)

    # 3rd panel: chemotherapy vs no chemotherapy
    chemo_false = df[df['Chemotherapy'] == 'No']
    chemo_true = df[df['Chemotherapy'] == 'Yes']
    r_1 = stats.linregress(chemo_false['PAV'], chemo_false['EAV'])[2]
    r_2 = stats.linregress(chemo_true['PAV'], chemo_true['EAV'])[2]
    w, p = stats.wilcoxon(chemo_true['PAV'], chemo_true['EAV'])
    print('p-val chemotherapy yes:', p)
    w, p = stats.wilcoxon(chemo_false['PAV'], chemo_false['EAV'])
    print('p-val chemotherapy no:', p)
    sns.regplot(y='Chemo_yes',
                x="PAV",
                data=df,
                scatter_kws=dict(scatter_style),
                color=sns.xkcd_rgb["teal green"],
                ax=axes[2],
                line_kws={'label': r'Yes: $r = {0:.2f}$'.format(r_2)})
    sns.regplot(y='Chemo_no',
                x="PAV",
                data=df,
                scatter_kws=dict(scatter_style),
                color=sns.xkcd_rgb["slate grey"],
                ax=axes[2],
                line_kws={'label': r'No: $r = {0:.2f}$'.format(r_1)})
    axes[2].legend(fontsize=fontsize_legend,
                   loc='best',
                   title='Chemotherapy',
                   title_fontsize=fontsize_legend)
    axes[2].set_yticklabels([])
    axes[2].set_ylabel('')
    axes[2].set_xlabel('PAV (mL)', fontsize=fontsize)

    # identical limits, ticks and a square aspect on all three panels
    plt.subplots_adjust(wspace=0.1)
    for ax in axes:
        ax.set_xlim([0, 81])
        ax.set_ylim([0, 81])
        ax.xaxis.set_ticks(np.arange(0, 81, 20))
        ax.set(adjustable='box', aspect='equal')
        ax.tick_params(axis='both', which='major', labelsize=fontsize)

    # save the figure
    timestr = time.strftime("%H%M%S-%Y%m%d")
    figpath = os.path.join("figures", 'All_3MWA_SUbcapsular_Chemo_' + timestr)
    gh.save(figpath,
            ext=["png"],
            width=12,
            height=12,
            close=True,
            tight=True,
            dpi=600)
plt.setp(xtickNames, fontsize=20, color='black') plt.ylabel('Ablation Margin Distances [mm]', fontsize=18, color='black') ax.tick_params(colors='black') plt.xticks(fontsize=18) ax.tick_params(axis='both', labelsize=20) # [ax_tmp.set_xlabel('aaaa') for ax_tmp in np.asarray(bp).reshape(-1)] # fig = np.asarray(bp).reshape(-1)[0].get_figure() plt.title( 'Ablation Margin by Local Tumor Progression (LTP). Number of samples: ' + str(len(df_final)) + '.', fontsize=20) fig.suptitle('') # ax.set_ylim([-10, 17]) figpathHist = os.path.join( "figures", "boxplot LTP ablation margin_not_subcapsular_outliers") gh.save(figpathHist, ext=['png'], close=True) # %% fig, ax = plt.subplots(figsize=(10, 8)) bp_dict = df_final.boxplot(column='Ablation Volume [ml]', by='LTP', ax=ax, return_type='both', patch_artist=True) for row_key, (ax, row) in bp_dict.iteritems(): ax.set_xlabel('') for i, box in enumerate(row['boxes']): box.set_facecolor('CornflowerBlue') box.set_edgecolor('RoyalBlue') for i, box in enumerate(row['medians']): box.set_color(color='Black')
def _margin_percentages(df):
    """Return one (margin < 0, 0 <= margin < 5, margin >= 5) percentage triple per row of ``df``."""
    return list(
        zip(df.safety_margin_distribution_0.tolist(),
            df.safety_margin_distribution_5.tolist(),
            df.safety_margin_distribution_10.tolist()))


def _draw_margin_pies(ax, xs, ys, margins):
    """Draw one ablation-margin pie per (x, y) pair, skipping pairs with a missing coordinate."""
    for x_val, y_val, percentages in zip(xs, ys, margins):
        if np.isnan(x_val).any() or np.isnan(y_val).any():
            continue
        # the margin distribution is stored in percent, the pie needs fractions
        draw_pie([pct / 100 for pct in percentages],
                 x_val,
                 y_val,
                 500,
                 colors=['red', 'orange', 'green'],
                 ax=ax)


def call_plot_pies(df_radiomics,
                   title=None,
                   flag_plot_type=None,
                   flag_overlap=None):
    """
    Scatter plot in which every lesion is drawn as a pie chart of its ablation margin distribution.

    The columns 'MEV-MIV' and 'R(EAV:PAV)' are added to ``df_radiomics``.

    :param df_radiomics: dataframe with the volume, error and
        ``safety_margin_distribution_*`` columns
    :param title: title of the legend
    :param flag_plot_type: 'PAV_EAV' plots predicted vs effective ablation volume,
        'MEV_MIV' plots R(EAV:PAV) against MEV-MIV (rows with an outer ellipsoid
        volume of 150 or more, or a negative MEV-MIV, are left out); any other
        value plots a metric against the needle error
    :param flag_overlap: metric on the y axis of the needle-error plot: 'Dice',
        'Volume Overlap Error' or 'Tumour residual volume [ml]'; R(EAV:PAV) otherwise
    :return: None; the figure is handed to ``gh.save`` with a time-stamped name
    """
    fontsize = 18
    nr_rows = len(df_radiomics)
    ablation_vol_interpolated_brochure = np.asarray(
        df_radiomics['Predicted_Ablation_Volume']).reshape(nr_rows, 1)
    ablation_vol_measured = np.asarray(
        df_radiomics['Ablation Volume [ml]']).reshape(nr_rows, 1)
    df_radiomics['MEV-MIV'] = df_radiomics[
        'Outer Ellipsoid Volume'] - df_radiomics['Inner Ellipsoid Volume']
    df_radiomics['R(EAV:PAV)'] = df_radiomics[
        'Ablation Volume [ml]'] / df_radiomics['Predicted_Ablation_Volume']
    # only assigned a real value by the needle-error plot; used for the file
    # name when no plot type was given
    fig_title = ''

    fig, ax = plt.subplots()
    if flag_plot_type == 'PAV_EAV':
        _draw_margin_pies(ax, ablation_vol_interpolated_brochure,
                          ablation_vol_measured,
                          _margin_percentages(df_radiomics))
        plt.ylabel('Effective Ablation Volume (mL)', fontsize=fontsize)
        plt.xlabel('Predicted Ablation Volume (mL)', fontsize=fontsize)
        plt.xlim([0, 80])
        plt.ylim([0, 80])
    elif flag_plot_type == 'MEV_MIV':
        # drop the rows where MIV > MEV, since the minimum inscribed ellipsoid
        # (MIV) should always be smaller than the maximum enclosing ellipsoid (MEV)
        df_radiomics = df_radiomics[
            df_radiomics['Outer Ellipsoid Volume'] < 150]
        print('Nr Samples used for Outer Ellipsoid Volume < 150 ml:',
              len(df_radiomics))
        df_radiomics = df_radiomics[df_radiomics['MEV-MIV'] >= 0]
        print('Nr Samples used for MEV-MIV >=0 :', len(df_radiomics))
        nr_rows = len(df_radiomics)
        r_eav_pav = np.asarray(df_radiomics['R(EAV:PAV)']).reshape(nr_rows, 1)
        mev_miv = np.asarray(df_radiomics['MEV-MIV']).reshape(nr_rows, 1)
        # the margins are read AFTER filtering so that every pie shows the
        # distribution of the lesion it is drawn for
        _draw_margin_pies(ax, mev_miv, r_eav_pav,
                          _margin_percentages(df_radiomics))
        plt.xlabel('Ablation Volume Irregularity (MEV-MIV) (mL)',
                   fontsize=fontsize)
        plt.ylabel('R(EAV:PAV)', fontsize=fontsize)
    else:
        # metric vs needle error; the default metric is the ratio EAV/PAV
        y = ablation_vol_measured / ablation_vol_interpolated_brochure
        fig_title = '_ratio_EAV_PAV'
        ylabel_text = 'R(EAV:PAV)'
        needle_error = np.asarray(df_radiomics['needle_error']).reshape(
            nr_rows, 1)
        # column name -> (file name suffix, y axis label)
        overlap_metrics = {
            'Dice': ('_Dice_', 'Dice Score'),
            'Volume Overlap Error':
            ('_Volume Overlap Error', 'Volume Overlap Error'),
            'Tumour residual volume [ml]':
            ('_Tumour residual volume [ml]', 'Tumour residual volume (mL)'),
        }
        if flag_overlap in overlap_metrics:
            fig_title, ylabel_text = overlap_metrics[flag_overlap]
            y = np.asarray(df_radiomics[flag_overlap]).reshape(nr_rows, 1)
        _draw_margin_pies(ax, needle_error, y,
                          _margin_percentages(df_radiomics))
        plt.ylabel(ylabel_text, fontsize=fontsize + 2)
        plt.xlabel('Lateral Error (mm)', fontsize=fontsize + 2)
        plt.xlim([-0.2, 6])
        plt.ylim([-0.2, 3])

    # legend explaining the pie colours
    red_patch = mpatches.Patch(color='red',
                               label='Ablation Margin ' + r'$x < 0$' + 'mm')
    orange_patch = mpatches.Patch(color='orange',
                                  label='Ablation Margin ' +
                                  r'$0 \leq x < 5$' + 'mm')
    green_patch = mpatches.Patch(color='darkgreen',
                                 label='Ablation Margin ' + r'$x \geq 5$' +
                                 'mm')
    plt.legend(handles=[red_patch, orange_patch, green_patch],
               fontsize=fontsize,
               loc='best',
               title=title,
               title_fontsize=21)
    ax.tick_params(axis='y', labelsize=fontsize, color='k')
    ax.tick_params(axis='x', labelsize=fontsize, color='k')
    plt.tick_params(labelsize=fontsize, color='black')

    timestr = time.strftime("%H%M%S-%Y%m%d")
    # flag_plot_type defaults to None, which cannot be concatenated to a string
    plot_label = fig_title if flag_plot_type is None else flag_plot_type
    figpath = os.path.join("figures", 'pie_charts_' + plot_label + timestr)
    gh.save(figpath,
            ext=["png"],
            width=12,
            height=12,
            close=True,
            tight=True,
            dpi=300)
def scatter_plot(df1, **kwargs):
    """
    Scatter two columns of a dataframe and hand the figure to ``gh.save``.

    All options are passed as keyword arguments:

    :param df1: dataframe holding the data; it is not modified
    :param x_data: column plotted on the x axis (required, otherwise nothing is drawn)
    :param y_data: column plotted on the y axis (required, otherwise nothing is drawn)
    :param title: figure title and file name inside the ``figures`` folder (required)
    :param x_label: x axis label; the ``x_data`` column name when omitted
    :param y_label: y axis label; the ``y_data`` column name when omitted
    :param colormap: column used to colour the markers; a colorbar is added
    :param size: column used to scale the markers (value + 10); takes
        precedence over ``colormap``
    :param lin_reg: any value other than None adds a least-squares line whose
        legend entry shows R^2 and r
    :return: None
    :raises KeyError: if ``title`` is missing
    """
    x_col = kwargs.get('x_data')
    y_col = kwargs.get('y_data')
    # validate before touching matplotlib so a bad call leaves no empty figure behind
    if x_col is None:
        print('No X input data to plot')
        return
    if y_col is None:
        print('No Y input data to plot')
        return
    title = kwargs['title']

    fig, ax = plt.subplots()
    # drop nans
    df_to_plot = df1.dropna(subset=[x_col, y_col])

    # Everything is drawn on `ax`: without ax= pandas opens a figure of its
    # own and the tick / label styling below would land on an empty axes.
    size_col = kwargs.get('size')
    colormap_col = kwargs.get('colormap')
    if size_col is not None:
        df_to_plot.plot.scatter(x=x_col,
                                y=y_col,
                                s=df_to_plot[size_col] + 10,
                                ax=ax)
    elif colormap_col is not None:
        points = ax.scatter(df_to_plot[x_col],
                            df_to_plot[y_col],
                            c=df_to_plot[colormap_col],
                            cmap='viridis',
                            s=20)
        cbar = fig.colorbar(points, ax=ax)
        cbar.ax.set_title(colormap_col, fontsize=8)
    else:
        df_to_plot.plot.scatter(x=x_col, y=y_col, s=14, ax=ax)

    # each axis falls back to its column name independently, so passing both
    # x_label and y_label labels both axes
    x_label = kwargs.get('x_label')
    y_label = kwargs.get('y_label')
    ax.set_xlabel(x_col if x_label is None else x_label, fontsize=8, color='k')
    ax.set_ylabel(y_col if y_label is None else y_label, fontsize=8, color='k')

    if kwargs.get('lin_reg') is not None:
        X = np.asarray(df_to_plot[x_col]).reshape(-1, 1)
        Y = np.asarray(df_to_plot[y_col]).reshape(-1, 1)
        regr = linear_model.LinearRegression()
        regr.fit(X, Y)
        fitted = regr.predict(X)
        SS_tot = np.sum((Y - np.mean(Y))**2)
        SS_res = np.sum((Y - fitted)**2)
        r_squared = 1 - (SS_res / SS_tot)
        correlation_coef = np.corrcoef(X[:, 0], Y[:, 0])[0, 1]
        label = r'$R^2: $ {0:.2f}; r: {1:.2f}'.format(r_squared,
                                                      correlation_coef)
        ax.plot(X, fitted, color='orange', linewidth=1.5, label=label)

    nr_samples = ' Nr. samples: ' + str(len(df_to_plot))
    ax.set_title(title + nr_samples, fontsize=8)
    # a legend without labelled artists would only trigger a matplotlib warning
    if ax.get_legend_handles_labels()[0]:
        ax.legend(fontsize=10)
    ax.tick_params(axis='y', labelsize=8, color='k')
    ax.tick_params(axis='x', labelsize=8, color='k')
    ax.xaxis.label.set_color('black')
    ax.yaxis.label.set_color('black')
    # NOTE: changes the label colour default for every figure created afterwards
    matplotlib.rc('axes', labelcolor='black')

    figpathHist = os.path.join("figures", title)
    gh.save(figpathHist, ext=["png"], close=True)
    plt.close('all')