def qq_plot(self):
    """Draw a Q-Q plot of ``self.log_returns`` and display it.

    The sample is passed to ``pg.qqplot`` with ``dist='norm'`` and
    ``confidence=False``; the figure is then shown with ``plt.show()``.
    Nothing is returned.
    """
    qq_options = {'dist': 'norm', 'confidence': False}
    pg.qqplot(self.log_returns, **qq_options)
    plt.show()
# Z-score the predicted values: subtract the column mean, divide by the
# column standard deviation.
prediction['standarized_prediction'] = (
    prediction['predicted']
    .sub(prediction['predicted'].mean())
    .div(prediction['predicted'].std())
)
final_summary.head()


# In[89]:

# Standardized residuals against standardized predictions, with a
# horizontal reference line at y = 0.
_ = sns.scatterplot(
    x=final_summary['standard_resid'],
    y=prediction['standarized_prediction'],
)
_ = plt.axhline(y=0)
plt.show()


# In[90]:

# Q-Q plot of the standardized residuals.
_ = pg.qqplot(final_summary['standard_resid'])
plt.show()


# In[99]:

# Density-normalized histogram of the studentized residuals.
fig, ax = plt.subplots(figsize=(6, 4))
ax = plt.hist(
    final_summary['student_resid'],
    density=True,
    bins=30,
    edgecolor='black',
    linewidth=1.4,
)
plt.xlabel('student_resid', fontsize=14)
plt.show()


# ##### this assumption was also met
# Overlay the 'theoritical_normal' density in red on the current figure and
# limit the x-axis to [0, max(day2)].
ax = sns.kdeplot(day2['theoritical_normal'], color='red')
ax = plt.xlim([0, day2['day2'].max()])
plt.show()

# Day 3
# Histogram (density scale) plus the empirical KDE in black and the
# 'theoritical_normal' KDE in red.
fig, ax = plt.subplots(figsize=(12, 8))
ax = plt.hist(
    day3['day3'],
    density=True,
    bins=30,
    edgecolor='black',
    color='white',
    linewidth=1.4,
)
ax = sns.kdeplot(day3['day3'], color='black')
ax = sns.kdeplot(day3['theoritical_normal'], color='red')
ax = plt.xlim([0, day3['day3'].max()])
plt.show()

"""## Some QQplots"""

# One square figure per day; the last entry uses a larger canvas.
for qq_series, qq_side in (
    (data['day1'], 8),
    (day2['day2'], 8),
    (day3['day3'], 10),
):
    fig, ax = plt.subplots(figsize=(qq_side, qq_side))
    _ = pg.qqplot(qq_series, ax=ax, confidence=False)
    plt.show()

"""## Some descriptive stats"""

for described in (data['day1'], day2['day2'], day3['day3']):
    print(stats.describe(described))
message = "Shapiro-Wilk Normality test is being perform" else: message = "Omnibus test of normality is being performed" st.success(message) normality = pg.normality(df, dv=x_var, group=y_var, method="normaltest" if normality_selected == "Omnibus test of normality" else "shapiro") st.write(normality) x1, x2 = df.groupby(y_var)[x_var].apply(list) fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4)) ax1 = pg.qqplot(x1, ax=ax1) ax2 = pg.qqplot(x2, ax=ax2) st.pyplot(fig) st.success("Levene test for homoscedasticity of variances") homoscedasticity = pg.homoscedasticity(df, dv=x_var, group=y_var) st.write(homoscedasticity) if param_vs_nonparam == "Parametric tests (Student, Welch)": if homoscedasticity.loc["levene", "pval"] < 0.05: test_message = "Welch test results:" else: test_message = "Student t-test results:" st.success(test_message)
# Per-column variants, kept disabled:
# sns.distplot(D[:,0], label="Proximal-thoracic")
# sns.distplot(D[:,1], label="Main thoracic")
# sns.distplot(D[:,2], label="Lumbar")

# Distribution of all differences pooled into one flat sample.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# confirm the pinned seaborn version before upgrading.
pooled_diff = D.reshape(-1)
sns.distplot(pooled_diff)
plt.xlabel("Difference in Cobb Angle (Degrees)")
plt.ylabel("Density")
# plt.legend()
plt.title("Difference between Predicted and Ground-truth Cobb Angles")
plt.show()

########## Shapiro-Wilk test
ShapiroWilk = pg.normality(data=D.reshape(-1))
print(ShapiroWilk.to_string())

# Q-Q plot of the pooled differences against a normal distribution.
# NOTE(review): `figsize` may not be an accepted keyword of pg.qqplot in
# newer pingouin versions -- verify against the installed release.
qq_options = dict(dist='norm', sparams=(), confidence=0.95, figsize=(5, 4), ax=None)
pg.qqplot(D.reshape(-1), **qq_options)

# Fresh figure for the ground-truth vs. prediction scatter.
plt.figure()
# sns.scatterplot(x=gt_angle_data[:,0], y=pred_angle_data[:,0], label="Proximal-thoracic")
# sns.scatterplot(x=gt_angle_data[:,1], y=pred_angle_data[:,1], label="Main thoracic")
# sns.scatterplot(x=gt_angle_data[:,2], y=pred_angle_data[:,2], label="Lumbar")
sns.scatterplot(
    x=gt_angle_data.reshape(-1),
    y=pred_angle_data.reshape(-1),
)
plt.xlabel("Ground-truth Angle (Degrees)")
plt.ylabel("Predicted Angle (Degrees)")
# plt.legend()
plt.title("Ground-truth vs. Predicted Cobb Angles")
plt.show()

# Bland-Altman agreement plot on the flattened arrays.
ax = pg.plot_blandaltman(gt_angle_data.flatten(), pred_angle_data.flatten())
def main(result_dir_ref: str, result_dir_pp: str, plot_dir: str):
    """Generate one figure of Q-Q plots per label from the testing results.

    Reads ``results.csv`` (``;``-separated) from both result folders,
    concatenates them, and splits the rows into reference rows and
    post-processed rows (rows whose ``SUBJECT`` contains ``'PP'``). For every
    label a 2x3 grid is saved to ``plot_dir`` under the label's name: the top
    row shows DICE, the bottom row HDRFDST, each before post-processing,
    after post-processing, and as the difference (after minus before).

    Args:
        result_dir_ref (str): path to the reference data folder (without
            post-processing); resolved against the current working directory.
        result_dir_pp (str): path to the data folder with post-processing;
            resolved against the current working directory.
        plot_dir (str): path to the folder the Q-Q plots are written to;
            resolved against the current working directory and created if
            it does not exist.
    """
    columns = ['SUBJECT', 'LABEL', 'DICE', 'HDRFDST']
    # (results column, wording used in the subplot titles), one per grid row
    metrics = (('DICE', 'Dice coefficient'), ('HDRFDST', 'Hausdorff distance'))

    # Resolve against the working directory; absolute inputs are unchanged.
    result_dir_ref = Path.cwd() / result_dir_ref
    result_dir_pp = Path.cwd() / result_dir_pp
    plot_dir = Path.cwd() / plot_dir
    plot_dir.mkdir(parents=True, exist_ok=True)

    # load the data into pandas
    ref = pd.read_csv(result_dir_ref / 'results.csv', sep=';')
    pp = pd.read_csv(result_dir_pp / 'results.csv', sep=';')
    results = pd.concat([ref, pp])

    # Split by the 'PP' marker in SUBJECT; rows with a missing SUBJECT count
    # as reference rows (na=False).
    is_pp = results['SUBJECT'].str.contains('PP', na=False)
    ref = results.loc[~is_pp, columns].sort_values(by=['SUBJECT', 'LABEL'])
    pp = results.loc[is_pp, columns].sort_values(by=['SUBJECT', 'LABEL'])

    for label in ref['LABEL'].unique():
        # one figure per label: rows = metrics, columns = before/after/diff
        fig, ax = plt.subplots(2, 3, figsize=(12, 8), sharey=True, sharex=True)
        fig.suptitle(f"Q-Q plots of {label}")

        ref_rows = ref[ref.LABEL == label]
        pp_rows = pp[pp.LABEL == label]

        for row, (metric, title) in enumerate(metrics):
            pg.qqplot(ref_rows[metric], ax=ax[row, 0])
            ax[row, 0].set_title(f"{title} before post-processing")
            pg.qqplot(pp_rows[metric], ax=ax[row, 1])
            ax[row, 1].set_title(f"{title} after post-processing")
            # NOTE(review): the subtraction aligns on the index labels kept
            # from the CSV files, so it presumably relies on row i of both
            # files describing the same subject/label -- confirm.
            pg.qqplot(pp_rows[metric] - ref_rows[metric], ax=ax[row, 2])
            ax[row, 2].set_title(f"Difference in {title}")

        # modify appearance of plot
        fig.subplots_adjust(hspace=0.5, wspace=0.5)
        for axis in ax.flatten():
            axis.set_xlim([-1.6, 1.6])
            axis.set_ylim([-1.6, 1.6])
            # Drop the text annotations. `Axes.texts` cannot be reassigned on
            # matplotlib >= 3.5, so remove each artist individually (iterate
            # over a copy because removal mutates the container).
            for text in list(axis.texts):
                text.remove()
            # NOTE(review): assumes pg.qqplot added five Line2D artists per
            # axes, in this order -- verify against the installed pingouin
            # version.
            lines = axis.get_lines()
            lines[0].set_color('black')
            lines[0].set_markerfacecolor('None')
            lines[1].set_color('black')
            lines[1].set_linestyle('--')
            lines[2].set_color('black')
            lines[3].set_color('grey')
            lines[4].set_color('grey')

        fig.savefig(plot_dir / str(label))
        plt.close(fig)
left_index=True, right_index=True) summary_frame.head() # In[13]: _ = sns.scatterplot(y='standard_resid', x='standarized_prediction', data=summary_frame) _ = plt.axhline(y=0) # #### # This graph can be used for testing homogeneity of variance. We encountered this kind of plot previously; essentially, if it has a funnel shape then we’re in trouble. The plot we have shows points that are equally spread for the three groups, which implies that variances are similar across groups (which was also the conclusion reached by Levene’s test). # In[14]: _ = pg.qqplot(summary_frame['standard_resid'], confidence=False) # #### # The second plot is a Q-Q plot , which tells us something about the normality of residuals in the model. We want our residuals to be normally distributed, which means that the dots on the graph should cling to the diagonal line. Ours look like they have had a bit of an argument with the diagonal line, which suggests that we may not be able to assume normality of errors and should perhaps use a robust version of ANOVA instead. # In[15]: # Doing Welch anova in the case if homogeniety of variance is violated(our data here dont need this test) aov = pg.welch_anova(dv='libido', between='dose', data=df) aov # ## Robust ANOVA (for independent samples) # In[16]: st.kruskal(df_dose1['libido'], df_dose2['libido'], df_dose3['libido'])
#
# QQ-plots for comparing final states distribution of degrees of freedom to normal distributions.
#
# Filliben’s formula was used to estimate the theoretical quantiles for all QQ-plots.

# %%
fig_qq_sum = plot.generate_qq_plot(df, vars_=['sum'], width=800)

# %% [markdown]
# #### With Confidence Intervals

# %%
# One panel per task, laid out side by side in display order.
fig_qq_sum_ci, axes = plt.subplots(
    1,
    len(task_display_order),
    figsize=(14, 4),
)
for i, task in enumerate(task_display_order):
    panel = axes[i]
    task_sums = df.loc[df['task'] == task, 'sum']
    pg.qqplot(task_sums, dist='norm', ax=panel)
    panel.set_title(f"Task={task}")
fig_qq_sum_ci.tight_layout()
plt.savefig(reports_path / 'figures/qq-plot-sum_ci.pdf')

# %% [markdown]
# ### Histogram

# %%
fig_hist_sum = plot.generate_histograms(
    df[['task', 'sum']],
    by='task',
    x_title="Final State Sum Values",
    legend_title="Block Type",
    width=800,
)

# %% [markdown]
# Homoscedasticity (equality of variances across groups, Levene's test)
# `Homo` is short for "homoscedasticity"; the previous spelling of this name
# was not a valid assignment target.
Homo = pg.homoscedasticity(data=Ble, dv=Yname, group=Xname, method="levene")
print(Homo)
# print(Ble[Ble['variete']=='V1'].var())
# print(Ble[Ble['variete']=='V2'].var())
# print(Ble[Ble['variete']=='V3'].var())
# print(Ble[Ble['variete']=='V4'].var())

# Normality
# Norm = pg.normality(data=Ble, dv=Yname, group=Xname, method="shapiro")
# print(Norm)

# Normality of residuals
# NOTE(review): the grouping factor enters the regression through its integer
# category codes, i.e. as a single numeric predictor -- confirm this is the
# intended model for the residual check.
lm = pg.linear_regression(Ble[Xname].cat.codes, Ble[Yname])
Normall = pg.normality(lm.residuals_)
print(Normall)
plot = pg.qqplot(lm.residuals_, dist='norm')

# OneWay Anova
aov = Ble.anova(dv=Yname, between=Xname, detailed=True)
print(aov)

# Analysis of rdt vs phyto
Yname = 'rdt'
Xname = 'phyto'
print('\n******** %s vs %s **********' % (Yname, Xname))

# Homoscedasticity (library default method)
Homo = pg.homoscedasticity(data=Ble, dv=Yname, group=Xname)
print(Homo)

# Normality
# Norm = pg.normality(data=Ble, dv=Yname, group=Xname)
# The Filliben’s formula was used to estimate the theoretical quantiles for all QQ-plots. # %% fig_qq_dof = plot.generate_qq_plot(df, vars_=['df1', 'df2'], width=600, height=300) # %% [markdown] # #### Separately # %% fig_qq_grid, axes = plt.subplots(2, len(task_display_order), figsize=(14, 8)) for i, task in enumerate(task_display_order): pg.qqplot(df[df['task'] == task]['df1'], dist='norm', ax=axes[0, i]) pg.qqplot(df[df['task'] == task]['df2'], dist='norm', ax=axes[1, i]) axes[0, i].set(ylabel='Ordered quantiles (df1)', title=f"Task={task}") axes[1, i].set(ylabel='Ordered quantiles (df2)', title="") fig_qq_grid.tight_layout() plt.savefig(reports_path / 'figures/qq-plot-dof-grid.pdf') # %% [markdown] # ### Histograms # Histograms of final state values for df1 and df2 compared to normal distributions. # %% # Collect all histograms in a dictionary for later use of the keys as part of their file name when saving. histograms = dict() histograms['overall_dof'] = plot.generate_histograms(