Python qqplot Beispiele, pingouin.qqplot Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: stylised_facts.py Projekt: mvgjorge/hft-abm-smc-abc

 def qq_plot(self):
     """Display a Q-Q plot of ``self.log_returns`` against the normal distribution.

     The plot is drawn by ``pg.qqplot`` with ``dist='norm'`` and
     ``confidence=False`` and then shown via ``plt.show()``. Nothing is
     returned.
     """
     qq_options = {'dist': 'norm', 'confidence': False}
     pg.qqplot(self.log_returns, **qq_options)
     plt.show()

Beispiel #2

0

Datei anzeigen

Datei: 01_regression.py Projekt: snehilk1312/AppliedStatistics

# Centre the 'predicted' column on its mean and scale it by its standard
# deviation; the result is stored as a new column of `prediction`.
prediction['standarized_prediction'] = (
    prediction['predicted'] -
    prediction['predicted'].mean()) / prediction['predicted'].std()
# Notebook-style preview: the returned frame is discarded when this file is
# run as a plain script.
final_summary.head()

# In[89]:

# Scatter of 'standard_resid' (x) against 'standarized_prediction' (y), with
# a horizontal reference line at y = 0.
_ = sns.scatterplot(x=final_summary['standard_resid'],
                    y=prediction['standarized_prediction'])
_ = plt.axhline(y=0)
plt.show()

# In[90]:

# Q-Q plot of the 'standard_resid' column using pg.qqplot's default settings.
_ = pg.qqplot(final_summary['standard_resid'])
plt.show()

# In[99]:

# Density-normalised histogram (30 bins) of the 'student_resid' column.
# NOTE: `ax` is rebound to the return value of plt.hist below, so the Axes
# created by plt.subplots is no longer reachable through that name.
fig, ax = plt.subplots(figsize=(6, 4))
ax = plt.hist(final_summary['student_resid'],
              density=True,
              bins=30,
              edgecolor='black',
              linewidth=1.4)
plt.xlabel('student_resid', fontsize=14)
plt.show()

# ##### this assumption was also met

Beispiel #3

0

Datei anzeigen

Datei: 01_qqplot_histogram_describe_stats_assumptions.py Projekt: snehilk1312/AppliedStatistics

# Overlay a red KDE of the 'theoritical_normal' column and restrict the
# x-axis to [0, max of 'day2'].
# NOTE: `ax` ends up bound to the return value of plt.xlim, not to an Axes.
ax = sns.kdeplot(day2['theoritical_normal'], color='red')
ax = plt.xlim([0,day2['day2'].max() ])
plt.show()

# Day 3
# Density histogram of 'day3' (white bars, black edges) with two KDE curves
# on top: the data in black and the 'theoritical_normal' column in red.
# `ax` is rebound by every call; only the last binding (plt.xlim's return
# value) survives.
fig,ax = plt.subplots(figsize=(12, 8))
ax = plt.hist(day3['day3'],density=True,bins=30, edgecolor='black', color='white', linewidth=1.4)
ax = sns.kdeplot(day3['day3'], color='black')
ax = sns.kdeplot(day3['theoritical_normal'], color='red')
ax = plt.xlim([0,day3['day3'].max() ])
plt.show()

"""## Some QQplots"""

# One Q-Q plot per day, each drawn on its own figure with confidence=False.
# NOTE(review): day 1 is read from `data` while days 2 and 3 come from
# `day2` / `day3`; day 3 also uses a larger figure (10x10 instead of 8x8) --
# confirm both differences are intentional.
fig,ax = plt.subplots(figsize=(8,8))
_ = pg.qqplot(data['day1'], ax=ax, confidence=False)
plt.show()

fig,ax = plt.subplots(figsize=(8,8))
_ = pg.qqplot(day2['day2'], ax=ax, confidence=False)
plt.show()

fig,ax = plt.subplots(figsize=(10,10))
_ = pg.qqplot(day3['day3'], ax=ax, confidence=False)
plt.show()

"""## Some descriptive stats"""

# Print the output of stats.describe for each day's column.
print(stats.describe(data['day1']))
print(stats.describe(day2['day2']))
print(stats.describe(day3['day3']))

Beispiel #4

0

Datei anzeigen

Datei: status.py Projekt: lostmachine18/Status

        message = "Shapiro-Wilk Normality test is being perform"
    else:
        message = "Omnibus test of normality is being performed"

    st.success(message)

    normality = pg.normality(df,
                             dv=x_var,
                             group=y_var,
                             method="normaltest" if normality_selected
                             == "Omnibus test of normality" else "shapiro")
    st.write(normality)

    x1, x2 = df.groupby(y_var)[x_var].apply(list)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4))
    ax1 = pg.qqplot(x1, ax=ax1)
    ax2 = pg.qqplot(x2, ax=ax2)
    st.pyplot(fig)

    st.success("Levene test for homoscedasticity of variances")
    homoscedasticity = pg.homoscedasticity(df, dv=x_var, group=y_var)
    st.write(homoscedasticity)

    if param_vs_nonparam == "Parametric tests (Student, Welch)":
        if homoscedasticity.loc["levene", "pval"] < 0.05:
            test_message = "Welch test results:"
        else:
            test_message = "Student t-test results:"

        st.success(test_message)

Beispiel #5

0

Datei anzeigen

Datei: results_cobb.py Projekt: sineadellison1/automatic-scoliosis-assessment

# Distribution plot of all values of D pooled into one 1-D array. The three
# commented-out calls below plot columns 0, 1 and 2 of D separately instead.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases --
# confirm against the pinned seaborn version before relying on it.
# sns.distplot(D[:,0], label="Proximal-thoracic")
# sns.distplot(D[:,1], label="Main thoracic")
# sns.distplot(D[:,2], label="Lumbar")
sns.distplot(D.reshape(-1))
plt.xlabel("Difference in Cobb Angle (Degrees)")
plt.ylabel("Density")
# plt.legend()
plt.title("Difference between Predicted and Ground-truth Cobb Angles")
plt.show()

########## Shapiro-Wilk test
# Normality test on the pooled values. No `method` is passed, so the test
# actually run is pg.normality's default -- presumably Shapiro-Wilk, as the
# variable name suggests; confirm against the pingouin documentation.
ShapiroWilk = pg.normality(data=D.reshape(-1))
print(ShapiroWilk.to_string())
# Q-Q plot of the pooled values against the normal distribution with a 95%
# confidence setting. There is no plt.show() directly after this call; the
# figure stays open until the next plt.show() further down.
pg.qqplot(D.reshape(-1),
          dist='norm',
          sparams=(),
          confidence=0.95,
          figsize=(5, 4),
          ax=None)

# New figure: ground-truth (x) against predicted (y) values, all pooled.
plt.figure()
# sns.scatterplot(x=gt_angle_data[:,0], y=pred_angle_data[:,0], label="Proximal-thoracic")
# sns.scatterplot(x=gt_angle_data[:,1], y=pred_angle_data[:,1], label="Main thoracic")
# sns.scatterplot(x=gt_angle_data[:,2], y=pred_angle_data[:,2], label="Lumbar")
sns.scatterplot(x=gt_angle_data.reshape(-1), y=pred_angle_data.reshape(-1))
plt.xlabel("Ground-truth Angle (Degrees)")
plt.ylabel("Predicted Angle (Degrees)")
# plt.legend()
plt.title("Ground-truth vs. Predicted Cobb Angles")
plt.show()

# Bland-Altman plot of the flattened ground-truth and predicted values.
# NOTE(review): no plt.show() follows within this excerpt.
ax = pg.plot_blandaltman(gt_angle_data.flatten(), pred_angle_data.flatten())

Beispiel #6

0

Datei anzeigen

def main(result_dir_ref: str, result_dir_pp: str, plot_dir: str):
    """Generate Q-Q plots of the testing results, one figure per label.

    Both result folders must contain a ``;``-separated ``results.csv`` with
    at least the columns ``SUBJECT``, ``LABEL``, ``DICE`` and ``HDRFDST``.
    The two files are concatenated; rows whose ``SUBJECT`` contains ``'PP'``
    are treated as post-processed, all remaining rows as reference. For each
    distinct label a 2x3 grid of Q-Q plots is produced (rows: DICE and
    HDRFDST; columns: reference, post-processed, post-processed minus
    reference) and saved in ``plot_dir`` under the label's name.

    Args:
        result_dir_ref (str | Path): path to the reference data folder
            (without post-processing)
        result_dir_pp (str | Path): path to the data folder with
            post-processing
        plot_dir (str | Path): path to the desired result folder to store the
            Q-Q plots; it is created if it does not exist yet

    """

    # Relative inputs are resolved against the current working directory;
    # absolute inputs pass through the `/` operator unchanged.
    result_dir_ref = Path.cwd() / result_dir_ref
    result_dir_pp = Path.cwd() / result_dir_pp
    plot_dir = Path.cwd() / plot_dir
    # savefig does not create missing directories.
    plot_dir.mkdir(parents=True, exist_ok=True)

    # Load both result files and stack them; the row index of each file is
    # kept as-is, which the difference computation below relies on.
    results = pd.concat([
        pd.read_csv(result_dir_ref / 'results.csv', sep=';'),
        pd.read_csv(result_dir_pp / 'results.csv', sep=';'),
    ])

    # Split into reference and post-processed rows by the 'PP' marker.
    columns = ['SUBJECT', 'LABEL', 'DICE', 'HDRFDST']
    is_pp = results['SUBJECT'].str.contains('PP', na=False)
    ref = results.loc[~is_pp, columns].sort_values(by=['SUBJECT', 'LABEL'])
    pp = results.loc[is_pp, columns].sort_values(by=['SUBJECT', 'LABEL'])

    # (column name, human-readable title) for each row of the plot grid
    metrics = (('DICE', 'Dice coefficient'), ('HDRFDST', 'Hausdorff distance'))

    for label in ref['LABEL'].unique():

        # one figure per label: a row per metric, a column per variant
        fig, ax = plt.subplots(2, 3, figsize=(12, 8), sharey=True, sharex=True)
        fig.suptitle(f"Q-Q plots of {label}")

        ref_label = ref[ref['LABEL'] == label]
        pp_label = pp[pp['LABEL'] == label]

        for row, (column, title) in enumerate(metrics):
            pg.qqplot(ref_label[column], ax=ax[row, 0])
            ax[row, 0].set_title(f"{title} before post-processing")

            pg.qqplot(pp_label[column], ax=ax[row, 1])
            ax[row, 1].set_title(f"{title} after post-processing")

            # Series subtraction aligns on the row index inherited from the
            # CSV files, not on SUBJECT.
            # NOTE(review): this assumes matching reference and
            # post-processed rows sit at the same row position in their
            # respective results.csv -- confirm.
            pg.qqplot(pp_label[column] - ref_label[column], ax=ax[row, 2])
            ax[row, 2].set_title(f"Difference in {title}")

        # modify appearance of plot
        fig.subplots_adjust(hspace=0.5, wspace=0.5)

        for axis in ax.flatten():
            axis.set_xlim([-1.6, 1.6])
            axis.set_ylim([-1.6, 1.6])
            # Drop the text annotations added by pg.qqplot. Removing the
            # artists one by one works on every matplotlib version, whereas
            # assigning to `axis.texts` fails on newer releases where that
            # attribute is read-only.
            for text in list(axis.texts):
                text.remove()
            # Restyle by position in the Axes' line list.
            # NOTE(review): relies on the order in which pg.qqplot adds its
            # five lines (presumably sample points, two reference/fit lines,
            # then the two confidence bounds) -- verify against the pingouin
            # version in use.
            lines = axis.get_lines()
            lines[0].set_color('black')
            lines[0].set_markerfacecolor('None')
            lines[1].set_color('black')
            lines[1].set_linestyle('--')

            lines[2].set_color('black')
            lines[3].set_color('grey')
            lines[4].set_color('grey')

        # str() keeps non-string labels (e.g. integers) usable as file names.
        fig.savefig(plot_dir / str(label))
        plt.close(fig)

Beispiel #7

0

Datei anzeigen

Datei: 1_Anova.py Projekt: snehilk1312/AppliedStatistics

                         left_index=True,
                         right_index=True)
# Notebook-style preview: the returned frame is discarded when this file is
# run as a plain script.
summary_frame.head()

# In[13]:

# Scatter of 'standard_resid' (y) against 'standarized_prediction' (x), with
# a horizontal reference line at y = 0.
_ = sns.scatterplot(y='standard_resid',
                    x='standarized_prediction',
                    data=summary_frame)
_ = plt.axhline(y=0)

# #### # This graph can be used for testing homogeneity of variance. We encountered this kind of plot previously; essentially, if it has a funnel shape then we are in trouble. The plot we have shows points that are equally spread for the three groups, which implies that variances are similar across groups (which was also the conclusion reached by Levene's test).

# In[14]:

# Q-Q plot of the 'standard_resid' column, drawn with confidence=False.
_ = pg.qqplot(summary_frame['standard_resid'], confidence=False)

# #### # The second plot is a Q-Q plot, which tells us something about the normality of residuals in the model. We want our residuals to be normally distributed, which means that the dots on the graph should cling to the diagonal line. Ours look like they have had a bit of an argument with the diagonal line, which suggests that we may not be able to assume normality of errors and should perhaps use a robust version of ANOVA instead.

# In[15]:

# Welch ANOVA, intended for the case where homogeneity of variance is
# violated (our data here do not need this test).
aov = pg.welch_anova(dv='libido', between='dose', data=df)
# Notebook-style display of the result; a no-op in a plain script.
aov

# ## Robust ANOVA (for independent samples)

# In[16]:

# Kruskal test on the 'libido' values of the three dose groups; the result
# is neither stored nor printed here.
# NOTE(review): `st` presumably aliases scipy.stats in this file -- confirm.
st.kruskal(df_dose1['libido'], df_dose2['libido'], df_dose3['libido'])

Beispiel #8

0

Datei anzeigen

#
# QQ-plots for comparing final states distribution of degrees of freedom to normal distributions.
#
# Filliben’s formula was used to estimate the theoretical quantiles for all Q-Q plots.

# %%
# Q-Q plot of the 'sum' variable, built by the project's own `plot` helper.
fig_qq_sum = plot.generate_qq_plot(df, vars_=['sum'], width=800)

# %% [markdown]
# #### With Confidence Intervals

# %%

# One Q-Q panel per task, laid out in a single row in `task_display_order`.
# pg.qqplot is called without a `confidence` argument, so its default applies.
# NOTE(review): `axes[i]` assumes more than one task; with a single subplot
# plt.subplots presumably returns a bare Axes instead of an array -- confirm.
fig_qq_sum_ci, axes = plt.subplots(1, len(task_display_order), figsize=(14, 4))
for i, task in enumerate(task_display_order):
    pg.qqplot(df[df['task'] == task]['sum'], dist='norm', ax=axes[i])
    axes[i].set_title(f"Task={task}")
fig_qq_sum_ci.tight_layout()
# plt.savefig writes pyplot's current figure, which is expected to be
# `fig_qq_sum_ci` here because it was created last.
plt.savefig(reports_path / 'figures/qq-plot-sum_ci.pdf')

# %% [markdown]
# ### Histogram

# %%
# Histograms of the 'sum' values grouped by task, built by the project's own
# `plot` helper.
fig_hist_sum = plot.generate_histograms(df[['task', 'sum']],
                                        by='task',
                                        x_title="Final State Sum Values",
                                        legend_title="Block Type",
                                        width=800)

# %% [markdown]

Beispiel #9

0

Datei anzeigen

Datei: data_ble.py Projekt: terman37/DSTI-Statistical-Analysis

#
# Homoscedasticity: Levene's test of Yname across the groups of Xname.
# The result variable is `Homo`; the mangled spelling `H**o` was a
# SyntaxError (assignment to an operator expression).
Homo = pg.homoscedasticity(data=Ble, dv=Yname, group=Xname, method="levene")
print(Homo)
# print(Ble[Ble['variete']=='V1'].var())
# print(Ble[Ble['variete']=='V2'].var())
# print(Ble[Ble['variete']=='V3'].var())
# print(Ble[Ble['variete']=='V4'].var())
# Normality
# Norm = pg.normality(data=Ble, dv=Yname, group=Xname, method="shapiro")
# print(Norm)
# Normality of residuals: regress Yname on the integer category codes of
# Xname, then test and Q-Q plot the residuals.
# NOTE(review): using `.cat.codes` treats the categorical factor as a single
# numeric predictor -- confirm this is the intended residual model.
lm = pg.linear_regression(Ble[Xname].cat.codes, Ble[Yname])
Normall = pg.normality(lm.residuals_)
print(Normall)
plot = pg.qqplot(lm.residuals_, dist='norm')

# One-way ANOVA of Yname between the groups of Xname
aov = Ble.anova(dv=Yname, between=Xname, detailed=True)
print(aov)

# Analysis of rdt vs phyto
Yname = 'rdt'
Xname = 'phyto'
print('\n******** %s vs %s **********' % (Yname, Xname))
#
# Homoscedasticity (no `method` given, so pg.homoscedasticity's default applies)
Homo = pg.homoscedasticity(data=Ble, dv=Yname, group=Xname)
print(Homo)
# Normality
# Norm = pg.normality(data=Ble, dv=Yname, group=Xname)

Beispiel #10

0

Datei anzeigen

# Filliben’s formula was used to estimate the theoretical quantiles for all Q-Q plots.

# %%
# Q-Q plots of 'df1' and 'df2', built by the project's own `plot` helper.
fig_qq_dof = plot.generate_qq_plot(df,
                                   vars_=['df1', 'df2'],
                                   width=600,
                                   height=300)

# %% [markdown]
# #### Separately

# %%

# Grid of Q-Q panels: top row 'df1', bottom row 'df2', one column per task
# in `task_display_order`. Only the top row carries the task title.
# NOTE(review): `axes[0, i]` assumes more than one task; with a single column
# plt.subplots presumably returns a 1-D array -- confirm.
fig_qq_grid, axes = plt.subplots(2, len(task_display_order), figsize=(14, 8))
for i, task in enumerate(task_display_order):
    pg.qqplot(df[df['task'] == task]['df1'], dist='norm', ax=axes[0, i])
    pg.qqplot(df[df['task'] == task]['df2'], dist='norm', ax=axes[1, i])
    axes[0, i].set(ylabel='Ordered quantiles (df1)', title=f"Task={task}")
    axes[1, i].set(ylabel='Ordered quantiles (df2)', title="")
fig_qq_grid.tight_layout()
# plt.savefig writes pyplot's current figure, which is expected to be
# `fig_qq_grid` here because it was created last.
plt.savefig(reports_path / 'figures/qq-plot-dof-grid.pdf')

# %% [markdown]
# ### Histograms
# Histograms of final state values for df1 and df2 compared to normal distributions.

# %%
# Collect all histograms in a dictionary for later use of the keys as part of their file name when saving.
histograms = dict()

histograms['overall_dof'] = plot.generate_histograms(