Beispiel #1
0
def test_normality(df, dep_var, ind_vars):
    """Run a normality test on ``dep_var`` once per requested condition.

    Args:
        df: DataFrame with a ``'Condition number'`` column and a ``dep_var``
            column.
        dep_var: Name of the column whose values are tested.
        ind_vars: Iterable of ``'Condition number'`` values; one test is run
            per entry, in the given order.

    Returns:
        A list holding, for each condition, the first ``'pval'`` entry of the
        table returned by ``pg.normality``.
    """

    def _first_pval(condition):
        # Keep only the rows recorded under this condition.
        in_condition = df['Condition number'] == condition
        outcome = pg.normality(df.loc[in_condition][dep_var])
        return list(outcome['pval'])[0]

    pvals_by_condition = [_first_pval(condition) for condition in ind_vars]

    print('')  # trailing blank line on stdout
    return pvals_by_condition
Beispiel #2
0
# Dependent variables screened for normality in both phases.
vars_time = [
    'Total Exploration',
    'Time Conspecific Chamber',
    'Time Object/New Cons Chamber',
]

# Sample phase: stack the two chamber-time columns into long format, keeping
# the subject and group identifiers on every row.
samp_time = si_samp.melt(
    id_vars=['Subject', 'Group'],
    value_vars=['Time Object/New Cons Chamber', 'Time Conspecific Chamber'],
    var_name='Side',
    value_name='Time',
)

# Normality of each variable within every group (sample phase).
for var_ in vars_time:
    print(f'Normality test (Shapiro), {var_}')
    print(pg.normality(data=si_samp, dv=var_, group='Group'))

# Test phase: select the rows, then pass them through the outlier helper.
test_df = detec_outlier(si_raw.loc[si_raw['Phase'] == 'Test'],
                        'Total Exploration', 'Group')

# Same long-format reshape as for the sample phase.
test_time = test_df.melt(
    id_vars=['Subject', 'Group'],
    value_vars=['Time Object/New Cons Chamber', 'Time Conspecific Chamber'],
    var_name='Side',
    value_name='Time',
)

# Normality of each variable within every group (test phase).
for var_ in vars_time:
    print(f'Normality test (Shapiro), {var_}')
    print(pg.normality(data=test_df, dv=var_, group='Group'))
Beispiel #3
0
                preds = df_preds.iloc[fold][col]
            f1 = metrics.f1_score(y_true=trues, y_pred=preds, average="micro")
            f1FoldDict[f"{num}_{col}"] = f1
            namesList.append(col)

    # Add one column per variant to the per-fold F1 score data frame.
    # Scores are looked up in f1FoldDict under keys of the form
    # "<fold index>_<variant name>".
    for name in namesList:
        foldList = list()
        # Fold indices 0..4 are hard-coded, so exactly five folds are read.
        for i in range(5):
            foldList.append(f1FoldDict[f"{i}_{name}"])
        dfCompare[f"{name}"] = foldList
#######################################
# start testing
# normality test
    # NOTE(review): this handle is not closed in the visible code; a `with`
    # block would guarantee the report is flushed. The f-prefix on the file
    # name has no placeholders.
    _normality = open(f"_normality.txt", "w")
    print(pg.normality(dfCompare), file=_normality)

    # ANOVA is computed here.
    # Reshape to long format: the former index becomes the "index" column and
    # each variant column becomes a level of "treatments".
    df_melt = pd.melt(dfCompare.reset_index(),
                      id_vars=["index"],
                      value_vars=namesList)
    df_melt.columns = ["index", "treatments", "value"]
    # One-way model with the variant as a categorical factor
    # (presumably statsmodels' formula API -- confirm against the imports).
    model = ols('value ~ C(treatments)', data=df_melt).fit()
    # print(model.summary())
    anova_table = sm.stats.anova_lm(model, typ=2)
    print(anova_table)

    # pairwise comparison
    # NOTE(review): opened with "w+" and not closed in the visible code.
    tukey_ = open(f"./results/statistics/{target}_statistics.txt", "w+")

    if len(namesList) > 2:
Beispiel #4
0
print(
    'A idade média foi de 25 anos. Vamos criar um histograma e analisar a distribuição da variável idade.'
)

# Histogram of player ages.
df_nba['Age'].plot(kind='hist', bins=12, alpha=0.5)
plt.show()

# Horizontal box plot of the same variable; label the axes on the returned
# Axes object.
ax = sns.boxplot(x=df_nba['Age'], palette="Set2", orient="h")
ax.set_ylabel('\nAtletas')
ax.set_xlabel('\nIdade')
plt.show()

# Normality test with Pingouin.
x = df_nba['Age']
print(pg.normality(x))

# Total points per player, reduced to the ten highest totals.
df_nba_top10 = (
    df_nba.groupby(['Player'])['PTS']
    .sum()
    .reset_index()
    .rename(columns={'PTS': 'Total_Pontos'})
    .nlargest(10, 'Total_Pontos')
)

# Show the resulting table.
print(df_nba_top10)


# Quantos jogos os jogadores com 35 anos de idade ou mais iniciaram (variável GS)?
def lista_jogadores35():
# Load the combined sample table; rows are indexed by sample id.
data_path = "../../data/combined_sample_data.xlsx"
combined_df = pd.read_excel(data_path,
                            sheet_name="SampleData",
                            index_col="SampleID")
# Every column from position 8 onwards is treated as a metabolite.
metabolite_list = combined_df.columns[8:]

# Restrict to the requested subject and drop the rows with TimeOfDay == 4.
subject_df = combined_df[combined_df['Subject'] == subject]
subject_df = subject_df[subject_df['TimeOfDay'] != 4]

# Collect the metabolites whose log2 level shows a significant linear trend
# over TimeOfDay (optionally also requiring normally distributed values).
alpha = 0.05
subject_diurnal_list = []
for metabolite in metabolite_list:
    log2_metabolite = np.log2(subject_df[metabolite])

    # pg.normality returns a one-row DataFrame in current pingouin releases;
    # unpacking that directly would iterate over its column labels and fail.
    # Older releases returned a plain (normal, pval) pair, so accept both.
    normality_result = pg.normality(log2_metabolite)
    if isinstance(normality_result, pd.DataFrame):
        normal = bool(normality_result['normal'].iloc[0])
        shapiro_wilk_pval = normality_result['pval'].iloc[0]
    else:
        normal, shapiro_wilk_pval = normality_result

    lr_info = pg.linear_regression(subject_df['TimeOfDay'], log2_metabolite)
    # Row 0 is the intercept; row 1 is the TimeOfDay slope.
    lr_pval = lr_info.iloc[1]['pval']

    # The normality requirement only applies when filtering is switched on.
    if lr_pval < alpha and (normal or not filter_by_normal_dist):
        subject_diurnal_list.append(metabolite)
print(len(subject_diurnal_list))

# calculate z-score

#subject_zscore_df = pd.DataFrame(zscore(np.log2(subject_df[subject_df.columns[9:]])),
#                                index=subject_df.index, columns=subject_df.columns[9:])
        # Choose the report file: the "TREC6" prefix is used when
        # analyzed_set.natat is falsy, "MPD" otherwise.
        # NOTE(review): the handle is not closed in the visible code.
        if not analyzed_set.natat:
            tukey_ = open(
                f"./results/statistics/TREC6_{param}_{analyzed_set.xlabel}.txt",
                "w+")
        else:
            tukey_ = open(
                f"./results/statistics/MPD_{param}_{analyzed_set.xlabel}.txt",
                "w+")

        # Three or more inputs: report normality, the ANOVA table and a
        # Tukey HSD comparison of all treatments.
        if len(file_list) > 2:
            m_comp = pairwise_tukeyhsd(endog=df_melt['value'],
                                       groups=df_melt['treatments'],
                                       alpha=0.05)
            print(f"{param}, NORMALITY:", file=tukey_)
            print(pg.normality(df_temp), file=tukey_)
            print("\n ANOVA:", file=tukey_)
            print(anova_table, file=tukey_)
            print("\n HSD Tukey:", file=tukey_)
            print(m_comp, file=tukey_)
            # The Tukey result is also echoed to stdout.
            print(m_comp)
        # Fewer than three inputs: this condition is the exact complement of
        # the one above, so the branch behaves like a plain `else`.
        elif len(file_list) < 3:
            print(f"{param}, NORMALITY:", file=tukey_)
            print(pg.normality(df_temp), file=tukey_)
            print("\n ANOVA:", file=tukey_)
            print(anova_table, file=tukey_)
            print("\n Wilcoxon:", file=tukey_)
            # Paired comparison of the first two columns named in
            # analyzed_set.columns.
            print(stats.wilcoxon(df_temp[analyzed_set.columns[0]],
                                 df_temp[analyzed_set.columns[1]]),
                  file=tukey_)
            print(
Beispiel #7
0
    # Report descriptive statistics, normality, Q-Q plots and homogeneity of
    # variances for x_var, split by the grouping column y_var.
    st.header("Difference in means between groups results")
    st.success("Descriptive statistics are being calculated")
    # Per-group mean, standard deviation, standard error and count of x_var.
    function_dict = {x_var: ["mean", "std", "sem", "count"]}
    new = pd.DataFrame(df.groupby(y_var).aggregate(function_dict))
    st.write(new)

    # Any selection other than "Shapiro-Wilk" is announced as the omnibus test.
    # NOTE(review): the first message reads "is being perform" (typo in a
    # user-facing string).
    if normality_selected == "Shapiro-Wilk":
        message = "Shapiro-Wilk Normality test is being perform"
    else:
        message = "Omnibus test of normality is being performed"

    st.success(message)

    # NOTE(review): the method below is keyed on the omnibus label, while the
    # message above is keyed on "Shapiro-Wilk". A third selection value would
    # announce the omnibus test but run "shapiro" -- confirm the widget only
    # offers these two options.
    normality = pg.normality(df,
                             dv=x_var,
                             group=y_var,
                             method="normaltest" if normality_selected
                             == "Omnibus test of normality" else "shapiro")
    st.write(normality)

    # Unpacking into x1, x2 requires y_var to define exactly two groups;
    # any other number raises ValueError here.
    x1, x2 = df.groupby(y_var)[x_var].apply(list)
    # One Q-Q plot per group, side by side.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4))
    ax1 = pg.qqplot(x1, ax=ax1)
    ax2 = pg.qqplot(x2, ax=ax2)
    st.pyplot(fig)

    # Equality of variances across the groups (no method is passed, so
    # pingouin's default test is used).
    st.success("Levene test for homoscedasticity of variances")
    homoscedasticity = pg.homoscedasticity(df, dv=x_var, group=y_var)
    st.write(homoscedasticity)

    if param_vs_nonparam == "Parametric tests (Student, Welch)":
# Correlation between predicted and ground-truth angles, pooled over all
# entries by flattening both arrays.
flat_pred = pred_angle_data.reshape(-1)
flat_gt = gt_angle_data.reshape(-1)
corr = pg.corr(flat_pred, flat_gt)
print(corr.to_string())

# Distribution of the pooled angle differences.
plt.figure()
# sns.distplot(D[:,0], label="Proximal-thoracic")
# sns.distplot(D[:,1], label="Main thoracic")
# sns.distplot(D[:,2], label="Lumbar")
diff_ax = sns.distplot(D.reshape(-1))
diff_ax.set_xlabel("Difference in Cobb Angle (Degrees)")
diff_ax.set_ylabel("Density")
# plt.legend()
diff_ax.set_title("Difference between Predicted and Ground-truth Cobb Angles")
plt.show()

# Shapiro-Wilk test and Q-Q plot of the pooled differences.
ShapiroWilk = pg.normality(data=D.reshape(-1))
print(ShapiroWilk.to_string())
qq_options = dict(dist='norm',
                  sparams=(),
                  confidence=0.95,
                  figsize=(5, 4),
                  ax=None)
pg.qqplot(D.reshape(-1), **qq_options)

# Predicted versus ground-truth angles, pooled over all entries.
plt.figure()
# sns.scatterplot(x=gt_angle_data[:,0], y=pred_angle_data[:,0], label="Proximal-thoracic")
# sns.scatterplot(x=gt_angle_data[:,1], y=pred_angle_data[:,1], label="Main thoracic")
# sns.scatterplot(x=gt_angle_data[:,2], y=pred_angle_data[:,2], label="Lumbar")
scatter_ax = sns.scatterplot(x=gt_angle_data.reshape(-1),
                             y=pred_angle_data.reshape(-1))
scatter_ax.set_xlabel("Ground-truth Angle (Degrees)")
scatter_ax.set_ylabel("Predicted Angle (Degrees)")
    fig2.savefig('{}/Comparison.pdf'.format(savedir))
    fig2.savefig('{}/Comparison.png'.format(savedir))

import pingouin as pg
from scipy import stats

# Plot the per-group amplitude distributions, then test for group differences.
mainfig, mainplot = plt.subplots(1, 1)
mainfig.suptitle('Average Amplitudes distributions')
mainplot.set_ylabel('Average Amplitudes (pA)')

sn.boxplot(data=flattened_maps, ax=mainplot, palette=colors)
sn.swarmplot(data=flattened_maps, ax=mainplot, color='black', size=2)

# Normality results for flattened_maps (one row per column).
normality = pg.normality(flattened_maps)

# The result is indexed by column label, so the first row must be taken by
# position with .iloc; a bare [0] relies on pandas' deprecated positional
# fallback for label-indexed Series.
# NOTE(review): only the first group's verdict decides the branch -- confirm
# whether all groups should be checked instead.
if not normality['normal'].iloc[0]:
    # Kruskal-Wallis across the first four columns.
    groupStat = stats.kruskal(flattened_maps.values[:, 0],
                              flattened_maps.values[:, 1],
                              flattened_maps.values[:, 2],
                              flattened_maps.values[:, 3])
    print('Normality failed, KW test (pvalue)={}'.format(groupStat[1]))

    # Follow up with per-group comparisons only when the omnibus test is
    # significant at the 5% level.
    if groupStat[1] < 0.05:

        for group in groups:

            # Every group is compared against the 'P30P40' column.
            controlGroup = flattened_maps['P30P40'].values
            compareGroup = flattened_maps[group].values
                      kind="point",
                      dodge=True,
                      height=4,
                      aspect=1.333)

fig_filepath = figures_path / 'line-plot-dVz.pdf'
plt.savefig(str(fig_filepath))
logging.info(f"Written figure to {fig_filepath.resolve()}")

# %% [markdown]
# ### Normality of Fisher-z-transformed Synergy Index
# Make sure the transformation worked.

# %%
# One normality result per task, then pivot the result fields into columns.
dVz_by_task = df.groupby('task')['dVz']
norm_dVz = dVz_by_task.apply(lambda scores: pg.normality(scores).iloc[0])
norm_dVz = norm_dVz.unstack(level=1)

# %% [markdown]
# ### Mixed ANOVA
#

# %%
anova_dVz = analysis.mixed_anova_synergy_index_z(df)

# %% [markdown]
# ## Posthoc Testing

# %%
posthoc_comparisons = analysis.posthoc_ttests(df)

# %%
Beispiel #11
0
            width=.5)
plt.legend(frameon=False, loc='lower right')
plt.xlabel('Treatment')
plt.tight_layout()
#%%
nest_fig.savefig(
    '/Users/labc02/Documents/PDCB_data/MK-project/Figures/nesting_fig.png',
    dpi=600)

# Burrowing data: one combined "<Genotype>_<Tx>" label per row.
burrow_raw = pd.read_csv(
    '/Users/labc02/Documents/PDCB_data/MK-project/Burrowing.csv')
burrow_raw
burrow_raw['Group'] = burrow_raw['Genotype'] + '_' + burrow_raw['Tx']

# Normality per genotype within each treatment. The loop previously passed
# the whole table on every pass, so each treatment printed the same
# whole-dataset result. sort=False keeps the order of first appearance.
for tx, tx_rows in burrow_raw.groupby('Tx', sort=False):
    print(tx)
    print(pg.normality(data=tx_rows, dv='% Test (12 h)', group='Genotype'))

# Check homoscedasticity; print the result so it is not silently discarded
# in the middle of the cell.
print(pg.homoscedasticity(data=burrow_raw, dv='% Test (12 h)', group='Group'))

# Kruskal-Wallis test across the combined genotype/treatment groups.
burr_kw = pg.kruskal(data=burrow_raw, dv='% Test (12 h)', between='Group')
burr_kw
#%%
burr_fig = plt.figure(figsize=(4, 4))
sns.boxplot(x='Tx',
            y='% Test (12 h)',
            hue='Genotype',
            data=burrow_raw,
            palette=['forestgreen', 'royalblue'],
            showmeans=True,
            meanprops={
                'marker': '+',
print('\n******** %s vs %s **********' % (Yname, Xname))
plot = sns.boxplot(x=Xname, y=Yname, data=Ble, hue='phyto')
#
# Homoscedasticity (Levene test) of Yname across the levels of Xname.
# The result name had been mangled into `H**o`, which is not a valid
# assignment target; it is restored as a plain identifier.
Homo = pg.homoscedasticity(data=Ble, dv=Yname, group=Xname, method="levene")
print(Homo)
# print(Ble[Ble['variete']=='V1'].var())
# print(Ble[Ble['variete']=='V2'].var())
# print(Ble[Ble['variete']=='V3'].var())
# print(Ble[Ble['variete']=='V4'].var())
# Normality
# Norm = pg.normality(data=Ble, dv=Yname, group=Xname, method="shapiro")
# print(Norm)
# Normality of residuals.
# NOTE(review): the regression uses the integer category codes of Xname, so
# the factor is treated as a numeric predictor -- confirm this is intended.
lm = pg.linear_regression(Ble[Xname].cat.codes, Ble[Yname])
Normall = pg.normality(lm.residuals_)
print(Normall)
plot = pg.qqplot(lm.residuals_, dist='norm')

# One-way ANOVA of Yname by Xname.
aov = Ble.anova(dv=Yname, between=Xname, detailed=True)
print(aov)

# Analysis of rdt vs phyto
Yname = 'rdt'
Xname = 'phyto'
print('\n******** %s vs %s **********' % (Yname, Xname))
#
# Homoscedasticity for the new pair of variables.
Homo = pg.homoscedasticity(data=Ble, dv=Yname, group=Xname)
print(Homo)
                           sheet_name='io_STATS_foram_only',
                           usecols="B:Q")
# Load three scenario tables from the same worksheet; each one is read from
# its own span of spreadsheet columns, in this order.
io_2050rcp8p5, io_2100rcp8p5, io_2100rcp6 = (
    pd.read_excel('genie_outpout.xlsx',
                  sheet_name='io_STATS_foram_only',
                  usecols=column_span)
    for column_span in ("T:AI", "AL:BA", "BD:BS")
)

#################################################################################################################################################################
# Check the normality of each dataset.
# Every result is now computed from the dataset its name refers to; the 2100
# scenarios previously re-tested the "present" data (copy-paste slip).
# NOTE(review): the *_2100rcp8p5 / *_2100rcp6 frames are assumed to be
# defined alongside the *_present and *_2050rcp8p5 ones -- confirm.
# subpolar
nor_subpolar_present = pg.normality(subpolar_present)
nor_subpolar_2050rcp8p5 = pg.normality(subpolar_2050rcp8p5)
nor_subpolar_2100rcp8p5 = pg.normality(subpolar_2100rcp8p5)
#nor_subpolar_2100rcp6   = pg.normality(subpolar_2100rcp6)
# temperate
nor_temp_present = pg.normality(temp_present)
nor_temp_2050rcp8p5 = pg.normality(temp_2050rcp8p5)
nor_temp_2100rcp8p5 = pg.normality(temp_2100rcp8p5)
nor_temp_2100rcp6 = pg.normality(temp_2100rcp6)
# tropics
nor_trop_present = pg.normality(trop_present)
nor_trop_2050rcp8p5 = pg.normality(trop_2050rcp8p5)
nor_trop_2100rcp8p5 = pg.normality(trop_2100rcp8p5)
nor_trop_2100rcp6 = pg.normality(trop_2100rcp6)
#Indian Ocean
#temperate