Example #1
import matplotlib.pyplot as plt
import scikit_posthocs as sp
import seaborn as sns


def sign_barplot(df, val_col, group_col, test="HSD"):
    if test == "HSD":
        result_df = tukey_hsd(df, val_col, group_col)  # tukey_hsd: helper defined elsewhere in the project
    elif test == "tukey":
        result_df = sp.posthoc_tukey(df, val_col, group_col)
    elif test == "ttest":
        result_df = sp.posthoc_ttest(df, val_col, group_col)
    elif test == "scheffe":
        result_df = sp.posthoc_scheffe(df, val_col, group_col)
    elif test == "dscf":
        result_df = sp.posthoc_dscf(df, val_col, group_col)
    elif test == "conover":
        result_df = sp.posthoc_conover(df, val_col, group_col)
    else:
        raise ValueError("unknown test: %s" % test)
    # Colormap and heatmap settings
    fig, ax = plt.subplots(1, 2, figsize=(10, 6))
    cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
    heatmap_args = {
        'cmap': cmap,
        'linewidths': 0.25,
        'linecolor': '0.5',
        'clip_on': False,
        'square': True
    }

    sp.sign_plot(result_df, ax=ax[1], **heatmap_args)  # plot the significance matrix

    sns.barplot(data=df, x=group_col, y=val_col, capsize=0.1,
                ax=ax[0])  # plot the underlying data
    plt.show()
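
A minimal usage sketch (not part of the original example), assuming a long-format DataFrame; the column names and generated data below are illustrative only:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
demo = pd.DataFrame({
    'value': np.concatenate([rng.normal(loc, 1.0, 30) for loc in (0.0, 0.5, 2.0)]),
    'group': np.repeat(['A', 'B', 'C'], 30),
})
sign_barplot(demo, 'value', 'group', test='tukey')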
Example #2
import scikit_posthocs as sp


def anova_posthoc_tests(benchmark_snapshot_df):
    """Returns p-value tables for various ANOVA post hoc tests.

    Results should be considered only if the ANOVA test rejects the null
    hypothesis.
    """
    common_args = {
        'a': benchmark_snapshot_df,
        'group_col': 'fuzzer',
        'val_col': 'edges_covered',
        'sort': True
    }
    p_adjust = 'holm'

    posthoc_tests = {}
    # Welch's t-test (equal_var=False), Holm-adjusted
    posthoc_tests['student'] = sp.posthoc_ttest(**common_args,
                                                equal_var=False,
                                                p_adjust=p_adjust)
    posthoc_tests['tukey'] = sp.posthoc_tukey(**common_args)
    return posthoc_tests
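
A usage sketch (not from the source), with a made-up snapshot frame that only mimics the two columns the function expects, 'fuzzer' and 'edges_covered':

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
snapshot = pd.DataFrame({
    'fuzzer': np.repeat(['afl', 'libfuzzer', 'honggfuzz'], 20),
    'edges_covered': np.concatenate(
        [rng.normal(mu, 5.0, 20) for mu in (100.0, 110.0, 130.0)]),
})
tables = anova_posthoc_tests(snapshot)
print(tables['student'])  # Welch t-test p-values, Holm-adjusted
print(tables['tukey'])    # Tukey HSD p-values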
Example #3
import pandas as pd
import scikit_posthocs as sp
import statsmodels.stats.multicomp as multi


def post_hoc_df(df, Y_col, X_col, posthoc="tukey", alpha=0.05):
    """
    Return a DataFrame of pairwise comparisons, with the reject column
    computed at the given alpha.

    TODO: Add more posthoc tests to this function
    """
    if posthoc == "Statsmodels_tukey":
        comp = multi.MultiComparison(df[Y_col], df['comb'])
        results = comp.tukeyhsd(alpha=alpha)
        results = pd.DataFrame(data=results._results_table.data[1:],
                               columns=results._results_table.data[0])
    elif posthoc == "dunn":
        # scikit_results_munger is a helper defined elsewhere in the project
        results = scikit_results_munger(
            sp.posthoc_dunn(df,
                            val_col=Y_col,
                            group_col=X_col,
                            p_adjust='holm'), alpha)
    elif posthoc == "tukey":
        results = scikit_results_munger(
            sp.posthoc_tukey(df, val_col=Y_col, group_col=X_col), alpha)
    else:
        raise ValueError("unsupported posthoc test: %s" % posthoc)
    return results
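
The example relies on a helper, scikit_results_munger, that is not shown here. A minimal sketch of what such a helper might look like, assuming it only melts the square p-value matrix returned by scikit-posthocs into a long pairwise table and flags rejections at alpha (an illustration, not the project's actual implementation):

import numpy as np
import pandas as pd


def scikit_results_munger(pval_matrix, alpha):
    # Keep each pair once by masking everything but the upper triangle
    keep = np.triu(np.ones(pval_matrix.shape, dtype=bool), k=1)
    pairs = pval_matrix.where(keep).stack().reset_index()
    pairs.columns = ['group1', 'group2', 'p_value']
    pairs['reject'] = pairs['p_value'] < alpha
    return pairs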
Example #4
training_time = training_time.sort_values('lab_number')

#  statistics
# Test normality
_, normal = stats.normaltest(training_time['sessions'])  # p-value of the normality test
if normal < 0.05:
    kruskal = stats.kruskal(*[group['sessions'].values
                              for name, group in training_time.groupby('lab')])
    if kruskal[1] < 0.05:  # Proceed to posthocs
        posthoc = sp.posthoc_dunn(training_time, val_col='sessions',
                                  group_col='lab_number')
else:
    anova = stats.f_oneway(*[group['sessions'].values
                             for name, group in training_time.groupby('lab')])
    if anova[1] < 0.05:
        posthoc = sp.posthoc_tukey(training_time, val_col='sessions',
                                   group_col='lab_number')
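
A small follow-on sketch (not in the original script): whichever branch ran, the p-value matrix in posthoc can be rendered as a significance heatmap the same way Example #1 does. The guard below is only an illustration, since posthoc stays undefined when the omnibus test is not significant.

import matplotlib.pyplot as plt

if 'posthoc' in locals():  # only defined when the omnibus test was significant
    fig, ax = plt.subplots(figsize=(5, 5))
    sp.sign_plot(posthoc, ax=ax, linewidths=0.25, linecolor='0.5', square=True)
    plt.show()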


# %% PLOT

# Set figure style and color palette
use_palette = [[0.6, 0.6, 0.6]] * len(np.unique(training_time['lab']))
use_palette = use_palette + [[1, 1, 0.2]]
lab_colors = group_colors()

# Add all mice to dataframe separately for plotting
training_time_no_all = training_time.copy()
training_time_no_all.loc[training_time_no_all.shape[0] + 1, 'lab_number'] = 'All'
training_time_all = training_time.copy()
training_time_all['lab_number'] = 'All'
training_time_all = pd.concat([training_time, training_time_all])
Example #5
import numpy as np
import seaborn as sb
from scipy.stats import f_oneway, levene
from scikit_posthocs import posthoc_tukey

# Levene's test for homogeneity of variances across the six segmentation methods
print(
    levene(all_data["Gaussian"].tolist(), all_data["Hessian"].tolist(),
           all_data["Laplacian"].tolist(), all_data["Ilastik"].tolist(),
           all_data["MitoSegNet"].tolist(),
           all_data["Finetuned\nFiji U-Net"].tolist()))

# One-way ANOVA across the same six groups
print(
    f_oneway(all_data["Gaussian"].tolist(), all_data["Hessian"].tolist(),
             all_data["Laplacian"].tolist(), all_data["Ilastik"].tolist(),
             all_data["MitoSegNet"].tolist(),
             all_data["Finetuned\nFiji U-Net"].tolist()))

# Reshape wide columns into long format for scikit-posthocs
x = all_data
x = x.melt(var_name='groups', value_name='values')

pht = posthoc_tukey(x, val_col='values', group_col='groups')
print(pht)
#pht.to_excel("fod_posthoc.xlsx")
"""
for seg in seg_list:
    if seg != "MitoSegNet":
        print(seg, cohens_d(all_data[seg], all_data["MitoSegNet"]))
"""

#n = sb.violinplot(data=all_data, color="white", inner=None)#.set(ylabel="Percent of wrongly\nsegmented objects")
n = sb.swarmplot(data=all_data, color="black")
sb.boxplot(data=all_data, color="white", fliersize=0)

n.set_ylabel("Missing objects", fontsize=32)

#n.set_ylabel("Percent of missing objects", fontsize=18)

import numpy as np
import pandas as pd
import scikit_posthocs as sp
from scipy import stats
from statsmodels.stats.multitest import multipletests

# Per-variable statistics: use Kruskal-Wallis when a variable is non-normal and
# one-way ANOVA otherwise, followed by the matching post hoc test.
for i, var in enumerate(test_vars):  # test_vars: variable list defined earlier in the original script
    _, normal = stats.normaltest(test_df[var])  # p-value of the normality test
    if normal < 0.05:
        test_type = 'kruskal'
        test = stats.kruskal(*[
            group[var].values for name, group in test_df.groupby('lab_number')
        ])
        if test[1] < 0.05:  # Proceed to posthocs
            posthoc = sp.posthoc_dunn(test_df,
                                      val_col=var,
                                      group_col='lab_number')
        else:
            posthoc = np.nan
    else:
        test_type = 'anova'
        test = stats.f_oneway(*[
            group[var].values for name, group in test_df.groupby('lab_number')
        ])
        if test[1] < 0.05:
            posthoc = sp.posthoc_tukey(test_df,
                                       val_col=var,
                                       group_col='lab_number')
        else:
            posthoc = np.nan

    posthoc_tests['posthoc_' + str(var)] = posthoc
    stats_tests.loc[i, 'variable'] = var
    stats_tests.loc[i, 'test_type'] = test_type
    stats_tests.loc[i, 'p_value'] = test[1]

# Z-score data
learned_zs = pd.DataFrame()
learned_zs['lab'] = learned['lab']
learned_zs['lab_number'] = learned['lab_number']
learned_zs['Performance'] = stats.zscore(learned['perf_easy'])
learned_zs['Threshold'] = stats.zscore(learned['threshold'])

# Per-variable comparison across labs for the learned mice, again choosing
# Kruskal-Wallis or ANOVA based on normality.
for i, var in enumerate(learned_vars):  # learned_vars: variable list defined earlier in the original script
    _, normal = stats.normaltest(learned[var])  # p-value of the normality test
    if normal < 0.05:
        test_type = 'kruskal'
        test = stats.kruskal(*[
            group[var].values for name, group in learned.groupby('lab_number')
        ])
        if test[1] < 0.05:  # Proceed to posthocs
            posthoc = sp.posthoc_dunn(learned,
                                      val_col=var,
                                      group_col='lab_number')
        else:
            posthoc = np.nan
    else:
        test_type = 'anova'
        test = stats.f_oneway(*[
            group[var].values for name, group in learned.groupby('lab_number')
        ])
        if test[1] < 0.05:
            posthoc = sp.posthoc_tukey(learned,
                                       val_col=var,
                                       group_col='lab_number')
        else:
            posthoc = np.nan

    posthoc_tests['posthoc_' + str(var)] = posthoc
    stats_tests.loc[i, 'variable'] = var
    stats_tests.loc[i, 'test_type'] = test_type
    stats_tests.loc[i, 'p_value'] = test[1]

if (stats.normaltest(learned['n_trials'])[1] < 0.05
        or stats.normaltest(learned['reaction_time'])[1] < 0.05):
    test_type = 'spearman'
    correlation_coef, correlation_p = stats.spearmanr(learned['reaction_time'],
                                                      learned['n_trials'])
if (stats.normaltest(learned['n_trials'])[1] > 0.05
        and stats.normaltest(learned['reaction_time'])[1] > 0.05):
    test_type = 'pearson'
    correlation_coef, correlation_p = stats.pearsonr(learned['reaction_time'],
                                                     learned['n_trials'])

# Per-variable comparison across labs for the psychometric fits, choosing
# Kruskal-Wallis or ANOVA based on normality.
for i, var in enumerate(fit_vars):  # fit_vars: variable list defined earlier in the original script
    _, normal = stats.normaltest(test_fits[var])

    if normal < 0.05:
        test_type = 'kruskal'
        test = stats.kruskal(
            *[group[var].values for name, group in test_fits.groupby('lab')])
        if test[1] < 0.05:  # Proceed to posthocs
            posthoc = sp.posthoc_dunn(test_fits, val_col=var, group_col='lab')
        else:
            posthoc = np.nan
    else:
        test_type = 'anova'
        test = stats.f_oneway(
            *[group[var].values for name, group in test_fits.groupby('lab')])
        if test[1] < 0.05:
            posthoc = sp.posthoc_tukey(test_fits, val_col=var, group_col='lab')
        else:
            posthoc = np.nan

    posthoc_tests['posthoc_' + str(var)] = posthoc
    stats_tests.loc[i, 'variable'] = var
    stats_tests.loc[i, 'test_type'] = test_type
    stats_tests.loc[i, 'p_value'] = test[1]

# Correct for multiple tests
stats_tests['p_value'] = multipletests(stats_tests['p_value'])[1]

# %% Prepare for plotting

# Sort by lab number
biased_fits = biased_fits.sort_values('lab')

# Per-variable comparison across labs for the biased-block fits.
for i, var in enumerate(fit_vars):  # fit_vars: variable list defined earlier in the original script
    _, normal = stats.normaltest(biased_fits[var])

    if normal < 0.05:
        test_type = 'kruskal'
        test = stats.kruskal(*[group[var].values
                               for name, group in biased_fits.groupby('lab')])
        if test[1] < 0.05:  # Proceed to posthocs
            posthoc = sp.posthoc_dunn(biased_fits, val_col=var, group_col='lab')
        else:
            posthoc = np.nan
    else:
        test_type = 'anova'
        test = stats.f_oneway(*[group[var].values
                                for name, group in biased_fits.groupby('lab')])
        if test[1] < 0.05:
            posthoc = sp.posthoc_tukey(biased_fits, val_col=var, group_col='lab')
        else:
            posthoc = np.nan

    posthoc_tests['posthoc_'+str(var)] = posthoc
    stats_tests.loc[i, 'variable'] = var
    stats_tests.loc[i, 'test_type'] = test_type
    stats_tests.loc[i, 'p_value'] = test[1]

# Correct for multiple tests
stats_tests['p_value'] = multipletests(stats_tests['p_value'])[1]

# Test between left/right blocks
for i, var in enumerate(['threshold', 'lapselow', 'lapsehigh', 'bias']):
    stats_tests.loc[stats_tests.shape[0] + 1, 'variable'] = '%s_blocks' % var
    stats_tests.loc[stats_tests.shape[0], 'test_type'] = 'wilcoxon'
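    # Sketch, not in the source (the snippet ends just above): assuming the
    # left- and right-block fits live in hypothetical columns f'{var}_l' and
    # f'{var}_r', the paired Wilcoxon p-value could be filled in like this.
    _, p = stats.wilcoxon(biased_fits['%s_l' % var], biased_fits['%s_r' % var])
    stats_tests.loc[stats_tests.shape[0], 'p_value'] = p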
Example #10
             all_data["Laplacian"].tolist(), all_data["Ilastik"].tolist(),
             all_data["MitoSegNet"].tolist(),
             all_data["Finetuned\nFiji U-Net"].tolist()))

x = [
    all_data["Gaussian"].tolist(), all_data["Hessian"].tolist(),
    all_data["Laplacian"].tolist(), all_data["Ilastik"].tolist(),
    all_data["MitoSegNet"].tolist(),
    all_data["Finetuned\nFiji U-Net"].tolist()
]

#print(posthoc_tukey(x))

# Reshape wide columns into long format for scikit-posthocs
x = all_data
x = x.melt(var_name='groups', value_name='values')
print(posthoc_tukey(x, val_col='values', group_col='groups'))

print("\n")

print(np.average(all_data["Gaussian"]))
print(np.average(all_data["Hessian"]))
print(np.average(all_data["Laplacian"]))
print(np.average(all_data["Ilastik"]))
print(np.average(all_data["MitoSegNet"]))
print(np.average(all_data["Finetuned\nFiji U-Net"]))

print("\n")

#print(kruskal(all_data["Gaussian"].tolist(), all_data["Hessian"].tolist(), all_data["Laplacian"].tolist(),
#              all_data["Ilastik"].tolist(), all_data["MitoSegNet"].tolist(), all_data["Finetuned\nFiji U-Net"].tolist()))
Example #11
co.summary()

# ### another library for post-hoc tests
# #### # https://scikit-posthocs.readthedocs.io/en/latest/posthocs_api/

# In[27]:

import scikit_posthocs as sk_ph

# In[28]:

sk_ph.posthoc_tukey_hsd(df['libido'], df['dose'])

# In[29]:

sk_ph.posthoc_tukey(df, val_col='libido', group_col='dose')

# ## Robust Post-hoc test

# In[30]:

sk_ph.posthoc_wilcoxon(df, val_col='libido', group_col='dose')

# #### # From the table above, the group pairs (1, 3) and (2, 3) appear to differ significantly.
#
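
# Not part of the original notebook: a small sketch, assuming the same df with
# 'libido' and 'dose' columns, that pulls the significant pairs out of the
# Wilcoxon p-value matrix at alpha = 0.05.

wilcoxon_p = sk_ph.posthoc_wilcoxon(df, val_col='libido', group_col='dose')
sig_pairs = [(a, b) for a in wilcoxon_p.index for b in wilcoxon_p.columns
             if a < b and wilcoxon_p.loc[a, b] < 0.05]
print(sig_pairs)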

# In[31]:

anova_table

# In[32]: