Ejemplo n.º 1
0
def sign_barplot(df, val_col, group_col, test="HSD"):
    """Show a bar plot of the data beside a significance plot of a post-hoc test.

    The left axis gets ``sns.barplot`` of ``val_col`` grouped by ``group_col``;
    the right axis gets ``sp.sign_plot`` of the selected post-hoc test result.
    The figure is displayed with ``plt.show()``; nothing is returned.

    Args:
        df: Data handed to the post-hoc function and to ``sns.barplot``.
        val_col: Column of ``df`` holding the measured values.
        group_col: Column of ``df`` holding the group labels.
        test: Post-hoc test to run. ``"HSD"`` uses the ``tukey_hsd`` helper;
            ``"tukey"``, ``"ttest"``, ``"scheffe"``, ``"dscf"`` and
            ``"conover"`` use the matching ``sp.posthoc_*`` function.

    Raises:
        ValueError: If ``test`` is not one of the supported names.
    """
    # Resolve the test first so an unknown name fails fast with a clear
    # message instead of an UnboundLocalError after the figure is created.
    if test == "HSD":
        result_df = tukey_hsd(df, val_col, group_col)
    elif test == "tukey":
        result_df = sp.posthoc_tukey(df, val_col, group_col)
    elif test == "ttest":
        result_df = sp.posthoc_ttest(df, val_col, group_col)
    elif test == "scheffe":
        result_df = sp.posthoc_scheffe(df, val_col, group_col)
    elif test == "dscf":
        result_df = sp.posthoc_dscf(df, val_col, group_col)
    elif test == "conover":
        result_df = sp.posthoc_conover(df, val_col, group_col)
    else:
        raise ValueError(
            "unknown test {!r}; expected one of 'HSD', 'tukey', 'ttest', "
            "'scheffe', 'dscf', 'conover'".format(test))

    # Heatmap styling for the significance plot.
    _, ax = plt.subplots(1, 2, figsize=(10, 6))
    cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
    heatmap_args = {
        'cmap': cmap,
        'linewidths': 0.25,
        'linecolor': '0.5',
        'clip_on': False,
        'square': True,
    }

    # Right panel: the post-hoc test result.
    sp.sign_plot(result_df, ax=ax[1], **heatmap_args)

    # Left panel: the data the test was run on.
    sns.barplot(data=df, x=group_col, y=val_col, capsize=0.1, ax=ax[0])
    plt.show()
Ejemplo n.º 2
0
# p-value of a preceding test that is not visible in this excerpt
# (per the notes below, an equal-variance check).
print(p_value)
# Author's observation: the hypothesis of equal variances cannot be rejected
# (p_value=0.28 > 0.05, so H0 is not rejected) and each sample has more than
# 5 individuals, so the assumptions for the Kruskal-Wallis test are met.

from scipy.stats import kruskal
# Kruskal-Wallis H-test across the three samples.
H, p_value = kruskal(BECH_mean, SW_mean, SL_mean)
print(H)
print(p_value)
# Author's observation: p_value=0.002, i.e. the samples do not all share the
# same median.

# Reshape the three samples into long format (one 'groups' label column and
# one 'values' column) as required by the post-hoc test. Wrapping each sample
# in pd.Series lets samples of different lengths share one DataFrame; shorter
# ones are padded with NaN.
# NOTE(review): any NaN padding survives melt() -- confirm posthoc_dscf
# handles those rows as intended.
data_dict = {'BECH': BECH_mean, 'SW': SW_mean, 'SL': SL_mean}
df = pd.DataFrame({k: pd.Series(v) for k, v in data_dict.items()})
df = df.melt(var_name='groups', value_name='values')
from scikit_posthocs import posthoc_dscf
# Pairwise DSCF post-hoc test. The result is not assigned to a name here;
# presumably it was inspected interactively.
posthoc_dscf(df, val_col='values', group_col='groups')
# Output recorded by the author (pairwise p-values):
#       BECH        SL        SW
#BECH -1.000  0.001000  0.001000
#SL    0.001 -1.000000  0.009617
#SW    0.001  0.009617 -1.000000

# All pairwise p-values are below 0.05, so each sample differs significantly
# from the other two.
import pylab
# Box plot of the three samples; the label order matches the order of `data`.
BoxName = ['BECH', 'SL', 'SW']
data = [BECH_mean, SL_mean, SW_mean]
pylab.xticks([1, 2, 3], BoxName)
plt.boxplot(data)
# Save before show().
plt.savefig('MultipleBoxPlot.png')
plt.show()
Ejemplo n.º 3
0
# ----------------------------------------------------------------
# Compare the per-row ratio std_vel / mean_vel ("cv") across the four
# experiment types.
# ----------------------------------------------------------------
print('counter variance')
summary_df["cv"] = summary_df.std_vel / summary_df.mean_vel

_EXPERIMENT_TYPES = ('baseline', 'control', 'button', 'touch')


def _cv_for(kind):
    """Return the non-NaN ``cv`` values of rows whose experiment_type is *kind*."""
    return summary_df[summary_df.experiment_type == kind].cv.dropna()


# Normality check on the pooled cv values, then a median-centred Levene
# test across the four experiment types.
_, norm_p = stats.shapiro(summary_df.cv.dropna())
_, var_p = stats.levene(
    *[_cv_for(kind) for kind in _EXPERIMENT_TYPES],
    center='median'
)

if not (norm_p < 0.05 or var_p < 0.05):
    # Neither check rejected: Tukey HSD on the rows with no missing values.
    _complete_rows = summary_df.dropna(how='any')
    multicomp_result = multicomp.MultiComparison(
        np.array(_complete_rows.cv, dtype="float64"),
        _complete_rows.experiment_type,
    )
    print('levene', multicomp_result.tukeyhsd().summary())
else:
    # At least one check rejected: fall back to the DSCF post-hoc test.
    print(
        'steel-dwass\n',
        sp.posthoc_dscf(summary_df, val_col='cv', group_col='experiment_type'),
    )

# Header row followed by the per-type mean of cv, in the same column order.
print('----', *_EXPERIMENT_TYPES)
print('mean', *[_cv_for(kind).mean() for kind in _EXPERIMENT_TYPES])
print('var',
    summary_df[summary_df.experiment_type == 'baseline'].cv.dropna().std(),
    summary_df[summary_df.experiment_type == 'control'].cv.dropna().std(),
    summary_df[summary_df.experiment_type == 'button'].cv.dropna().std(),
    summary_df[summary_df.experiment_type == 'touch'].cv.dropna().std(),
Ejemplo n.º 4
0
    def show_statistics(self, feature_type):
        """Print a box plot, summary statistics and a test cascade for one feature.

        Builds a DataFrame with one column per category from
        ``self.feature`` (each value indexed by ``feature_type``), then:

        1. draws a box plot labelled with ``self.categories_en``;
        2. displays ``describe()`` of the data;
        3. runs ``st.kstest(..., "norm")`` per category;
        4. if any of those p-values is below ``self.ks_a_percentage`` percent,
           runs ``st.kruskal`` across the categories;
        5. if that p-value is below ``self.kruskal_a_percentage`` percent,
           displays the pairwise p-value table from ``sp.posthoc_dscf``.

        ``self.steel_dwass_a_percentage`` is only printed; no decision in this
        method depends on it.

        Args:
            feature_type: Key used to pick the feature out of every entry of
                ``self.feature``; also interpolated into the printed labels.

        Returns:
            None. All results are printed or shown via ``display``.
        """
        print("Focus on number of {}".format(feature_type))
        feature_df = pd.DataFrame(
            [data[feature_type] for data in self.feature.values()],
            index=self.categories.values()).T

        _, ax = plt.subplots(figsize=(8, 4))
        # Positional args after the data are notch=0 and sym="".
        ax.boxplot(feature_df.values, 0, "")
        ax.set_xticklabels(self.categories_en.values())
        plt.title('Box plot')
        plt.xlabel('category')
        plt.ylabel('number of {}'.format(feature_type))

        plt.show()
        print("========================================"
              "========================================")
        print("要約統計量")
        display(feature_df.describe())
        print("========================================"
              "========================================")

        # --- Step 1: normality check per category ------------------------
        print("データの正規性を調べる")
        print("Kolmogorov–Smirnov検定")
        ks_null_hypothesis = "データの分布は正規分布と差がない"
        ks_alternative_hypothesis = "データの分布は正規分布と差がある"
        print("帰無仮説: {}".format(ks_null_hypothesis))
        print("対立仮説: {}".format(ks_alternative_hypothesis))
        ks_a_percentage = self.ks_a_percentage
        ks_alpha = ks_a_percentage * 0.01
        print("有意水準: {}%({})".format(ks_a_percentage, ks_alpha))
        # NOTE(review): if `st` is scipy.stats, kstest(data, "norm") with no
        # extra args compares against the *standard* normal (loc=0, scale=1),
        # so unstandardised data will tend to be rejected. Confirm whether
        # the data should be standardised first.
        ks_p_values = [
            st.kstest(data, "norm")[1] for data in feature_df.values.T
        ]
        print("p値")
        for cat, p_value in zip(self.categories.values(), ks_p_values):
            print("{}: {}".format(cat, p_value))
        # `not (a < b)` rather than `a >= b` so a NaN p-value still takes
        # the "not rejected" path, exactly as before.
        if not (np.min(ks_p_values) < ks_alpha):
            return
        print("いずれかの群のデータに於いて,"
              "帰無仮説が棄却されたため「{}」とは言えない,つまり「{}」".format(
                  ks_null_hypothesis, ks_alternative_hypothesis))
        print("=> ノンパラメトリック検定へ")
        print("======================================"
              "==========================================")

        # --- Step 2: omnibus test across categories ----------------------
        print("カテゴリ間に差があるか調べる")
        print("Kruskal-Wallis検定")
        kruskal_null_hypothesis = "カテゴリ間で代表値(中央値)に差はない"
        kruskal_alternative_hypothesis = "いずれかのカテゴリ間で"\
            "代表値(中央値)に差がある"
        print("帰無仮説: {}".format(kruskal_null_hypothesis))
        print("対立仮説: {}".format(kruskal_alternative_hypothesis))
        kruskal_a_percentage = self.kruskal_a_percentage
        kruskal_alpha = kruskal_a_percentage * 0.01
        print("有意水準: {}%({})".format(kruskal_a_percentage, kruskal_alpha))
        kruskal_p_value = st.kruskal(*feature_df.values.T)[1]
        print("p値: {}".format(kruskal_p_value))
        if not (kruskal_p_value < kruskal_alpha):
            print("帰無仮説が棄却されなかったため,"
                  "「{}」は誤っているとは言えない".format(kruskal_null_hypothesis))
            print("\n")
            return
        print("帰無仮説が棄却されたため,「{}」とは言えない,つまり「{}」".format(
            kruskal_null_hypothesis, kruskal_alternative_hypothesis))
        print("======================================"
              "==========================================")

        # --- Step 3: pairwise post-hoc comparison ------------------------
        print("多重比較")
        print("Steel-Dwass検定")
        steel_dwass_null_hypothesis = "2つのカテゴリ間の"\
            "代表値(中央値)に差はない"
        print("帰無仮説: {}".format(steel_dwass_null_hypothesis))
        steel_dwass_a_percentage = self.steel_dwass_a_percentage
        # Label spelling fixed to match the two significance-level lines above.
        print("有意水準: {}%({})".format(steel_dwass_a_percentage,
                                     steel_dwass_a_percentage * 0.01))
        # Long format: one row per observation with its category label.
        values = []
        groups = []
        for data, cat in zip(feature_df.T.values, self.categories.values()):
            groups.extend([cat] * len(data))
            values.extend(data)
        # Column-wise construction keeps the value column's own dtype;
        # transposing a list-of-lists frame would force it to object.
        posthoc_df = pd.DataFrame({"value": values, "category": groups})
        print("以下p値の表")
        display(
            sp.posthoc_dscf(posthoc_df,
                            val_col="value",
                            group_col="category"))
Ejemplo n.º 5
0
def steel_dwass(df, x, y, **kwargs):
    """Run the Steel-Dwass (DSCF) post-hoc test, a non-parametric counterpart of Tukey HSD.

    Thin wrapper around ``sp.posthoc_dscf`` that takes plotting-style
    ``x``/``y`` arguments.

    Args:
        df: Data passed straight through to ``sp.posthoc_dscf``.
        x: Name of the grouping column (forwarded as ``group_col``).
        y: Name of the value column (forwarded as ``val_col``).
        **kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        Whatever ``sp.posthoc_dscf`` returns.
    """
    pairwise_result = sp.posthoc_dscf(df, group_col=x, val_col=y, **kwargs)
    return pairwise_result