Ejemplo n.º 1
0
def alpha_analysis(alpha, idx_onsets):
    """Compare alpha-band power across the four rhythm-density conditions.

    Parameters
    ----------
    alpha : array-like
        Alpha power values, indexable with the onset index arrays below.
    idx_onsets : mapping
        Maps each condition name ('silence', 'low density', 'medium density',
        'high density') to the indices of ``alpha`` belonging to it.

    Side effects: draws a boxplot and prints a Kruskal-Wallis omnibus test
    plus pairwise post-hoc t-tests.
    """
    conditions = ['silence', 'low density', 'medium density', 'high density']

    # Slice the alpha power into one array per condition (was four
    # copy-pasted assignments).
    dict_alpha = {cond: alpha[idx_onsets[cond]] for cond in conditions}

    # boxplot of the per-condition distributions
    fig, ax = subplots()
    ax.boxplot(dict_alpha.values())
    ax.set_xticklabels(dict_alpha.keys())
    title("Distribution of alpha's power for four rythms")
    # savefig('results_44100/alpha_power_perf3')
    show()

    # kruskal_test: non-parametric omnibus test across the four groups
    print('Alpha Kruskal Test\n',
          kruskal_test(*(dict_alpha[cond] for cond in conditions)))

    # post hoc t test on the same four samples
    samples = [dict_alpha[cond] for cond in conditions]
    print('Alpha Posthoc t-test\n', posthoc_ttest(samples))
Ejemplo n.º 2
0
def sign_barplot(df, val_col, group_col, test="HSD"):
    """Draw a barplot of the data next to a significance heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data.
    val_col : str
        Column holding the measured values.
    group_col : str
        Column holding the group labels.
    test : str
        Which post-hoc test to run: "HSD", "tukey", "ttest", "scheffe",
        "dscf" or "conover".

    Raises
    ------
    ValueError
        If ``test`` is not one of the supported names.  (The original code
        left ``result_df`` unbound and crashed later with NameError.)
    """
    # Dispatch table replaces the chain of independent `if` statements.
    post_hoc_tests = {
        "HSD": tukey_hsd,
        "tukey": sp.posthoc_tukey,
        "ttest": sp.posthoc_ttest,
        "scheffe": sp.posthoc_scheffe,
        "dscf": sp.posthoc_dscf,
        "conover": sp.posthoc_conover,
    }
    try:
        result_df = post_hoc_tests[test](df, val_col, group_col)
    except KeyError:
        raise ValueError(
            "unknown test %r; expected one of %s" % (test, sorted(post_hoc_tests))
        ) from None

    # Color/heatmap profile for sign_plot.
    fig, ax = plt.subplots(1, 2, figsize=(10, 6))
    cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
    heatmap_args = {
        'cmap': cmap,
        'linewidths': 0.25,
        'linecolor': '0.5',
        'clip_on': False,
        'square': True
    }

    sp.sign_plot(result_df, ax=ax[1], **heatmap_args)  # draw the test results

    sns.barplot(data=df, x=group_col, y=val_col, capsize=0.1,
                ax=ax[0])  # draw the underlying data
    plt.show()
Ejemplo n.º 3
0
def anova_posthoc_tests(benchmark_snapshot_df):
    """Return p-value tables for various ANOVA post-hoc tests.

    Results should be considered only if the ANOVA test rejects the null
    hypothesis.
    """
    # Arguments shared by every post-hoc call.
    shared_kwargs = dict(
        a=benchmark_snapshot_df,
        group_col='fuzzer',
        val_col='edges_covered',
        sort=True,
    )

    # NOTE(review): the 'turkey' key looks like a typo for 'tukey', but it is
    # part of the returned mapping's interface, so it is kept as-is.
    return {
        'student': sp.posthoc_ttest(equal_var=False,
                                    p_adjust='holm',
                                    **shared_kwargs),
        'turkey': sp.posthoc_tukey(**shared_kwargs),
    }
Ejemplo n.º 4
0
# ANOVA table (type-II sums of squares) for the fitted model, appended to the
# tracker output file.
res = sm.stats.anova_lm(model, typ=2)
res.to_csv(tracker, mode="a")

tracker.write("\nPost-hoc Tukey Tests\n")
# Pairwise Tukey HSD comparison of total_score across years.
mc = statsmodels.stats.multicomp.MultiComparison(df['total_score'], df['year'])
mc_results = mc.tukeyhsd()
tracker.write(str(mc_results))

# Same comparison, grouped by the Kashmir-related flag.
mc = statsmodels.stats.multicomp.MultiComparison(df['total_score'],
                                                 df['is_kashmir'])
mc_results = mc.tukeyhsd()
tracker.write(str(mc_results))

# Combined grouping key: Kashmir flag concatenated with year, so the post-hoc
# t-tests compare every (is_kashmir, year) cell against every other.
df['kashyear'] = df["is_kashmir"].astype(str) + df['year'].astype(str)
tracker.write("\nPost Hoc Student t-yTesting\n")
sp.posthoc_ttest(df, val_col='total_score',
                 group_col='kashyear').to_csv(tracker, mode="a")

#HYPOTHESIS 2 Kashmir conflict to Kashmir non conflict
tracker.write(
    "\n\n\n\nHYPOTHESIS 2: Kashmir-related headlines will have more negative sentiment scores on average in conflict periods than Kashmir-related headlines in non-conflict  periods\r\n"
)

# Descriptive statistics of total_score per (is_kashmir, conflict) cell
# (presumably via researchpy's summary_cont — rp is imported elsewhere).
rp.summary_cont(df.groupby(['is_kashmir',
                            'conflict'])['total_score']).to_csv(tracker,
                                                                mode="a")
levene = stats.levene(
    df['total_score'][(df['conflict'] == "Standoff")
                      & (df['is_kashmir'] == True)],
    df['total_score'][(df['conflict'] == "Mumbai")
                      & (df['is_kashmir'] == True)],
# Concatenate the w1 column of the three model groups into one flat list.
# NOTE(review): omega_0 is referenced below but defined above this chunk —
# presumably built the same way from a .w0 column; confirm upstream.
omega_1 = list(CSI.w1) + list(SSI.w1) + list(SSD.w1)

# Group labels: 50 samples per model, matching the concatenation order above.
model = (['CSI'] * 50) + (['SSI'] * 50) + (['SSD'] * 50)

data = pd.DataFrame({'model': model, 'omega_0': omega_0, 'omega_1': omega_1})

# check if any of the means is significantly different from the rest
lm = ols('omega_0 ~ model', data=data).fit()
table = sm.stats.anova_lm(lm)
print(table)

# post-hoc test to see if CSI mean is significantly higher
print('omega_0')
print(
    sp.posthoc_ttest(data,
                     val_col='omega_0',
                     group_col='model',
                     p_adjust='bonferroni'))

# Same pairwise comparison on the omega_1 measure.
print('\nomega_1')
print(
    sp.posthoc_ttest(data,
                     val_col='omega_1',
                     group_col='model',
                     p_adjust='bonferroni'))

#%% Load number of amino acids per site
CSI, SSI, SSD = num_aa_per_site(protein)
# Column 0: sites with exactly one amino acid; columns 4+: sites with five or
# more amino acids, summed per site.
one = list(CSI[:, 0]) + list(SSI[:, 0]) + list(SSD[:, 0])
five = list(np.sum(CSI[:, 4:], axis=1)) + list(np.sum(
    SSI[:, 4:], axis=1)) + list(np.sum(SSD[:, 4:], axis=1))
Ejemplo n.º 6
0
 def calculate_p_values_groups(self, measure, merged_df, figures_dir, affix, plot=True):
     """Run pairwise post-hoc t-tests of ``measure`` between groups.

     Rows with a missing ``measure`` value are dropped first.  When
     ``plot`` is true, the resulting p-value matrix is also written to
     ``<figures_dir>/p_values_<affix>.csv``.  Returns the p-value matrix.
     """
     valid_rows = merged_df[merged_df[measure].notnull()]
     p_values = sp.posthoc_ttest(valid_rows, val_col=measure, group_col="group")
     if plot:
         p_values.to_csv(figures_dir + "/p_values_" + affix + ".csv")
     return p_values
#print(curr_engine_and_device)
#print(total_runtimes.shape)
#print(total_runtimes)

# test if data is normal
# Shapiro-Wilk p-value per engine/device row, rounded to 4 decimals.
res_normal = np.zeros((len(total_runtimes)), dtype=np.float32)
for i in range(len(total_runtimes)):
    res_normal[i] = np.round(shapiro(total_runtimes[i])[1], 4)

print(res_normal)
# Benjamini-Hochberg correction of the normality p-values.
print(multipletests(res_normal, method="fdr_bh")[1])

df = pd.DataFrame(total_runtimes, curr_engine_and_device)

# test significance between total runtime between IEs (assuming normal)
# Pairwise t-tests with Benjamini-Yekutieli correction.
res = np.round(sp.posthoc_ttest(total_runtimes, p_adjust="fdr_by"), 4)

print(total_runtimes.shape)
print(curr_engine_and_device)

# Use Tukey's method for multiple testing instead (works well with groups of the same number of samples)
print(total_runtimes.flatten().dtype)
# Group labels for the flattened runtimes: assumes 10 samples per
# engine/device — TODO confirm against how total_runtimes is built.
tmp = np.repeat(range(len(curr_engine_and_device)), 10)
print(tmp.dtype)

#tukey = pairwise_tukeyhsd(total_runtimes.flatten(), groups=tmp, alpha=0.05)

#print(df)
#print(res)
#print(tukey)
'''
Ejemplo n.º 8
0
import statsmodels.api as sa
import statsmodels.formula.api as sfa
import scikit_posthocs as sp
import numpy as np
import pandas as pd

# Fisher's iris data set, via the R datasets bundled with statsmodels.
df = sa.datasets.get_rdataset('iris').data

print(df.head())

# Pairwise t-tests of sepal width between species, Holm-corrected.
ttest = sp.posthoc_ttest(df,
                         val_col='Sepal.Width',
                         group_col='Species',
                         p_adjust='holm')

# Bug fix: posthoc_tukey_hsd requires the value vector and the group vector;
# calling it with no arguments raises TypeError.
tukey = sp.posthoc_tukey_hsd(df['Sepal.Width'], df['Species'])
Ejemplo n.º 9
0
import scikit_posthocs as sp
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import statsmodels.api as sm

data = pd.read_table(
    "https://www.krigolsonteaching.com/uploads/4/3/8/4/43848243/sampleanovadata2.txt",
    header=None)
data.columns = ['Subject', 'Group', 'rt']

#Groupings
G1 = data[data['Group'] == 1]['rt']
G2 = data[data['Group'] == 2]['rt']
G3 = data[data['Group'] == 3]['rt']
G4 = data[data['Group'] == 4]['rt']

#anova
# One-way ANOVA across the four reaction-time groups.
A1 = stats.f_oneway(G1, G2, G3, G4)

#Post-Hoc analysis

# Bug fix: posthoc_ttest expects the group samples (or a DataFrame with
# val_col/group_col), not the f_oneway result object.
tt = sp.posthoc_ttest([G1, G2, G3, G4])
# Bug fix: posthoc_tukey_hsd needs the values and group labels themselves,
# not bare column-name strings.
Tuk = sp.posthoc_tukey_hsd(data['rt'], data['Group'], alpha=0.04)

#Princple component anlysis
#Support Vector Machine learning
#K-means cluster learning