Ejemplo n.º 1
0
    def anova_test(self,
                   value_col,
                   group_col,
                   subject_col,
                   condition=False,
                   display_result=True):
        """Run ``rm_anova`` on the (optionally filtered) data and return it.

        The frame returned by ``self.__get_condition(self.df, condition)`` is
        passed to ``rm_anova`` followed by the three column arguments, with
        ``correction=True`` and ``effsize='n2'``.

        Parameters
        ----------
        value_col, group_col, subject_col
            Forwarded positionally to ``rm_anova`` right after the data, in
            this order (presumably dependent variable, within factor and
            subject identifier -- confirm against the ``rm_anova`` in scope).
        condition
            Handed to ``self.__get_condition``. Any value other than the
            literal ``False`` is also printed via
            ``self.__condition_to_string`` when results are displayed.
        display_result
            When true, print a banner and show the summary with ``display``.

        Returns
        -------
        Whatever ``rm_anova`` returned.
        """
        subset = self.__get_condition(self.df, condition)
        summary = rm_anova(subset,
                           value_col,
                           group_col,
                           subject_col,
                           correction=True,
                           effsize='n2')

        if not display_result:
            return summary

        for banner_line in ("#############", "### ANOVA ###", "#############"):
            print(banner_line)
        # Identity check on purpose: only the literal False means "no
        # condition"; other falsy values are still printed.
        if condition is not False:
            print(self.__condition_to_string(condition))
        display(summary)
        print("")
        return summary
Ejemplo n.º 2
0
def do_anova(meas, n_subjs, thetas, n_rules=2):
    """Two-way repeated-measures ANOVA (rule x theta) over ``meas``.

    ``meas`` is read as ``meas[theta, rule, subject]``. Every cell is copied
    into a long-format table with columns ``obs`` (subject index),
    ``ruleFact``, ``thetaFact`` and ``perf``, which is then analysed with
    ``pg.rm_anova`` using both factors as within-subject factors.

    NOTE(review): the ``n_subjs`` argument is ignored -- it is immediately
    replaced by the module-level ``n_reps_bundled``. This looks unintended,
    but which of the two matches the last axis of ``meas`` cannot be told
    from here, so the behaviour is kept as is. Confirm and remove one.

    Returns
    -------
    tuple
        ``(fs, pvals, etas)``: the ``F`` column as an array, and the
        ``p-unc`` and ``n2`` columns of the ANOVA table.
    """
    n_subjs = n_reps_bundled  # see NOTE(review) in the docstring
    n_thetas = len(thetas)

    long_form = np.zeros(shape=[n_subjs * n_thetas * n_rules, 4])
    # ndindex walks subject (slowest) -> theta -> rule (fastest), i.e. the
    # same row order as three nested loops.
    cells = np.ndindex(n_subjs, n_thetas, n_rules)
    for row, (subj, theta, rule) in enumerate(cells):
        long_form[row, :] = [subj, rule, theta, meas[theta, rule, subj]]

    table = pd.DataFrame(long_form,
                         columns=['obs', 'ruleFact', 'thetaFact', 'perf'])
    aov = pg.rm_anova(data=table,
                      dv='perf',
                      subject='obs',
                      within=['ruleFact', 'thetaFact'],
                      detailed=True,
                      effsize='n2')
    return aov['F'].values, aov['p-unc'], aov['n2']
Ejemplo n.º 3
0
    def pairwise_ttests_paired(self):
        """Run a one-way RM ANOVA and Holm-adjusted paired post-hoc tests.

        ``self.df`` is melted to long format with ``ID`` as the identifier
        column, so every other column becomes a level of ``variable`` with
        its entries in ``value``. The ANOVA table is stored in
        ``self.oneway_rm_aov`` and the pairwise comparisons (parametric,
        Hedges' g effect size, Holm adjustment) in ``self.ttests_paired``.
        Nothing is returned.
        """
        long_df = self.df.melt(id_vars="ID")
        design = dict(data=long_df, dv="value", within="variable",
                      subject='ID')

        self.oneway_rm_aov = pg.rm_anova(**design)
        self.ttests_paired = pg.pairwise_ttests(padjust="holm",
                                                effsize="hedges",
                                                parametric=True,
                                                **design)
Ejemplo n.º 4
0
    def perform_anova(self, intensity):
        """Run and print an RM ANOVA plus Bonferroni post-hocs for one column.

        ``intensity`` names the dependent-variable column of
        ``self.df_percent``; ``Model`` is the within-subject factor and ``ID``
        the subject column. The detailed, sphericity-corrected ANOVA table is
        stored in ``self.aov`` and the parametric pairwise tests (Hedges' g,
        Bonferroni adjustment) in ``self.posthoc``. Both are printed.
        """
        design = dict(data=self.df_percent, dv=intensity, within="Model",
                      subject="ID")

        self.aov = pg.rm_anova(correction=True, detailed=True, **design)
        print(self.aov)

        self.posthoc = pg.pairwise_ttests(padjust="bonf", effsize="hedges",
                                          parametric=True, **design)
        print(self.posthoc)
Ejemplo n.º 5
0
def make_anova_2way(df, title):
    """Run a two-way RM ANOVA with post-hoc tests and write the results out.

    ``df`` must be in long format with the columns ``value`` (dependent
    variable), ``question`` and ``context`` (within-subject factors) and
    ``ResponseId`` (subject).

    Three files are written, all prefixed with ``FILENAME_ANOVAS + fn``
    (both names are read from module scope; neither is defined here):

    * ``-anova.csv``    -- the ANOVA table, rounded to 3 decimals
    * ``-posthocs.csv`` -- Bonferroni-adjusted pairwise tests
    * ``-anova.txt``    -- ``title`` followed by both tables as text

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data as described above.
    title : str
        First line of the text report.

    Returns
    -------
    None
    """
    print("\tMAKING ANOVA")

    design = dict(dv='value', within=['question', 'context'],
                  subject='ResponseId', data=df)

    # DataFrame.round returns a new frame, so the result has to be kept;
    # calling it as a bare statement (as before) left the table unrounded.
    aov = pg.rm_anova(**design).round(3)
    aov.to_csv(FILENAME_ANOVAS + fn + '-anova.csv')

    posthocs = pg.pairwise_ttests(padjust='bonf', **design)
    posthocs.to_csv(FILENAME_ANOVAS + fn + '-posthocs.csv')
    print()

    anova_text = title + "\n" + str(aov) + "\n" + str(posthocs)
    # Context manager so the handle is closed even if the write fails.
    with open(FILENAME_ANOVAS + fn + "-anova.txt", "w") as report:
        report.write(anova_text)
Ejemplo n.º 6
0
    def activity_stats(self, data_type='percent'):
        """Run one RM ANOVA per activity intensity and stack the tables.

        For each intensity column of ``self.df_activity`` a detailed,
        sphericity-corrected repeated-measures ANOVA is computed with
        ``Model`` as within-subject factor and ``ID`` as subject.

        Parameters
        ----------
        data_type : {'percent', 'minutes'}
            ``'percent'`` analyses the ``"<name>%"`` columns, ``'minutes'``
            the plain ``"<name>"`` columns, for the names Sedentary, Light,
            Moderate, Vigorous and MVPA.

        Returns
        -------
        pandas.DataFrame
            All ANOVA tables concatenated (original row labels kept), with
            a leading ``Intensity`` column and a trailing ``Significant``
            column that is ``"Yes"`` where ``p-unc < .05`` and ``"No"``
            otherwise (including rows without a p-value).

        Raises
        ------
        ValueError
            If ``data_type`` is not one of the two supported values.
        """
        # Local import: this block does not otherwise reference pandas.
        import pandas as pd

        base_names = ["Sedentary", "Light", "Moderate", "Vigorous", "MVPA"]
        if data_type == 'percent':
            intensity_list = [name + "%" for name in base_names]
        elif data_type == 'minutes':
            intensity_list = base_names
        else:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                "data_type must be 'percent' or 'minutes', got {!r}".format(
                    data_type))

        df = self.df_activity[["ID", "Model"] + intensity_list]

        tables = []
        for intensity in intensity_list:
            aov = pg.rm_anova(data=df, dv=intensity, within="Model",
                              subject="ID", correction=True, detailed=True)
            # A scalar is broadcast to every row, so this no longer assumes
            # the table has exactly two rows.
            aov.insert(0, "Intensity", intensity)
            tables.append(aov)

        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported equivalent and avoids re-copying on every iteration.
        aov_df = pd.concat(tables)
        aov_df["Significant"] = ["Yes" if p < .05 else "No"
                                 for p in aov_df["p-unc"]]

        return aov_df
Ejemplo n.º 7
0
def test_friedman(df, ind_var, dep_var, is_non_normal=None):
    """Compare ``dep_var`` across the levels of ``ind_var`` within subjects.

    Prints the mean and SD of ``dep_var`` for every level of ``ind_var``,
    then prints either

    * a repeated-measures ANOVA (``pg.rm_anova``), when the data are treated
      as normal and ``test_sphericity`` returns a truthy value, or
    * a Friedman test (``pg.friedman``) with an added ``Kendall`` column
      computed as ``Q / (N * (k - 1))``, where ``N`` is the number of unique
      ``ID`` values and ``k`` the number of levels of ``ind_var``.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with an ``ID`` subject column.
    ind_var : str
        Within-subject factor column.
    dep_var : str
        Dependent-variable column.
    is_non_normal : bool or None
        Force the normality decision. With ``None`` it is derived from
        ``test_normality``: any p-value below 0.01 marks the data as
        non-normal.

    Returns
    -------
    None
        Results are only printed.
    """
    print(f'\n{dep_var}:')

    sphericity_p = None
    if is_non_normal is None:
        # NOTE(review): normality is checked over the levels of the
        # hard-coded 'Condition number' column rather than ``ind_var`` --
        # confirm this is intended.
        normality_p = test_normality(df, dep_var,
                                     list(df['Condition number'].unique()))
        is_non_normal = any(p < 0.01 for p in normality_p)
        sphericity_p = test_sphericity(df, dep_var, ind_var)
    elif not is_non_normal:
        # The caller asserted normality, so sphericity still decides which
        # test to run. Previously this path hit an UnboundLocalError because
        # sphericity was only computed when is_non_normal was None.
        sphericity_p = test_sphericity(df, dep_var, ind_var)

    for level in df[ind_var].unique():
        values = list(df.loc[df[ind_var] == level, dep_var])
        print(f'{level}: mean={round(np.mean(values), 2)}, '
              f'SD={round(np.std(values), 2)}')

    if not is_non_normal and sphericity_p:
        print('\nRM ANOVA')
        results = pg.rm_anova(data=df,
                              dv=dep_var,
                              within=ind_var,
                              subject='ID',
                              correction=False,
                              detailed=True)
        print(results.round(4))
        return

    print('\nFriedman test')
    results = pg.friedman(data=df,
                          dv=dep_var,
                          within=ind_var,
                          subject='ID')

    chi_square = list(results['Q'])[0]
    n_subjects = len(df['ID'].unique())
    n_levels = len(df[ind_var].unique())
    results['Kendall'] = [chi_square / (n_subjects * (n_levels - 1))]

    print(results.round(3))
def anova_onoff(on, off, subjects, columns):
    """Two-way RM ANOVA (block x Triplet) on the ``on`` and ``off`` data.

    Each array gets a leading column ``0 .. len(subjects) - 1`` and is
    labelled with ``columns`` (``off`` uses all but the last name). Both
    are then melted to long format with ``sub`` as identifier, ``block`` as
    variable name and ``RT`` as value, and tagged with ``Triplet`` = 1.0 for
    ``on`` and 0.0 for ``off``. ``columns[0]`` therefore has to be ``'sub'``.

    Returns
    -------
    The table returned by ``pg.rm_anova`` for the stacked data
    (``on`` rows first) with within factors ``block`` and ``Triplet``.
    """
    subject_ids = np.arange(len(subjects))

    def to_long(values, names, make_flags):
        # Wide (subject x block) array -> long table tagged with Triplet.
        wide = pd.DataFrame(data=np.insert(values, 0, subject_ids, axis=1),
                            columns=names)
        long_table = pd.melt(wide,
                             id_vars=['sub'],
                             value_vars=names[1:],
                             var_name='block',
                             value_name='RT')
        long_table.insert(1, 'Triplet', make_flags(len(long_table)))
        return long_table

    off_long = to_long(off, columns[:-1], np.zeros)
    on_long = to_long(on, columns, np.ones)

    stacked = pd.concat([on_long, off_long])
    return pg.rm_anova(data=stacked,
                       dv='RT',
                       within=['block', 'Triplet'],
                       subject='sub')
Ejemplo n.º 9
0
    def test_pandas(self):
        """Check that the DataFrame methods agree with the ``pg`` functions.

        Each section calls the DataFrame method first and the module-level
        function second with the same arguments, then compares the outputs.
        """
        # One-way ANOVA
        oneway_kw = dict(dv='Scores', between='Group', detailed=True)
        assert df.anova(**oneway_kw).equals(pg.anova(data=df, **oneway_kw))

        # Two-way ANOVA: SS types 1 and 2 must differ; type 2 must match
        # the function output to 3 decimals.
        ss1_method = df_aov3.anova(dv='Cholesterol',
                                   between=['Sex', 'Drug'],
                                   ss_type=1)
        ss2_method = df_aov3.anova(dv='Cholesterol',
                                   between=['Sex', 'Drug'],
                                   ss_type=2)
        ss2_function = pg.anova(dv='Cholesterol',
                                between=['Sex', 'Drug'],
                                data=df_aov3,
                                ss_type=2)
        assert not ss1_method.equals(ss2_method)
        assert ss2_method.round(3).equals(ss2_function.round(3))

        # Welch ANOVA
        welch_kw = dict(dv='Scores', between='Group')
        assert df.welch_anova(**welch_kw).equals(
            pg.welch_anova(data=df, **welch_kw))

        # ANCOVA (compared after rounding to 3 decimals)
        ancova_kw = dict(dv='Scores', covar='Income', between='Method')
        ancova_method = df_anc.ancova(**ancova_kw).round(3)
        assert ancova_method.equals(
            pg.ancova(data=df_anc, **ancova_kw).round(3))

        # Repeated-measures ANOVA
        rm_kw = dict(dv='Scores', within='Time', subject='Subject',
                     detailed=True)
        assert df.rm_anova(**rm_kw).equals(pg.rm_anova(data=df, **rm_kw))

        # FDR-corrected post hocs with Hedges' g effect size
        posthoc_kw = dict(dv='Scores', within='Time', subject='Subject',
                          padjust='fdr_bh', effsize='hedges')
        assert df.pairwise_tests(**posthoc_kw).equals(
            pg.pairwise_tests(data=df, **posthoc_kw))

        # Pairwise Tukey
        tukey_kw = dict(dv='Scores', between='Group')
        assert df.pairwise_tukey(**tukey_kw).equals(
            pg.pairwise_tukey(data=df, **tukey_kw))

        # Two-way mixed ANOVA
        mixed_kw = dict(dv='Scores', between='Group', within='Time',
                        subject='Subject', correction=False)
        assert df.mixed_anova(**mixed_kw).equals(
            pg.mixed_anova(data=df, **mixed_kw))

        # Pairwise correlations
        pairwise_method = data.pairwise_corr(columns=['X', 'M', 'Y'],
                                             method='spearman')
        pairwise_function = pg.pairwise_corr(data=data,
                                             columns=['X', 'M', 'Y'],
                                             method='spearman')
        assert pairwise_method['r'].equals(pairwise_function['r'])

        # Partial correlation
        partial_kw = dict(x='X', y='Y', covar='M', method='spearman')
        partial_method = data.partial_corr(**partial_kw)
        partial_function = pg.partial_corr(data=data, **partial_kw)
        assert partial_method['r'].equals(partial_function['r'])

        # Partial correlation matrix (reference values from the ppcor package)
        pcorr_matrix = data.iloc[:, :5].pcorr().round(3)
        np.testing.assert_array_equal(pcorr_matrix.iloc[0, :].to_numpy(),
                                      [1, 0.392, 0.06, -0.014, -0.149])
        # ... and against Pingouin's own partial_corr function
        pcorr_xym = data[['X', 'Y', 'M']].pcorr()
        partial_xy = data.partial_corr(x='X', y='Y', covar='M')
        assert np.isclose(pcorr_xym.at['X', 'Y'],
                          partial_xy.at['pearson', 'r'])

        # rcorr (correlation matrix with p-values), checked against
        # pairwise_corr
        rcorr_table = df_corr.rcorr(padjust='holm', decimals=4)
        reference = df_corr.pairwise_corr(padjust='holm').round(4)
        assert rcorr_table.at['Neuroticism', 'Agreeableness'] == '*'
        assert (rcorr_table.at['Agreeableness', 'Neuroticism'] ==
                str(reference.at[2, 'r']))
        rcorr_table = df_corr.rcorr(padjust='holm', stars=False, decimals=4)
        assert (rcorr_table.at['Neuroticism', 'Agreeableness'] ==
                str(reference.at[2, 'p-corr'].round(4)))
        rcorr_table = df_corr.rcorr(upper='n', decimals=5)
        reference = df_corr.pairwise_corr().round(5)
        assert (rcorr_table.at['Extraversion', 'Openness'] ==
                reference.at[4, 'n'])
        assert (rcorr_table.at['Openness', 'Extraversion'] ==
                str(reference.at[4, 'r']))
        # Method = spearman does not work with Python 3.5 on Travis?
        # Instead it seems to return the Pearson correlation!
        # These two calls only check that rcorr runs.
        df_corr.rcorr(method='spearman')
        df_corr.rcorr()

        # Mediation analysis (seeded, so coefficients are reproducible)
        mediation = data.mediation_analysis(x='X', m='M', y='Y', seed=42,
                                            n_boot=500)
        np.testing.assert_array_equal(
            mediation.loc[:, 'coef'].round(4).to_numpy(),
            [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
Ejemplo n.º 10
0
    else:
        error = "sd"

    if y_var2 == "None":
        st.write(df.groupby(y_var)[x_var].agg(['mean', 'std', 'sem']).round(2))
    else:
        st.write(
            df.groupby([y_var, y_var2])[x_var].agg(['mean', 'std',
                                                    'sem']).round(2))

    if y_var2 == "None":
        st.success("One-way repeated measures ANOVA results")
        st.write(
            pg.rm_anova(dv=x_var,
                        within=y_var,
                        subject=subject_var,
                        data=df,
                        detailed=True))
        st.success("Post-hoc tests results")
        st.write(
            pg.pairwise_ttests(dv=x_var,
                               within=y_var,
                               subject=subject_var,
                               data=df))
        st.success("Plots are being generated")
        fig = plt.figure(figsize=(12, 6))

        try:
            ax = sns.pointplot(data=df,
                               x=y_var,
                               y=x_var,
Ejemplo n.º 11
0
    def stats_effect_weeks(self, excel_path):
        """Test the effect of training week on passing time and save it.

        Reads the Excel file at ``excel_path``, assigns every row to a
        training week based on the value in its third column, averages
        ``Passing_Time`` per ``Animal`` and week, then runs a
        repeated-measures ANOVA and Holm-adjusted pairwise t-tests with
        ``Semaine`` (week) as within factor and ``Animal`` as subject.

        The results are written to ``Stats.xlsx`` next to ``excel_path``
        with three sheets: ``Data`` (one row per animal, one column per
        week), ``ANOVA`` and ``Post Hoc``.

        Parameters
        ----------
        excel_path : str or path-like
            Excel file whose third column holds the session number and which
            has ``Animal`` and ``Passing_Time`` columns.

        Returns
        -------
        None

        Notes
        -----
        The ``Data`` sheet is built by pivoting the per-week means, so an
        animal with a missing week gets an empty cell for that week. The
        earlier row-offset approach silently read the following animal's
        values (or raised ``IndexError``) in that situation.
        """
        df_excel = pd.read_excel(excel_path)

        # Session bounds of each training week, passed to self.range1
        # (presumably an inclusive range -- confirm against its definition).
        week_bounds = {
            1: (1, 9),
            2: (10, 14),
            3: (15, 19),
            4: (20, 24),
            5: (25, 29),
        }
        week_of_session = {}
        for week, (first, last) in week_bounds.items():
            for session in self.range1(first, last):
                # setdefault keeps the earliest week should ranges overlap,
                # matching the original first-match-wins conditional chain.
                week_of_session.setdefault(session, week)

        # Sessions outside every week are labelled 'Error', as before.
        df_excel['Semaine'] = [
            week_of_session.get(session, 'Error')
            for session in df_excel.iloc[:, 2]
        ]

        # Mean passing time per animal and week (long format).
        df_stats = (df_excel[['Animal', 'Passing_Time', 'Semaine']]
                    .groupby(['Animal', 'Semaine'])
                    .mean()
                    .reset_index())

        # Wide table for the 'Data' sheet: one row per animal, in order of
        # first appearance, one 'Semaine k' column per week.
        weeks = list(week_bounds)
        wide = df_stats.pivot(index='Animal', columns='Semaine',
                              values='Passing_Time')
        animal_order = [
            animal for animal in dict.fromkeys(df_excel['Animal'].tolist())
            if animal in wide.index
        ]
        df_stats_arranged = (
            wide.reindex(index=animal_order, columns=weeks)
            .rename(columns={week: 'Semaine {}'.format(week)
                             for week in weeks})
            .rename_axis(index='Animal', columns=None)
            .reset_index())

        df_result = pg.rm_anova(dv='Passing_Time',
                                within='Semaine',
                                subject='Animal',
                                data=df_stats,
                                detailed=True)
        df_post_hocs = pairwise_ttests(dv='Passing_Time',
                                       within='Semaine',
                                       subject='Animal',
                                       data=df_stats,
                                       padjust='holm')

        # ExcelWriter.save() was removed in pandas 2.0; the context manager
        # closes (and thereby writes) the workbook on every pandas version.
        output_path = '{}/Stats.xlsx'.format(Path(excel_path).parent)
        with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
            self.writer = writer  # still exposed on the instance as before
            df_stats_arranged.to_excel(writer, sheet_name='Data')
            df_result.to_excel(writer, sheet_name='ANOVA')
            df_post_hocs.to_excel(writer, sheet_name='Post Hoc')
Ejemplo n.º 12
0
    def test_pandas(self):
        """Check that the DataFrame methods agree with the ``pg`` functions.

        Each section calls the DataFrame method first and the module-level
        function second with the same arguments, then compares the outputs.
        """
        # One-way ANOVA
        oneway_kw = dict(dv='Scores', between='Group', detailed=True)
        assert df.anova(**oneway_kw).equals(pg.anova(data=df, **oneway_kw))

        # Welch ANOVA
        welch_kw = dict(dv='Scores', between='Group')
        assert df.welch_anova(**welch_kw).equals(
            pg.welch_anova(data=df, **welch_kw))

        # Repeated-measures ANOVA
        rm_kw = dict(dv='Scores', within='Time', subject='Subject',
                     detailed=True)
        assert df.rm_anova(**rm_kw).equals(pg.rm_anova(data=df, **rm_kw))

        # FDR-corrected post hocs with Hedges' g effect size
        posthoc_kw = dict(dv='Scores', within='Time', subject='Subject',
                          padjust='fdr_bh', effsize='hedges')
        assert df.pairwise_ttests(**posthoc_kw).equals(
            pg.pairwise_ttests(data=df, **posthoc_kw))

        # Two-way mixed ANOVA
        mixed_kw = dict(dv='Scores', between='Group', within='Time',
                        subject='Subject', correction=False)
        assert df.mixed_anova(**mixed_kw).equals(
            pg.mixed_anova(data=df, **mixed_kw))

        # Pairwise correlations
        pairwise_method = data.pairwise_corr(columns=['X', 'M', 'Y'],
                                             method='spearman')
        pairwise_function = pg.pairwise_corr(data=data,
                                             columns=['X', 'M', 'Y'],
                                             method='spearman')
        assert pairwise_method['r'].equals(pairwise_function['r'])

        # Partial correlation
        partial_kw = dict(x='X', y='Y', covar='M', method='spearman')
        partial_method = data.partial_corr(**partial_kw)
        partial_function = pg.partial_corr(data=data, **partial_kw)
        assert partial_method['r'].equals(partial_function['r'])

        # Partial correlation matrix (reference values from the ppcor package)
        pcorr_matrix = data.pcorr().round(3)
        np.testing.assert_array_equal(pcorr_matrix.iloc[0, :].values,
                                      [1, 0.392, 0.06, -0.014, -0.149])
        # ... and against Pingouin's own partial_corr function
        pcorr_xym = data[['X', 'Y', 'M']].pcorr()
        partial_xy = data.partial_corr(x='X', y='Y', covar='M')
        assert (round(pcorr_xym.loc['X', 'Y'], 3) ==
                partial_xy.loc['pearson', 'r'])

        # Mediation analysis (seeded, so coefficients are reproducible)
        mediation = data.mediation_analysis(x='X', m='M', y='Y', seed=42,
                                            n_boot=500)
        np.testing.assert_array_equal(
            mediation.loc[:, 'coef'].values,
            [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
Ejemplo n.º 13
0
                                 columns=["winsize", "clustering"],
                                 values="deviation")
        pivot_t.to_csv("pt_exp2.csv")

    # mean values
    if cal_mean_std:
        crowdingcon = 0
        cal_ds_mean(mydata, crowdingcon=crowdingcon, col_name="crowding")
        cal_ds_std(mydata, crowdingcon=crowdingcon, col_name="crowding")

    # 3 ways anova
    if see_clustering_level:
        aov_table = AnovaRM(data=mydata2test,
                            depvar="deviation",
                            subject="participantID",
                            within=["crowding", "winsize",
                                    "clustering"]).fit()
        aov_table.summary()
    else:
        aov = pg.rm_anova(dv="deviation",
                          within=["winsize", "crowding"],
                          subject="participantID",
                          data=mydata2test)

        posthocs = pg.pairwise_ttests(dv="deviation",
                                      within=["winsize", "crowding"],
                                      subject="participantID",
                                      data=mydata2test,
                                      padjust="fdr_bh",
                                      effsize="cohen")
Ejemplo n.º 14
0
def data():
    """Estimate by simulation the power of a 2 x 5 repeated-measures ANOVA.

    Two conditions are simulated over five track lengths. In each condition
    the probability of a correct trial follows a logistic curve
    ``1 / (1 + exp(-(a + b * track_length)))``: condition 1 uses the fixed
    coefficients ``(5, -0.05)``, condition 2 sweeps ``a`` over ``coef3s``
    and ``b`` over ``coef4s``.

    For every combination of subject count, ``a`` and ``b``, ``iterations``
    data sets are drawn (binomial counts out of ``trials``, expressed as
    percentages) and analysed with ``pg.rm_anova``. Power is the fraction of
    data sets with ``p-unc < 0.05``, computed separately for the condition
    effect, the track-length effect and their interaction.

    A p-value that comes back as NaN counts as *not significant*. It used to
    be converted to 0.0 by ``np.nan_to_num`` and was therefore counted as a
    rejection, which inflated the power estimate.

    The three power grids, indexed ``[subject count, coef3, coef4]``, are
    saved as ``.npy`` files under a hard-coded directory. Progress and
    timing are printed after each grid cell. Nothing is returned.
    """
    start_time = time.time()

    iterations = 500
    trials = 60
    alpha = 0.05
    track_lengths = np.array([50, 75, 112.5, 168.75, 253.125])
    coef1 = 5
    coef2 = -0.05
    n_conditions = 2  # the long-format layout below assumes exactly two

    n_subjects = np.array([2, 4, 5, 10, 20, 30, 40])
    coef3s = np.linspace(1, 10, 5)
    coef4s = np.linspace(-0.01, -0.1, 5)

    grid_shape = (len(n_subjects), len(coef3s), len(coef4s))
    parameters_powers_conditions = np.zeros(grid_shape)
    parameters_powers_track_length = np.zeros(grid_shape)
    parameters_powers_interaction = np.zeros(grid_shape)

    # ANOVA table row label -> grid that stores the power of that effect.
    power_grids = {
        "Condition": parameters_powers_conditions,
        "Track_length": parameters_powers_track_length,
        "Condition * Track_length": parameters_powers_interaction,
    }

    n_lengths = len(track_lengths)

    # Condition 1 does not depend on the swept coefficients, so its success
    # probabilities are computed once.
    pr = 1 / (1 + np.e**(-(coef1 + (coef2 * track_lengths))))

    for counter3, coef3 in enumerate(coef3s):
        for counter4, coef4 in enumerate(coef4s):
            pr2 = 1 / (1 + np.e**(-(coef3 + (coef4 * track_lengths))))

            for n_counter, n in enumerate(n_subjects):
                # Long-format design columns: within each condition every
                # subject contributes one row per track length.
                subject_id_long = np.tile(
                    np.repeat(np.linspace(1, n, n), n_lengths),
                    n_conditions)
                conditions_long = np.repeat([1.0, 2.0], n_lengths * n)
                track_lengths_long = np.tile(track_lengths, n_conditions * n)

                p_values = {source: [] for source in power_grids}

                for _ in range(iterations):
                    y_percentage1 = (
                        (np.random.binomial(trials, pr, (n, n_lengths)) /
                         trials) * 100).flatten()
                    y_percentage2 = (
                        (np.random.binomial(trials, pr2, (n, n_lengths)) /
                         trials) * 100).flatten()

                    df = pd.DataFrame({
                        "subject": subject_id_long,
                        "Condition": conditions_long,
                        'Track_length': track_lengths_long,
                        'percentage_corr_trials':
                        np.append(y_percentage1, y_percentage2),
                    })
                    aov = pg.rm_anova(dv='percentage_corr_trials',
                                      within=['Condition', 'Track_length'],
                                      subject='subject',
                                      data=df,
                                      detailed=True)

                    p_by_source = aov.set_index('Source')['p-unc']
                    for source, collected in p_values.items():
                        collected.append(p_by_source[source])

                for source, collected in p_values.items():
                    # NaN < alpha is False, so undefined tests are not
                    # counted as rejections.
                    power_grids[source][n_counter, counter3, counter4] = (
                        np.mean(np.asarray(collected, dtype=float) < alpha))

                print("it took ",
                      time.time() - start_time, "for 1 simulated loop to run")
                print("currently on ", str(n), "n subjects, ", str(coef3),
                      "coef3 and ", str(coef4), "coef4")
                start_time = time.time()

    np.save(
        r'Z:\ActiveProjects\Harry\OculusVR\Power_analysis\Harry_figs\conditions_assay.npy',
        parameters_powers_conditions)
    np.save(
        r'Z:\ActiveProjects\Harry\OculusVR\Power_analysis\Harry_figs\track_length.npy',
        parameters_powers_track_length)
    np.save(
        r'Z:\ActiveProjects\Harry\OculusVR\Power_analysis\Harry_figs\interaction.npy',
        parameters_powers_interaction)
    '''
import pandas as pd
from statsmodels.stats.anova import AnovaRM
import pingouin as pg

if __name__ == '__main__':
    PATH = "../../data/ms2_uniform_prolific_1_data/"
    DATA = "prolifc_data_combine_num_each_pp.xlsx"
    DATA2 = "prolifc_data_each_pp.xlsx"
    winsize = 0.6

    # ANOVA within subject clustering (5) * type (2) for each winsize
    data = pd.read_excel(PATH + DATA)
    data = data[data["winsize"] == winsize]

    aov = pg.rm_anova(data=data,
                      dv="mean_deviation_score",
                      within=["percent_triplets", "protectzonetype"],
                      subject="participant")

    posthocs = pg.pairwise_ttests(
        dv="mean_deviation_score",
        within=["percent_triplets", "protectzonetype"],
        subject="participant",
        data=data,
        padjust="fdr_bh",
        effsize="cohen")

    # ANOVA within subject
    data2 = pd.read_excel(PATH + DATA2)
    data2 = data2[data2["winsize"] == 0.4]  # winsize 0.4 unblanced data
    aov_table = AnovaRM(
        data=data2,
Ejemplo n.º 16
0
# Build the PT table and keep only the rows that have an AbsError value.
df_PT = pd.DataFrame(data=d2)
df_PT = df_PT.loc[df_PT["AbsError"].notnull()]
# After that filter no PT value may be missing.
assert not df_PT["PT"].isnull().any()

# Same filter for the per-subject means.
df_mean_nn = df_mean.loc[df_mean["AbsError"].notnull()]

####################################
# # Running Statistical Tests
######################################

# Paired t-test, for comparison with Sven's analysis
# (the correction='auto' option is left unset).
ttestSNR = pingouin.ttest(loSNR, hiSNR, paired=True)

# Repeated-measures ANOVA: effect of SNR on the absolute error
rm_SNR = pingouin.rm_anova(
    data=df_mean_nn,
    dv='AbsError',
    within=['SNR'],
    subject='Sub',
)
print(rm_SNR)

# # MLM for SNR on Error (disabled)
# mlm_SNR = smf.mixedlm("AbsError ~ SNR", df_mean_nn, groups=df_mean_nn["Sub"])
# mdf_SNR = mlm_SNR.fit()
# print(mdf_SNR.summary())
#
# A = np.identity(len(mdf_SNR.params))
# A = A[1:,:]
# print(mdf_SNR.f_test(A))

# Mixed linear model: effect of PT on the absolute error, grouped by subject
mlm_PT = smf.mixedlm("AbsError ~ PT", df_PT, groups=df_PT["Sub"])
mdf_PT = mlm_PT.fit()
Ejemplo n.º 17
0
import seaborn as sns
import statsmodels
from scipy.stats import spearmanr
from pingouin import mixed_anova, anova, pairwise_tukey
from pingouin import logistic_regression
import pprint
from statsmodels.multivariate.manova import MANOVA
from pingouin import ancova

# Load the data in long and in wide format; different analyses need
# different layouts.
data_long = pd.read_csv(
    r'C:\Users\user\Desktop\FOCUS\behavioral\ready_to_stat\master_data_long_mDNA.csv'
)
# Participant 26 is excluded from the ERP analysis: his amplitudes are known
# to be far off in this paradigm.
keep_long = data_long['participant'] != 'P26'
data_long = data_long[keep_long]
data_long.describe()

data_wide_mDNA = pd.read_csv(
    r'C:\Users\user\Desktop\FOCUS\behavioral\ready_to_stat\master_data_wide_mDNA.csv'
)
# Same exclusion for the wide-format table.
keep_wide = data_wide_mDNA['participant'] != 'P26'
data_wide_mDNA = data_wide_mDNA[keep_wide]
data_wide_mDNA.describe()

# One-way repeated-measures ANOVA of parietal alpha power across blocks,
# with the detailed table and sphericity correction requested.
aov_declog = pg.rm_anova(
    data=data_long,
    dv='parietal_AlphaPowerDecLog',
    within='blocks',
    subject='participant',
    detailed=True,
    correction=True,
)

print(aov_declog)
Ejemplo n.º 18
0
# In[ ]:

## simulate three conditions with different means and a shared spread
data1 = mean1 + np.random.randn(N) * stdev
data2 = mean2 + np.random.randn(N) * stdev
data3 = mean3 + np.random.randn(N) * stdev

# one column per condition, one row per observation
datamat = np.column_stack((data1, data2, data3))

# wrap the matrix in a pandas dataframe (wide format)
df = pd.DataFrame(data=datamat, columns=['d1', 'd2', 'd3'])
df

# In[ ]:

pg.rm_anova(data=df, detailed=True)

# In[ ]:

df.boxplot()

# In[ ]:

## worked example taken from the SPSS tutorials website

# https://www.spss-tutorials.com/repeated-measures-anova/

data = [
    [8, 7, 6, 7],
    [5, 8, 5, 6],
    [6, 5, 3, 4],
    [6, 6, 7, 3],
    [8, 10, 8, 6],
    [6, 5, 6, 3],
    [6, 5, 2, 3],
    [9, 9, 9, 6],
    [5, 4, 3, 7],
    [7, 6, 6, 5],
]

df = pd.DataFrame(data=data, columns=['1', '2', '3', '4'])
Ejemplo n.º 19
0
def efficiency(data):
    """Evaluate the ``time`` variable: plots, normality checks and tests.

    For every ``config.CalcByType`` (and once without a type) a box/QQ plot
    and a Shapiro statistic are produced through ``create_plot`` and
    ``create_stat``. A one-way repeated-measures ANOVA and pairwise t-tests
    are then run on log10-transformed times below the 0.96 quantile, a mixed
    ANOVA on ``data.df``, and finally Friedman / Kruskal statistics through
    ``create_var_stats``. The three pingouin tables are written to CSV.

    Returns a dict with ``"success": True`` and a ``"message"`` dict holding
    the string forms of the normality results, the statistics, the one-way
    ANOVA table and the list of plots.
    """
    # Output file prefixes are derived from this function's own name.
    func_name = sys._getframe().f_code.co_name
    out_prefix = func_name + "_"
    out_csv = config.OUT_EVALS_DIR + "/" + out_prefix
    out_png = config.OUT_PLOT_DIR + "/" + out_prefix

    var = "time"
    plot_list = []
    norm_time_dict = {}

    def result_csv(file_stem):
        # <evals dir>/<prefix><var>_<stem>.<csv extension>
        return out_csv + var + "_" + file_stem + "." + config.OUT_CSV_EXT

    for calc_type in config.CalcByType:
        # box plot, then normality statistic, then QQ plot
        box_plot = create_plot(data, calc_type, var, plots.saveBoxPlot,
                               out_png + "box")
        plot_list.append(box_plot)
        norm_time_dict[calc_type.name] = create_stat(
            data, calc_type, var, shapiro, out_csv, config.OUT_NORM_FILE)
        qq_plot = create_plot(data, calc_type, var, plots.saveQQPlot,
                              out_png + "qq")
        plot_list.append(qq_plot)

    # The same statistic and QQ plot without splitting by calc type.
    norm_time_dict["None"] = create_stat(data, None, var, shapiro, out_csv,
                                         config.OUT_NORM_FILE)
    plot_list.append(
        create_plot(data, None, var, plots.saveQQPlot, out_png + "qq"))

    # One-way ANOVA with repeated measurements on log10(time).
    out_one_way_anova = result_csv(config.OUT_ONE_WAY_ANOVA_FILE)
    data_log = data.deep_copy()
    data_log[var] = np.log10(data_log[var])
    # Outlier removal: keep only values below the 0.96 quantile.
    cutoff = data_log['time'].quantile(0.96)
    data_log = data_log[data_log["time"] < cutoff]
    one_way_anova_aov = pg.rm_anova(data=data_log,
                                    dv=var,
                                    within='video',
                                    subject='user',
                                    detailed=True)
    one_way_anova_aov.to_csv(out_one_way_anova, index=False)

    # Pairwise t-tests between videos on the same filtered data.
    out_ttest = result_csv(config.OUT_TTEST_FILE)
    ttest_result = pg.pairwise_ttests(data=data_log,
                                      dv=var,
                                      within='video',
                                      subject='user',
                                      padjust='bonferroni',
                                      effsize='hedges',
                                      tail='one-sided',
                                      return_desc=True)
    ttest_result.to_csv(out_ttest, index=False)

    # Mixed ANOVA (video within, tool between) on the untransformed frame.
    out_mixed_anova = result_csv(config.OUT_MIXED_ANOVA_FILE)
    m_anova = pg.mixed_anova(data=data.df,
                             dv=var,
                             within='video',
                             between='tool',
                             subject='user')
    m_anova.to_csv(out_mixed_anova, index=False)

    # Friedman for VIDEO and Kruskal for TOOL, each via create_var_stats.
    test_plan = (
        (config.CalcByType.VIDEO, friedmanchisquare, config.OUT_FRIEDMAN_FILE),
        (config.CalcByType.TOOL, kruskal, config.OUT_KRUSKAL_FILE),
    )
    stat_dict = {}
    for calc_type, test_func, file_tag in test_plan:
        res, new_plots = create_var_stats(data, [var], calc_type, test_func,
                                          out_prefix + file_tag, False)
        plot_list += new_plots
        stat_dict[calc_type.name] = res

    message = {
        'norm': str(norm_time_dict),
        'stats': str(stat_dict),
        'one_way_anova': str(one_way_anova_aov),
        'plots': str(plot_list)
    }
    return {"success": True, "message": message}
Ejemplo n.º 20
0
def _concat_frames(frames):
    """Stack per-target result frames into one frame with a fresh index.

    Returns an empty DataFrame for an empty list so that the caller's
    rename/reindex steps behave the same as with an empty accumulator.
    """
    if not frames:
        return pandas.DataFrame()
    return pandas.concat(frames, ignore_index=True)


def stats(model, quantity, data, targets, tw, rm, nd):
    """Run the statistical test matching the design, once per target.

    Parameters
    ----------
    model : str
        ``'absolute'`` analyses the ``NormMean`` column (``NormQuant`` is
        dropped); any other value analyses ``rqMean`` (``rq`` is dropped).
    quantity : int
        Number of groups: ``2`` selects two-sample tests, ``>= 3`` selects
        the ANOVA / Friedman / Kruskal family.
    data : pandas.DataFrame
        Intermediate table; the code reads the columns ``Outliers``,
        ``Target Name``, ``Sample Name`` and ``Group`` (or ``Group1`` /
        ``Group2`` for two-way designs) plus the value column.
    targets : iterable
        Values of ``Target Name`` to analyse, one test per value.
    tw : str
        ``'False'`` for a one-way design, anything else for two-way.
    rm : str
        ``'True'`` when the measurements are repeated (paired / dependent).
    nd : str
        ``'True'`` when the data are normally distributed (parametric
        tests); anything else selects the non-parametric tests.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The omnibus/two-sample results and the post-hoc results. The
        post-hoc frame is empty when ``quantity == 2``.

    Raises
    ------
    ValueError
        If ``quantity`` is smaller than 2.
    """
    if quantity < 2:
        raise ValueError("quantity must be 2 or more, got %r" % (quantity,))

    if model == 'absolute':
        data = data.drop(['NormQuant'], axis=1)
        data['NormMean'] = data['NormMean'].astype(float)
        mean = 'NormMean'
    else:
        data = data.drop(['rq'], axis=1)
        data['rqMean'] = data['rqMean'].astype(float)
        mean = 'rqMean'

    # prepare data from intermediate dataframe
    data = data[data['Outliers'].eq(False)]
    data = data.drop_duplicates(keep='first')

    # The design flags arrive as the strings 'True' / 'False'; compare them
    # explicitly, because bool('False') is True.
    repeated = rm == 'True'
    one_way = tw == 'False'

    # t-test and anova for normally distributed data
    if nd == 'True':
        if quantity == 2:
            # T-Test between 2 groups
            stats_frames = []
            posthoc_dfs = pandas.DataFrame()
            group = data['Group'].dropna()
            group = group.drop_duplicates(keep='first').values.tolist()
            for item in targets:
                df = data[data['Target Name'].eq(item)]
                group1 = df[df['Group'].eq(group[0])][mean]
                group2 = df[df['Group'].eq(group[1])][mean]
                t_test = ttest(group1, group2, paired=repeated)
                t_test['paired'] = 'TRUE' if repeated else 'FALSE'
                t_test['Target Name'] = item
                stats_frames.append(t_test)
            stats_dfs = _concat_frames(stats_frames)
            # reformat output table
            stats_dfs = stats_dfs.rename(columns={
                'cohen-d': 'effect size',
                'BF10': 'Bayes factor',
                'dof': 'DF'
            })
            cols = [
                'Target Name', 'DF', 'T', 'tail', 'paired', 'p-val',
                'effect size', 'power', 'Bayes factor'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)
        elif quantity >= 3:
            # ANOVA test
            stats_frames = []
            posthoc_frames = []
            pvals = []
            for item in targets:
                target_data = data[data['Target Name'].eq(item)]
                if repeated:
                    if one_way:
                        # one-way repeated measure anova
                        aov = pg.rm_anova(dv=mean,
                                          data=target_data,
                                          within='Group',
                                          subject='Sample Name',
                                          detailed=True)
                        pvals.append(aov['p-unc'][0])
                        # keep only the factor row, drop the error row
                        aov = aov.drop([1])
                        aov['measures'] = ['dependent']
                        aov['Target Name'] = item
                    else:
                        # two-way repeated measure anova
                        aov = pg.rm_anova(dv=mean,
                                          data=target_data,
                                          within=['Group1', 'Group2'],
                                          subject='Sample Name',
                                          detailed=True)
                        reject_tw, pval_corr_tw = pg.multicomp(
                            list(aov['p-unc']), alpha=0.05, method='bonf')
                        aov['p-value corrected'] = pval_corr_tw
                        aov['measures'] = ['dependent'] * 3
                        aov['Target Name'] = [item] * 3
                    # The 'eps' column needs no explicit drop here: the
                    # reindex on the output columns below discards it.
                    # NOTE(review): the two-way design also uses
                    # within='Group' for the post-hoc tests, unlike the
                    # independent two-way branch - confirm this is intended.
                    ph = pairwise_ttests(data=target_data,
                                         dv=mean,
                                         within='Group',
                                         subject='Sample Name',
                                         padjust='fdr_bh')
                    ph['Target Name'] = item
                    ph['Test'] = 'T-Test'
                else:
                    if one_way:
                        # one-way anova for independent groups
                        aov = pg.anova(dv=mean,
                                       between='Group',
                                       data=target_data,
                                       detailed=True)
                        pvals.append(aov['p-unc'][0])
                        # keep only the factor row, drop the residual row
                        aov = aov.drop([1])
                        aov['measures'] = ['independent']
                        aov['Target Name'] = item
                        ph = pairwise_ttests(data=target_data,
                                             dv=mean,
                                             between='Group',
                                             padjust='fdr_bh')
                        ph['Test'] = 'T-Test'
                    else:
                        # two-way anova for independent groups
                        aov = pg.anova(dv=mean,
                                       between=['Group1', 'Group2'],
                                       data=target_data,
                                       detailed=False)
                        aov = aov.drop([3])
                        reject_tw, pval_corr_tw = pg.multicomp(
                            list(aov['p-unc']), alpha=0.05, method='bonf')
                        aov['p-value corrected'] = pval_corr_tw
                        aov['measures'] = ['independent'] * 3
                        aov['Target Name'] = [item] * 3
                        ph = pairwise_ttests(data=target_data,
                                             dv=mean,
                                             between=['Group1', 'Group2'],
                                             padjust='fdr_bh')
                        ph['Test'] = 'T-Test'
                    ph['Target Name'] = item
                stats_frames.append(aov)
                posthoc_frames.append(ph)
            stats_dfs = _concat_frames(stats_frames)
            posthoc_dfs = _concat_frames(posthoc_frames)

            # Bonferroni correction across targets (one-way p-values only).
            reject, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')

            # reformat output tables
            stats_dfs = stats_dfs.rename(columns={
                'p-unc': 'p-value',
                'np2': 'effect size'
            })
            if one_way:
                stats_dfs['p-value corrected'] = pvals_corr
                stats_dfs['distribution'] = ['parametric'] * len(targets)
                stats_dfs['test'] = ['ANOVA'] * len(targets)
                stats_dfs['statistic'] = ['NA'] * len(targets)
            else:
                # two-way tables carry three rows per target
                stats_dfs['distribution'] = ['parametric'] * (len(targets) * 3)
                stats_dfs['test'] = ['ANOVA'] * (len(targets) * 3)
                stats_dfs['statistic'] = ['NA'] * (len(targets) * 3)
            cols = [
                'Target Name', 'Source', 'DF', 'F', 'MS', 'SS', 'p-value',
                'p-value corrected', 'measures', 'distribution', 'test',
                'statistic', 'effect size'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)
            if one_way:
                posthoc_dfs = posthoc_dfs.drop(['Contrast', 'T'], axis=1)
            else:
                posthoc_dfs = posthoc_dfs.drop(['T'], axis=1)
            posthoc_dfs = posthoc_dfs.rename(
                columns={
                    'hedges': 'effect size',
                    'p-corr': 'p-value corrected',
                    'p-unc': 'p-value',
                    'p-adjust': 'correction method',
                    'BF10': 'Bayes factor',
                    'dof': 'DF'
                })
            if one_way:
                cols2 = [
                    'Target Name', 'A', 'B', 'DF', 'p-value corrected',
                    'p-value', 'correction method', 'Paired', 'Parametric',
                    'Test', 'effect size', 'Bayes factor'
                ]
            else:
                cols2 = [
                    'Target Name', 'Contrast', 'Group1', 'A', 'B', 'DF',
                    'p-value corrected', 'p-value', 'correction method',
                    'Paired', 'Parametric', 'Test', 'effect size',
                    'Bayes factor'
                ]
            posthoc_dfs = posthoc_dfs.reindex(columns=cols2)

    # nonparametric tests for not normally distributed data
    else:
        if quantity == 2:
            stats_frames = []
            posthoc_dfs = pandas.DataFrame()
            group = data['Group'].dropna()
            group = group.drop_duplicates(keep='first').values.tolist()
            for item in targets:
                df = data[data['Target Name'].eq(item)]
                # Plain arrays: the pairing is positional, not index-based.
                group1 = df[df['Group'].eq(group[0])][mean].values
                group2 = df[df['Group'].eq(group[1])][mean].values
                if repeated:
                    # Wilcoxon signed-rank test for paired samples
                    test = wilcoxon(group1, group2)
                else:
                    # Mann-Whitney U test for independent samples
                    test = mannwhitneyu(group1, group2)
                test = pandas.DataFrame(
                    {
                        'Target Name': item,
                        'pvalue': test.pvalue,
                        'statistic': test.statistic
                    },
                    index=[0])
                stats_frames.append(test)
            stats_dfs = _concat_frames(stats_frames)

        elif quantity >= 3:
            stats_frames = []
            posthoc_frames = []

            pvals = []
            for item in targets:
                target_data = data[data['Target Name'].eq(item)]
                if repeated:
                    # friedman test for repeated measurements
                    df = pg.friedman(dv=mean,
                                     within='Group',
                                     subject='Sample Name',
                                     data=target_data)
                    pvals.append(df['p-unc'][0])
                    df['test'] = ['Friedman Q']
                    df['measures'] = ['dependent']
                    df = df.rename(columns={'Q': 'statistic'})
                    df['Target Name'] = item
                    df['DF'] = 'NA'
                    ph = pairwise_ttests(data=target_data,
                                         dv=mean,
                                         within='Group',
                                         subject='Sample Name',
                                         padjust='fdr_bh',
                                         parametric=False)
                    ph['Target Name'] = item
                    ph['DF'] = 'NA'
                    ph['Bayes factor'] = 'NA'
                    ph['Test'] = 'Wilcoxon'
                else:
                    # Kruskal-Wallis H test
                    df = pg.kruskal(dv=mean,
                                    between='Group',
                                    data=target_data)
                    pvals.append(df['p-unc'][0])
                    df['test'] = ['Kruskal-Wallis H']
                    df['measures'] = ['independent']
                    df = df.rename(columns={'H': 'statistic'})
                    df['Target Name'] = item
                    df['DF'] = 'NA'
                    ph = pairwise_ttests(data=target_data,
                                         dv=mean,
                                         between='Group',
                                         padjust='fdr_bh',
                                         parametric=False)
                    ph['Target Name'] = item
                    ph['DF'] = 'NA'
                    ph['Bayes factor'] = 'NA'
                    ph['Test'] = 'Mann-Whitney U'
                stats_frames.append(df)
                posthoc_frames.append(ph)
            stats_dfs = _concat_frames(stats_frames)
            posthoc_dfs = _concat_frames(posthoc_frames)

            reject, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')
            # reformat output tables
            stats_dfs = stats_dfs.rename(columns={
                'dof': 'DF',
                'p-unc': 'p-value'
            })
            stats_dfs['p-value corrected'] = pvals_corr
            stats_dfs['distribution'] = ['non-parametric'] * len(targets)
            stats_dfs['MS'] = ['NA'] * len(targets)
            stats_dfs['SS'] = ['NA'] * len(targets)
            stats_dfs['effect size'] = ['NA'] * len(targets)
            cols = [
                'Target Name', 'DF', 'MS', 'SS', 'p-value',
                'p-value corrected', 'measures', 'distribution', 'test',
                'statistic', 'effect size'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)

            posthoc_dfs = posthoc_dfs.drop(['Contrast'], axis=1)
            posthoc_dfs = posthoc_dfs.rename(
                columns={
                    'hedges': 'effect size',
                    'p-corr': 'p-value corrected',
                    'p-unc': 'p-value',
                    'p-adjust': 'correction method',
                    'BF10': 'Bayes factor'
                })
            cols2 = [
                'Target Name', 'A', 'B', 'DF', 'p-value corrected', 'p-value',
                'correction method', 'Paired', 'Parametric', 'Test',
                'effect size', 'Bayes factor'
            ]
            posthoc_dfs = posthoc_dfs.reindex(columns=cols2)

    return stats_dfs, posthoc_dfs
Ejemplo n.º 21
0
# -*- coding: utf-8 -*-
"""
Created on Wed May  1 15:55:49 2019

@author: Antoine
"""

#%% anova pingouin

import pingouin as pg
import pandas as pd

# Load the aggregated data and keep only the rows with ISI below 6.
data = pd.read_csv("aggregated_data.txt")
short_isi = data["ISI"] < 6
data = data[short_isi]

# Two-way repeated-measures ANOVA: modulation type x ISI, within subject.
aov = pg.rm_anova(
    data=data,
    dv="d",
    within=["modulation_type", "ISI"],
    subject="subject",
)


pg.print_table(aov)

# Keep the columns of interest, give them French headers and export them.
kept_columns = ["Source", "ddof1", "F", "p-unc", "p-GG-corr", "np2"]
french_headers = ["Variable", "ddl", "F-value", "p-value", "p-value corrigee", "partial eta-square"]
clean_aov = aov[kept_columns]
clean_aov.columns = french_headers
clean_aov.to_excel("resultats_anova.xlsx")



#%% anova statsmodels (DOES NOT WORK)

from statsmodels.stats.anova import AnovaRM
import pandas as pd
Ejemplo n.º 22
0
	def analyse(self, parameter_list={"all"}, between_factor_list=["Subject_type"], within_factor_list=["Stimuli_type"], statistical_test="Mixed_anova", file_creation=True, ttest_type=1):
		"""Run the requested statistical test on every selected indicator.

		For each indicator (meta column) of each sensor a long-format pandas
		dataframe is built from all subjects and stimuli, and the chosen test
		is run on it. Results are printed and, if requested, written to csv.

		Parameters
		----------
		parameter_list: set (optional)
			Indicators (e.g. "Pupil_size", "Blink_rate") to analyse. The default
			{"all"} analyses every indicator.
		between_factor_list: list(str) (optional)
			Between group factors, by default only "Subject_type".
			To add a factor (eg: Gender) pass between_factor_list = ["Subject_type", "Gender"].
			"Subject_type" is only considered if it is in the list, so
			between_factor_list = ["factor_x"] no longer uses it.
			Every factor other than "Subject_type" is looked up in the json file
			(see the README for the expected layout).
		within_factor_list: list(str) (optional)
			Within group factors, by default only "Stimuli_type".
			To add a factor pass within_factor_list = ["Stimuli_type", "factor_X"].
			"Stimuli_type" is only considered if it is in the list, so
			within_factor_list = ["factor_x"] no longer uses it.
			Every factor other than "Stimuli_type" is looked up in the json file
			(see the README for the expected layout).
		statistical_test: str {"Mixed_anova","RM_anova","ttest","welch_ttest","anova"} or None (optional)
			Name of the statistical test to perform. Any other value builds
			the data but runs no test.
				NOTE:

				- ttest: 3 options, selected with `ttest_type` (see below).
				- welch_ttest: 2 options, selected with `ttest_type` (see below).
				- Mixed_anova: only the first between group factor and the first within group factor are used.
				- anova: any number of between group factors can be considered.
				- RM_anova: up to 2 within group factors can be considered.
		file_creation: bool (optional)
			Whether result files are written.
				NOTE:
				A "Results" directory is created inside the directory given by "Path" in the json file.
				The results csv is named after the statistical test and overwrites any previous file of that name.
				The per-indicator input data is stored under "Results/Data".
				When False, nothing is written to disk.
		ttest_type: int {1,2,3} (optional)
			Which factors are used for the ttest and the Welch ttest.
				NOTE:
				For ttest-

				- 1: the between group factors are used
				- 2: the within group factors are used
				- 3: both the between and the within group factors are used

				For Welch ttest-

				- 1: the first factor in 'between_factor_list' is used
				- 2: the first factor in 'within_factor_list' is used

		Examples
		--------

		Mixed ANOVA on all the parameters with Subject_type and Stimuli_type as
		between and within group factors (these are the defaults)

		>>> analyse(parameter_list={"all"}, between_factor_list=["Subject_type"], within_factor_list=["Stimuli_type"], statistical_test="Mixed_anova", file_creation=True)
		OR
		>>> analyse()

		2-way ANOVA for "blink_rate" and "avg_blink_duration" with Subject_type
		and Gender as the between group factors, without writing any file

		>>> analyse(parameter_list={"blink_rate", "avg_blink_duration"}, between_factor_list=["Subject_type", "Gender"], statistical_test="anova", file_creation=False)

		"""

		with open(self.json_file, "r") as json_f:
			json_data = json.load(json_f)

		# Output handles stay None unless result files were requested, so the
		# code below can test them instead of hitting an unbound name.
		csvFile = None
		writer = None
		directory_path = None
		if file_creation:
			directory_path = json_data["Path"] + "/Results"
			if not os.path.isdir(directory_path):
				os.mkdir(directory_path)

			if not os.path.isdir(directory_path + '/Data/'):
				os.mkdir(directory_path + '/Data/')

			if statistical_test is not None:
				file_path = directory_path + "/" + statistical_test + ".csv"
				csvFile = open(file_path, 'w')
				writer = csv.writer(csvFile)

		meta_not_to_be_considered = ["pupil_size", "pupil_size_downsample"]

		# Indicators whose value arrays need summationArrayCalculation /
		# return_index to map a value back to its stimulus.
		event_metas = ("sacc_duration", "sacc_vel", "sacc_amplitude", "ms_duration", "ms_vel", "ms_amplitude")

		# try/finally guarantees the results file is closed even if a test raises
		try:
			for sen in self.sensors:
				for meta in Sensor.meta_cols[sen]:
					if meta in meta_not_to_be_considered:
						continue

					if ('all' not in parameter_list) and (meta not in parameter_list):
						continue

					print("\n\n")
					print("\t\t\t\tAnalysis for ",meta)

					#For the purpose of statistical analysis, a pandas dataframe needs to be created that can be fed into the statistical functions
					#The columns required are - meta (indicator), the between factors (eg: Subject type or Gender), the within group factor (eg: Stimuli Type), Subject name/id

					#Defining the list of columns required for the statistical analysis
					column_list = [meta]

					column_list.extend(between_factor_list)
					column_list.extend(within_factor_list)
					column_list.append("subject")
					column_list.append("stimuli_name")

					data =  pd.DataFrame(columns=column_list)

					#For each subject
					for sub_index, sub in enumerate(self.subjects):
						#For each Question Type
						for stimuli_index, stimuli_type in enumerate(sub.aggregate_meta):

							if meta in event_metas:
								summation_array = self.summationArrayCalculation(meta, sub_index, stimuli_index)

							value_array = self.meta_matrix_dict[1][meta][sub_index,stimuli_index]

							#Number of zero entries skipped so far in this value array
							index_extra = 0

							for value_index, _ in enumerate(value_array):

								if meta in event_metas:

									#Zero entries are skipped and do not count towards the stimulus index
									if value_array[value_index] == 0:
										index_extra += 1
										continue

									proper_index = self.return_index(value_index-index_extra, summation_array)
									stimulus_name = self.stimuli[stimuli_type][proper_index]
								else:
									stimulus_name = self.stimuli[stimuli_type][value_index]

								row = []
								row.append(value_array[value_index])

								#Add the between group factors (need to be defined in the json file)
								#NOTE(review): if a factor is missing, the row ends up shorter than column_list and the insertion below fails
								for param in between_factor_list:

									if param == "Subject_type":
										row.append(sub.subj_type)
										continue

									try:
										row.append(json_data["Subjects"][sub.subj_type][sub.name][param])
									except (KeyError, IndexError, TypeError):
										print("Between subject paramter: ", param, " not defined in the json file")

								for param in within_factor_list:

									if param == "Stimuli_type":
										row.append(stimuli_type)
										continue

									try:
										#NOTE(review): this overwrites the stimulus_name computed above with a value_index based lookup, which differs from proper_index for saccade/microsaccade indicators - confirm intended
										stimulus_name = self.stimuli[stimuli_type][value_index]
										row.append(json_data["Stimuli"][stimuli_type][stimulus_name][param])
									except (KeyError, IndexError, TypeError):
										print("Within stimuli parameter: ", param, " not defined in the json file")

								row.append(sub.name)
								row.append(stimulus_name)

								if np.isnan(value_array[value_index]):
									print("The data being read for analysis contains null value: ", row)

								#Instantiate into the pandas dataframe
								data.loc[len(data)] = row

					#The Data directory only exists when file creation was requested
					if file_creation:
						data.to_csv(directory_path + '/Data/' + meta + "_data.csv")

					#print(data)

					#Depending on the parameter, choose the statistical test to be done
					if statistical_test == "Mixed_anova":

						if len(within_factor_list)>1:
							print("Error: Too many within group factors,\nMixed ANOVA can only accept 1 within group factor\n")
						elif len(between_factor_list)>1:
							print("Error: Too many between group factors,\nMixed ANOVA can only accept 1 between group factor\n")

						print(meta, ":\tMixed ANOVA")
						aov = pg.mixed_anova(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
						pg.print_table(aov)

						if file_creation:

							values_list = ["Mixed Anova: "]
							values_list.append(meta)
							self.fileWriting(writer, csvFile, aov, values_list)

						posthocs = pg.pairwise_ttests(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
						pg.print_table(posthocs)

						if file_creation:

							values_list = ["Post Hoc Analysis"]
							self.fileWriting(writer, csvFile, posthocs, values_list)

					elif statistical_test == "RM_anova":

						if len(within_factor_list)>2 or len(within_factor_list)<1:
							print("Error: Too many or too few within group factors,\nRepeated Measures ANOVA can only accept 1 or 2 within group factors\n")

						print(meta, ":\tRM ANOVA")
						aov = pg.rm_anova(dv=meta, within= within_factor_list, subject = 'subject', data=data)
						pg.print_table(aov)

						if file_creation:

							values_list = ["Repeated Measures Anova: "]
							values_list.append(meta)
							self.fileWriting(writer, csvFile, aov, values_list)

					elif statistical_test == "anova":

						print(meta, ":\tANOVA")
						length = len(between_factor_list)
						#Build "<meta> ~ C(f1)*C(f2)*...*C(fn)" from the between group factors
						model_equation = meta + " ~ C("

						for factor_index, _ in enumerate(between_factor_list):
							if(factor_index<length-1):
								model_equation = model_equation + between_factor_list[factor_index] + ")*C("
							else:
								model_equation = model_equation + between_factor_list[factor_index] + ")"

						print("Including interaction effect")
						print(model_equation)
						model = ols(model_equation, data).fit()
						res = sm.stats.anova_lm(model, typ= 2)
						print(res)

						if file_creation:

							values_list = ["Anova including interaction effect: "]
							values_list.append(meta)
							self.fileWriting(writer, csvFile, res, values_list)

						print("\nExcluding interaction effect")
						#Same formula with "+" instead of "*", i.e. main effects only
						model_equation = model_equation.replace("*", "+")
						print(model_equation)
						model = ols(model_equation, data).fit()
						res = sm.stats.anova_lm(model, typ= 2)
						print(res)

						if file_creation:

							values_list = ["Anova excluding interaction effect: "]
							values_list.append(meta)
							self.fileWriting(writer, csvFile, res, values_list)

					elif statistical_test == "ttest":

						print(meta, ":\tt test")

						#Stays None for an invalid ttest_type so that no stale or missing result is written
						aov = None

						if ttest_type==1:
							aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, subject='subject', data=data)
							pg.print_table(aov)
						elif ttest_type==2:
							aov = pg.pairwise_ttests(dv=meta, within=within_factor_list, subject='subject', data=data)
							pg.print_table(aov)
						elif ttest_type==3:
							aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, within=within_factor_list, subject='subject', data=data)
							pg.print_table(aov)
						else:
							print("The value given to ttest_type is not acceptable, it must be either 1 or 2 or 3")


						if file_creation and aov is not None:

							values_list = ["Pairwise ttest: "]
							values_list.append(meta)
							self.fileWriting(writer, csvFile, aov, values_list)

					elif statistical_test == "welch_ttest":

						print(meta, ":\tWelch t test")

						#Stay None for an invalid ttest_type so that no stale or missing result is written
						normality = None
						aov = None

						if ttest_type==1:
							normality,aov = self.welch_ttest(dv=meta, factor=between_factor_list[0], subject='subject', data=data)
							pg.print_table(normality)
							pg.print_table(aov)
						elif ttest_type==2:
							normality,aov = self.welch_ttest(dv=meta, factor=within_factor_list[0], subject='subject', data=data)
							pg.print_table(normality)
							pg.print_table(aov)
						else:
							print("The value given to ttest_type for welch test is not acceptable, it must be either 1 or 2")

						if file_creation and aov is not None:

							values_list = ["Welch Pairwise ttest: "]
							values_list.append(meta)
							self.fileWriting(writer, csvFile, normality, values_list)
							self.fileWriting(writer, csvFile, aov, values_list)

		finally:
			if csvFile is not None:
				csvFile.close()
Ejemplo n.º 23
0
# ***This one approaches significance
# data = df[["small_size","small_color","small_colorAndSize"]]
# Select large graph
# data = df[["large_size","large_color","large_colorAndSize"]]

# Read in total interactions time dataset
# ***These all fail significance
# df = pd.read_csv('ANOVA_interactions.csv')
# Select just small graph
# This one approaches significance
# data = df[["small_size","small_color","small_colorAndSize"]]
# Select large graph
# data = df[["large_size","large_color","large_colorAndSize"]]

# The design is within-subjects, so a repeated-measures ANOVA is used.
aov = pg.rm_anova(data, detailed=True)
pg.print_table(aov)
#print(aov)

# The pairwise t-tests need the data in long format: one row per
# participant and condition.
small_graph_conditions = ["small_size", "small_color", "small_colorAndSize"]
melted = pd.melt(data_post,
                 id_vars=['Participant'],
                 value_vars=small_graph_conditions,
                 var_name='condition')

post_hocs = pg.pairwise_ttests(data=melted,
                               dv='value',
                               within='condition',
                               subject='Participant')
post_hocs.round(3)
Ejemplo n.º 24
0
# Load the merged behavioural data; missing false-alarm and omission-error
# entries are filled with 0.
data_merged = pd.read_csv(
    r'C:\Users\user\Desktop\FOCUS\behavioral\P_Merged_var.csv'
).fillna({'false_alarm': 0, 'om_err': 0})

# ANOVA - does correct reaction time differ between blocks?
aov_corr_rt = anova(data=data_merged, dv='corr_rt', between='blocks')
print(aov_corr_rt)

# Repeated-measures ANOVA on false alarms with 'blocks' as the within factor.
# The table is stored in `rep_anov_alarm` but not printed in this section.
rep_anov_alarm = pg.rm_anova(
    data=data_merged,
    dv='false_alarm',
    within='blocks',
    subject='participant',
    detailed=True,
)

# Follow-up pairwise (Tukey) comparison of correct reaction time between blocks.
pairs_corr_rt = pairwise_tukey(data=data_merged, dv='corr_rt', between='blocks')
print(pairs_corr_rt)

# ANOVA - do false alarms differ between blocks?
aov_alarms = anova(data=data_merged, dv='false_alarm', between='blocks')
print(aov_alarms)
Ejemplo n.º 25
0
    dfExpTrail['hasAvoidPoint'] = dfExpTrail.apply(lambda x: hasAvoidPoints(eval(x['aimPlayerGridList']), eval(x['avoidCommitPoint'])), axis=1)

    statDF = pd.DataFrame()
    # statDF['avoidCommitPercent'] = dfExpTrail.groupby(['name', 'decisionSteps'])["hasAvoidPoint"].mean()

    statDF['avoidCommitPercent'] = dfExpTrail.groupby(['name', 'decisionSteps', 'conditionName'])["hasAvoidPoint"].mean()

    statDF['ShowCommitmentPercent'] = statDF.apply(lambda x: 1 - x['avoidCommitPercent'], axis=1)

    statDF = statDF.reset_index()
    statDF['participantsType'] = ['RL Agent' if 'max' in name else 'Human' for name in statDF['name']]

    # statDF['avoidCommitPercentSE'] = statDF["avoidCommitPercent"].apply(calculateSE)

    import pingouin as pg
    aov = pg.rm_anova(dv='avoidCommitPercent', within=['decisionSteps', 'conditionName'], subject='name', data=statDF)
    # pg.print_table(aov)

    posthocs = pg.pairwise_ttests(dv='avoidCommitPercent', within=['decisionSteps', 'conditionName'], subject='name', data=statDF)
    # pg.print_table(posthocs)

    import seaborn as sns
    ax = sns.barplot(x="decisionSteps", y="ShowCommitmentPercent", hue="conditionName", data=statDF, ci=68)
    # ax.set(xlabel='Decision Step', ylabel='Show  Commitment Ratio', title='Commitment with Deliberation')
    handles, labels = ax.get_legend_handles_labels()

    # labels.get_texts()[0].set_text('1 obstacle at crossroad')
    # labels.get_texts()[1].set_text('2 obstacles at crossroad')

    plt.xticks(fontsize=16, color='black')
    plt.yticks(fontsize=10, color='black')
Ejemplo n.º 26
0
# For every time part, run a repeated-measures ANOVA and a Tukey post-hoc test
# on each table in `totals_dict`, print both results and save them as CSV under
# save_test_dir / <part> / <key>.
for part in time_vals:
    print(part)
    part_dir = save_test_dir / part
    # Post-hoc tables of the current part, keyed like `totals_dict`.
    ph_part_dict = {}
    for key, df in totals_dict.items():
        print(key)

        # Tidy data: stack to long format, apply the analysis column names,
        # then keep only the rows belonging to the current part.
        long_df = df.stack().reset_index()
        long_df.columns = stat_colnames
        part_df = long_df.query(f"{time} == '{part}'")

        # Repeated-measures ANOVA across `day`, with `anim` as the subject.
        part_rm = pg.rm_anova(dv=dep_var,
                              within=day,
                              subject=anim,
                              data=part_df)
        pg.print_table(part_rm)

        # Post-hoc comparison between the levels of `day`.
        # NOTE(review): pairwise_tukey treats `day` as a between-subject
        # factor, whereas the ANOVA above is within-subject -- confirm that
        # this is the intended post-hoc test.
        ph = pg.pairwise_tukey(dv=dep_var, between=day, data=part_df)
        pg.print_table(ph)
        ph_part_dict[key] = ph

        # Make sure the per-stage output directory exists; otherwise the
        # to_csv calls below fail when it has not been created beforehand.
        stage_test_dir = part_dir / key
        stage_test_dir.mkdir(parents=True, exist_ok=True)

        part_rm.to_csv(stage_test_dir / "01_anova.csv")
        ph.to_csv(stage_test_dir / "02_posthoc.csv")
Ejemplo n.º 27
0
# Reshape to long format (one row per participant and 'cutoff' column), then
# summarise per evaluation/cutoff combination.

cutoff_cols = [c for c in df.columns if 'cutoff' in c]
melted = df.reset_index().melt(
    id_vars='participant_id',
    value_vars=cutoff_cols,
    value_name='ld_rate',
)

# Each melted column label is split on '-' into its eval and cutoff parts.
label_parts = melted['variable'].str.split('-')
melted['eval'], melted['cutoff'] = zip(*label_parts)

# Mean and standard error of ld_rate for every eval/cutoff cell.
avgs = melted.groupby(['eval', 'cutoff'])['ld_rate'].agg(['mean', 'sem'])

# The SEM is meaningless for the binary case, so blank it out.
avgs.loc['binary_ld', 'sem'] = pd.NA

# Two-way repeated-measures ANOVA (eval x cutoff), binary case excluded.
non_binary = melted[melted['eval'] != 'binary_ld']
anova = pg.rm_anova(
    data=non_binary,
    dv='ld_rate',
    within=['eval', 'cutoff'],
    subject='participant_id',
    detailed=True,
)

# Export the descriptive table (index kept) and the ANOVA table (index dropped).
avgs.to_csv(EXPORT_FNAME_DATA, float_format=FLOAT_FMT, index=True, na_rep='NA')
anova.to_csv(EXPORT_FNAME_STAT, float_format=FLOAT_FMT, index=False)

####################################


#########  draw plot  #########


fig, ax = plt.subplots(figsize=(FIG_WIDTH,FIG_HEIGHT))

# draw lines and points separately to have diff colored points
for ev, subdf in avgs.groupby('eval'):
Ejemplo n.º 28
0
	 sorted(df.Animal.unique())):
	 
	#Query animal data
	trimmed_result = df.loc[df.Notes.isin(['Hab4','Hab5'])\
					 & (df['Animal'] == animal)]
	
	#Run ANOVA across bottles
	bottle_stats = []
	for day in sorted(trimmed_result.Notes.unique()):
		stat_query = trimmed_result.loc[(trimmed_result['Notes'] == day)]
		b_stats = stat_query.anova(dv='LICKS', between=['TUBE'])
		bottle_stats.append(np.round(b_stats.iloc[0,4],2))
	
	#Run Repeated Measures ANOVA across days
	stats = pg.rm_anova(dv='LICKS',
                  within=['Notes'],
                  subject='TUBE', data=trimmed_result,  detailed=True)

	pval = np.format_float_scientific(stats.iloc[0,5],1, exp_digits=2)
	rm_stats.append(pval)
		
	#Establish plot location
	ax = axes_list.pop(0)

	#Plot
	sns.barplot(x='Notes',\
				y='LICKS',\
				hue='TUBE',\
				data=trimmed_result,
				order =['Hab4','Hab5'],
				palette=sns.color_palette("PuBu_r", len(trimmed_result.Notes.unique())+1),\
Ejemplo n.º 29
0
    pivot_t = True

    if pivot_t:
        pivot_t = pd.pivot_table(data, index = ["crowdingcons", "participant_N"], columns = ["winsize"],
                                 values = "deviation_score")
        pivot_t.to_csv("pt_exp1.csv")

    data_1 = data.groupby(["participant_N", "winsize", "crowdingcons"])[dv].agg(
            ["mean", "std"]).reset_index(level = ["participant_N", "winsize", "crowdingcons"])
    rename_df_col(df = data_1, old_col_name = "mean", new_col_name = dv)

    # mean crowding vs. no-crowding
    crowdingcon = 1
    cal_ds_mean(data, crowdingcon = crowdingcon)
    cal_ds_std(data, crowdingcon = crowdingcon)

    # 2 way anova
    aov = pg.rm_anova(dv = dv,
                      within = ["winsize", "crowdingcons"],
                      subject = "participant_N",
                      data = data_1)
    # post hoc
    posthocs = pg.pairwise_ttests(dv = dv,
                                  within = ["winsize", "crowdingcons"],
                                  subject = "participant_N",
                                  data = data_1,
                                  padjust = "fdr_bh",
                                  effsize = "cohen")


        .dropna() \
        .sort_values(by=["cond", "sub"]) \
        .reset_index()

    data_stats["index"] = \
        np.array([list(range(int(data_stats["cond"].shape[0]/len(conds))))] \
                 *len(conds)).flatten()

    # Perform repeated measures anova
    ind_col = "index"
    lab_col = "cond"
    val_col = "instability"

    anova_output = pg.rm_anova(data=data_stats,
                              dv=val_col,
                              within=lab_col,
                              subject=ind_col,
                              detailed=True) \

    if anova_output["p-unc"][0] < 0.05:
        res_an = anova_output.loc[:, idx["F", "p-unc"]]
        res_pwc = pd.DataFrame(None, columns=["A", "B", "T", "p-corr"])

        # Protected post hoc t-test (LSD)
        degf = anova_output.loc[1, "DF"]
        SSE = anova_output.loc[1, "SS"]
        MSE = SSE / degf

        n = len(conds)

        combos_labels = list(itertools.combinations(conds, 2))