Example #1
import numpy as np
import pandas as pd
import pingouin as pg
import statsmodels.stats.multitest as multi


def pg_ttest(data,
             group_col,
             group1,
             group2,
             fdr=0.05,
             value_col='MS signal [Log2]'):
    '''
    data: long-format data with ProteinID as index, one column of protein levels, other columns for grouping.
    '''
    df = data.copy()
    proteins = data.index.unique()
    results = []
    for i in proteins:
        df_ttest = df.loc[i]
        x = df_ttest[df_ttest[group_col] == group1][value_col]
        y = df_ttest[df_ttest[group_col] == group2][value_col]
        difference = y.mean() - x.mean()
        result = pg.ttest(x=x, y=y)
        result['protein'] = i
        result['difference'] = difference
        results.append(result)
    # DataFrame.append was removed in pandas 2.0, so collect per-protein results and concatenate
    scores = pd.concat(results, ignore_index=True)
    scores['-Log pvalue'] = -np.log10(scores['p-val'])

    # FDR correction (Benjamini-Hochberg) at the requested threshold
    reject, qvalue = multi.fdrcorrection(scores['p-val'],
                                         alpha=fdr,
                                         method='indep')
    scores['qvalue'] = qvalue
    scores['rejected'] = reject
    scores = scores.set_index('protein')
    return scores
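# A minimal usage sketch for pg_ttest() above, assuming a long-format table indexed by
# ProteinID with one grouping column and one value column (all names and values below
# are invented for illustration):
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
rows = []
for prot in ['P%d' % i for i in range(5)]:
    for grp in ['control', 'treated']:
        for _ in range(4):
            rows.append({'ProteinID': prot,
                         'condition': grp,
                         'MS signal [Log2]': rng.normal(20.0 if grp == 'control' else 21.0, 1.0)})
long_df = pd.DataFrame(rows).set_index('ProteinID')

scores = pg_ttest(long_df, group_col='condition', group1='control', group2='treated', fdr=0.05)
print(scores[['p-val', 'qvalue', 'difference', 'rejected']])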
Example #2
def two_sample_ttests(group_var,
                      DVs,
                      data,
                      paired=False,
                      tails='two-sided',
                      test_name='Mean Difference',
                      **correction_args):
    """ Performs a series of two-sample t-tests, collecting all the statistics
        and returning a nicely formatted dataframe of results.

    Args:
        group_var (string): The variable that will be used to group/split
            the dataset. Must have only two levels.
        DVs (list-like): the names of dependent variables. One t-test will be 
            done for each element of this list.
        data (Pandas dataframe): the raw data frame with variables as columns
            and rows as observations.
    
    Optional Args:
        paired (boolean): if True, performs paired-sample t-tests.
        tails (string): the alternative hypothesis, e.g. "two-sided",
            "greater", or "less".
        test_name (string): the label for the test difference column.
        **correction_args: keyword arguments that get passed to adjust_pvals()

    """

    from pingouin import ttest
    grp_data = [d for _, d in data.groupby(group_var)]
    assert (len(grp_data) == 2)

    results = []
    for dv in DVs:
        t = ttest(grp_data[0][dv],
                  grp_data[1][dv],
                  confidence=(1 - corrected_alpha_from(**correction_args)),
                  paired=paired,
                  alternative=tails)
        t.index.names = ['contrast']
        t['value'] = grp_data[0][dv].mean() - grp_data[1][dv].mean()
        results.append(t)

    results = (pd.concat(results, names=['score'],
                         keys=DVs).rename(columns={
                             'p-val': 'p',
                             'T': 'tstat',
                             'dof': 'df',
                             'value': 'diff'
                         },
                                          index={'T-test': test_name}))

    # unpack the CIs provided by pingouin
    ci_col = [c for c in results.columns if c[0:2] == 'CI'][0]
    results = (results.assign(
        CI_lower=results[ci_col].apply(lambda x: x[0])).assign(
            CI_upper=results[ci_col].apply(lambda x: x[1])).rename(
                columns={ci_col: 'CI'}))

    results = adjust_pvals(results, **correction_args)

    return results
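# The helpers corrected_alpha_from() and adjust_pvals() referenced above are not part of
# this snippet. The stubs below are purely hypothetical sketches, assuming a simple
# Bonferroni-style correction, so that two_sample_ttests() can be run end to end:
def corrected_alpha_from(alpha=0.05, n_comparisons=1, **_ignored):
    # hypothetical helper: shrink alpha when several comparisons are planned
    return alpha / max(n_comparisons, 1)


def adjust_pvals(results, alpha=0.05, n_comparisons=None, **_ignored):
    # hypothetical helper: Bonferroni-adjust the 'p' column produced by two_sample_ttests()
    n = n_comparisons if n_comparisons is not None else len(results)
    results = results.assign(p_adj=(results['p'] * n).clip(upper=1.0))
    return results.assign(significant=results['p_adj'] < alpha)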
Example #3
    def perform_kappa_t_tests(self):

        self.anklewrist_t = pg.ttest(
            x=self.df_kappa["All_AnkleWrist"].dropna(),
            y=self.df_kappa["AnkleWrist"],
            paired=False,
            correction="auto")
        print("\nAnkle-Wrist Comparison")
        print(self.anklewrist_t)

        self.wristhr_t = pg.ttest(x=self.df_kappa["All_WristHR"].dropna(),
                                  y=self.df_kappa["WristHR"].dropna(),
                                  paired=False,
                                  correction="auto")
        print("\nWrist-HR Comparison")
        print(self.wristhr_t)
Example #4
    def generate_quality_report(self, write_report=True):
        """Calculates how much of the data was usable. Returns values in dictionary."""

        valid_epochs = self.epoch_validity.count(0)  # number of valid epochs
        invalid_epochs = self.epoch_validity.count(1)  # number of invalid epochs
        hours_lost = round(invalid_epochs / (60 / self.epoch_len) / 60, 2)  # hours of invalid data
        perc_valid = round(valid_epochs / len(self.epoch_validity) * 100, 1)  # percent of valid data
        perc_invalid = round(invalid_epochs / len(self.epoch_validity) * 100, 1)  # percent of invalid data

        # Average Bittium accelerometer counts during invalid, valid, and non-wear epochs ----------------------------
        df_valid = self.output_df.groupby("Valid").get_group("Valid")
        df_invalid = self.output_df.groupby("Valid").get_group("Invalid")
        df_invalid = df_invalid.loc[df_invalid["Wear"] == "Wear"]
        df_nonwear = self.output_df.groupby("Wear").get_group("Nonwear")

        if self.load_accel:
            valid_counts = df_valid.describe()["AccelCounts"]['mean']
            invalid_counts = df_invalid.describe()["AccelCounts"]['mean']
            nonwear_counts = df_nonwear.describe()["AccelCounts"]['mean']

            ttest = pg.ttest(df_valid["AccelCounts"], df_invalid["AccelCounts"], paired=False)
            print("\nUnpaired T-test results: valid vs. invalid ECG epochs' activity counts:")
            print("t({}) = {}, p = {}, Cohen's d = {}.".format(round(ttest["dof"].iloc[0], 1),
                                                               round(ttest["T"].iloc[0], 2),
                                                               round(ttest["p-val"].iloc[0], 3),
                                                               round(ttest["cohen-d"].iloc[0], 3)))
            t = round(ttest["T"].iloc[0], 3)
            p = round(ttest["p-val"].iloc[0], 5)

        if not self.load_accel:
            invalid_counts = 0
            valid_counts = 0
            nonwear_counts = 0
            t = 0
            p = 0

        quality_report = {"Invalid epochs": invalid_epochs, "Hours lost": hours_lost,
                          "Percent valid": perc_valid, "Percent invalid": perc_invalid,
                          "Valid counts": round(valid_counts, 1),
                          "Invalid counts": round(invalid_counts, 1),
                          "Nonwear counts": round(nonwear_counts, 1),
                          "Counts T": t, "Counts p": p}

        print("{}% of the data is valid.".format(round(100 - perc_invalid), 3))

        if write_report:
            df = pd.DataFrame(list(quality_report.items()),
                              columns=["Variable", "Value"])

            df.to_csv(path_or_buf=self.output_dir + self.filename + "_QualityReport.csv", sep=",", index=False)

        return quality_report
Example #5
def analyse(baseline, modified):
    try:
        stats = ttest(baseline.builds,
                      modified.builds,
                      paired=True,
                      alternative="greater").round(3)
        pvalue = stats.loc["T-test", "p-val"]
        improvement_detected = pvalue < SIGNIFICANCE_LEVEL
        details = PairedTTestDetails(pvalue, SIGNIFICANCE_LEVEL)
        return AnalysisResults(baseline, modified, details,
                               improvement_detected)
    except Exception:
        logging.exception("Error when running analyser")
        raise Exception("Failed when running analyser for benchmarks")
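# The names used above (ttest, SIGNIFICANCE_LEVEL, PairedTTestDetails, AnalysisResults and
# the benchmark objects) are defined elsewhere in that project. The stubs below are purely
# hypothetical placeholders so the analyser can be exercised in isolation:
import logging
from collections import namedtuple
from pingouin import ttest

SIGNIFICANCE_LEVEL = 0.05  # assumed threshold
PairedTTestDetails = namedtuple("PairedTTestDetails", "pvalue significance_level")
AnalysisResults = namedtuple("AnalysisResults", "baseline modified details improvement_detected")
Benchmark = namedtuple("Benchmark", "builds")  # hypothetical container for build durations

baseline = Benchmark(builds=[102.5, 98.3, 101.1, 99.8, 100.4])
modified = Benchmark(builds=[97.9, 95.2, 96.8, 94.7, 96.1])
print(analyse(baseline, modified))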
Example #6
    def test_independence(self, column_name, test_stat, theor=False, direction='both', **kwargs):
        """ Permutation based independence test, desired test stat should be provided

        Args:
            column_name (str): column name in the dataframe to test
            test_stat (callable): statistic to compare, e.g. np.mean, np.median or np.std
            theor (bool): if True, also return pingouin's t-test output
            direction (str): direction of the test; stored on the instance
            nsamples (int): number of permutation samples, taken from self.nsamples

        Returns:
            empirical test statistic
            sample of permuted test statistics
            simulation-based p-value
            dataframe with t-test output if theor is True

        """
        self.test_type = 'independence'
        self.direction = direction
        self.theor = theor

        def permutation_sample(data1, data2):
            data = np.concatenate((data1, data2))
            permuted_data = np.random.permutation(data)
            return permuted_data[:len(data1)], permuted_data[len(data1):]

        data1 = self.data[self.data['male'] == 0][column_name]
        data2 = self.data[self.data['male'] == 1][column_name]

        # empirical test statistic
        t_diff = test_stat(data2) - test_stat(data1)

        # build the permutation distribution of the test statistic
        sample_test = []
        for _ in range(self.nsamples):
            p1, p2 = permutation_sample(data1, data2)
            sample_test.append(test_stat(p2) - test_stat(p1))
        sample_test = np.array(sample_test)

        # simulation-based p-value: fraction of permuted replicates at least as extreme
        p_val = np.mean(sample_test >= t_diff)

        if self.theor:
            ttest = pg.ttest(x=data1, y=data2).round(2)
            return t_diff, sample_test, p_val, ttest
        else:
            return t_diff, sample_test, p_val
Example #7
    def perform_kappa_ttest(self):

        # UNPAIRED T-TEST  ------------------------------------------------------------------------------------------------
        print(
            "\nPerforming unpaired T-test between activity groups on Cohen's Kappa values."
        )

        high = self.df_kappa_long.groupby("Group").get_group("HIGH")["Kappa"]
        low = self.df_kappa_long.groupby("Group").get_group("LOW")["Kappa"]

        self.kappa_ttest = pg.ttest(high, low, paired=False, correction='auto')

        # Approximates Hedges' g using d * (1 - 3 / (4*(n1 + n2) - 9)), with n1 == n2 == high.shape[0]
        self.kappa_ttest["hedges-g"] = self.kappa_ttest["cohen-d"] * (
            1 - (3 / (4 * 2 * high.shape[0] - 9)))
        print(self.kappa_ttest)

        self.kappa_wilcoxon = pg.wilcoxon(high, low)
        print(self.kappa_wilcoxon)
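        # For reference, pingouin can also compute Hedges' g directly with
        # pg.compute_effsize(..., eftype='hedges'); a standalone sketch (with invented data)
        # comparing it to the approximation used above:
        #
        #     rng = np.random.default_rng(42)
        #     high = rng.normal(0.80, 0.05, 15)
        #     low = rng.normal(0.74, 0.05, 15)
        #     d = pg.compute_effsize(high, low, paired=False, eftype='cohen')
        #     g_exact = pg.compute_effsize(high, low, paired=False, eftype='hedges')
        #     g_approx = d * (1 - 3 / (4 * (len(high) + len(low)) - 9))
        #     print(g_exact, g_approx)  # same small-sample correction as above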
Example #8
def describe_clusters(df: pd.DataFrame, labs: np.ndarray, dec: int) -> None:
    '''Build a table comparing the 10 variables between the 2 clusters
    :df: the original dataframe,
    :labs: the array of labels for the 2 clusters
    :dec: number of decimals used for rounding

    The result is a dataframe of descriptive statistics and t-tests,
    saved in the Output folder
    '''

    df['C'] = labs

    res_df = df.groupby('C').agg(lambda x: f"{np.round(np.mean(x),dec)} ± {np.round(np.std(x),dec)}").T
    res_df.columns = ['Cluster 1', 'Cluster 2']

    res_df['Overall'] = df.agg(lambda x: f"{np.round(np.mean(x),dec)} ± {np.round(np.std(x),dec)}").T

    res_df = res_df[['Overall', 'Cluster 1', 'Cluster 2']]

    p_val = []

    for v in res_df.index:
        p = pg.ttest(df[df['C'] == 0][v], df[df['C']==1][v], tail='one-sided')['p-val'][0]
        p_val.append(p)

    res_df['p-value'] = p_val

    res_df.index = res_df.index.map(col_names)

    print('Descriptive statistics for the 2 clusters:')
    print('='*30)
    print(res_df.to_string())

    xlsx_name = os.path.join(output_folder, "Table.xlsx")

    res_df.to_excel(xlsx_name, index=True)
Example #9
plt.ylim(0, 100)
plt.yticks(range(0, 130, 20))
plt.ylabel('Time in Arena Periphery (%)')
peri_ax.get_legend().remove()
plt.tight_layout()
#%%
OF_plot.savefig('/Users/felipeantoniomendezsalcido/Desktop/OF_analysis2.png', dpi=300)
OF_data

# Area Timm Analysis

timm_df = pd.read_csv('/Users/felipeantoniomendezsalcido/Desktop/Data/Timm Area.csv')
timm_df.columns
timm_df['Group'] = timm_df['Subject'].astype('str').str[0:3]
timm_df['Mean Area'][timm_df['Genotype'] == 'WT']
pg.ttest(x=timm_df['Mean Area'][timm_df['Genotype'] == 'WT'],
         y=timm_df['Mean Area'][timm_df['Genotype'] == 'KO'], paired=False)

#%%
timm_fig, (a0, a1) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [3, 1]}, figsize=(7, 3))
timm_point = sns.pointplot(x='Level', y='Mean Area', hue='Genotype', data=timm_df, palette=[
                           'b', 'g'], capsize=.05, scale=.7, errwidth=.05, ci=68, ax=a0, label=['a', 'b'])
a0.set_ylabel(r'Mean Area ($\mu$m$^2$)')
a0.set_xlabel('from Bregma (mm)')
a0.invert_xaxis()
sns.despine()
timm_point.get_legend().remove()
timm_fig.legend(loc='upper right', bbox_to_anchor=(.29, .93), ncol=1)
timm_total = sns.barplot(x='Genotype', y='Mean Area', data=timm_df,
                         palette=['b', 'g'], ax=a1, ci=68, capsize=0.05, errwidth=1.5)
a1.set_ylabel(r'Mean Area ($\mu$m$^2$)')
a1.annotate('***', xy=(0.5, .98), xytext=(0.5, .96), xycoords='axes fraction', fontsize=18, ha='center',
Example #10
def graph_gPPI():
    from fg_config import lgroup

    import matplotlib.pyplot as plt
    import pingouin as pg
    import seaborn as sns

    # ROIS = ['rACC','sgACC','lh_hpc','rh_hpc','lh_amyg','rh_amyg']
    ROIS = ['rh_hpc']
    # COPES = ['acq_ext','ext_acq']
    COPES = ['ext_acq']
    groups = ['healthy', 'ptsd']

    df = pd.read_csv('extracted_mem_gPPI.csv')
    df = df.groupby(['seed', 'cope', 'target', 'subject']).mean().reset_index()
    df['group'] = df.subject.apply(lgroup)
    df = df.set_index(['cope', 'group', 'seed', 'target']).sort_index()

    stats = pd.DataFrame(columns=['t', 'p'],
                         index=pd.MultiIndex.from_product(
                             [groups, ROIS, ROIS],
                             names=['group', 'seed', 'target']))
    gstats = pd.DataFrame(columns=['t', 'p'],
                          index=pd.MultiIndex.from_product(
                              [ROIS, ROIS], names=['seed', 'target']))

    for seed in ROIS:
        for group in groups:
            for target in ROIS:
                tres = pg.ttest(df.loc[('ext_acq', group, seed, target),
                                       'conn'].values,
                                0,
                                tail='two-sided')
                # assign via a single .loc call; chained indexing would write to a copy
                stats.loc[(group, seed, target),
                          ['t', 'p']] = tres.loc['T-test', ['T', 'p-val']].values

        # gres = pg.ttest(df.loc[('ext_acq','healthy',seed,target),'conn'].values,df.loc[('ext_acq','ptsd',seed,target),'conn'].values,paired=False)
        # gstats.loc[(seed,target)][['t','p']] = gres.loc['T-test'][['T','p-val']]

    # mask = np.zeros([len(ROIS),len(ROIS)])
    # mask[np.diag_indices_from(mask)] = True

    # fig, (gax, gcbar) = plt.subplots(2,2,gridspec_kw={'height_ratios':(.9,.05),'hspace':.5})
    # for j, cope in enumerate(COPES):

    #     gt = gstats.loc[(cope),'t'].unstack(level=-1).astype(float).loc[ROIS][ROIS]
    #     gp = gstats.loc[(cope),'p'].apply(pconvert).unstack(level=-1).astype(str).loc[ROIS][ROIS]

    #     sns.heatmap(gt,mask=mask,ax=gax[j],square=True,
    #                 annot=gp,fmt='',cmap='PRGn',center=0,vmin=-3,vmax=3,
    #                 cbar_ax=gcbar[j],cbar_kws={'orientation':'horizontal'})
    #     gax[j].set_title(cope + '_group_comp')

    fig1, (ax1, cbar1) = plt.subplots(2,
                                      2,
                                      gridspec_kw={
                                          'height_ratios': (.9, .05),
                                          'hspace': .5
                                      })
    fig2, (ax2, cbar2) = plt.subplots(2,
                                      2,
                                      gridspec_kw={
                                          'height_ratios': (.9, .05),
                                          'hspace': .5
                                      })

    for i, group in enumerate(groups):
        t = stats.loc[(group),
                      't'].unstack(level=-1).astype(float).loc[ROIS][ROIS]
        p = stats.loc[(group), 'p'].apply(pconvert).unstack(
            level=-1).astype(str).loc[ROIS][ROIS]

        # sns.heatmap(t,mask=mask,ax=ax[i],square=True,
        #             annot=p,fmt='',cmap='PRGn',center=0,vmin=-3,vmax=3,
        #             cbar_ax=cbar[i],cbar_kws={'orientation':'horizontal'})
        # # ax[i].set_title(group + '_' + cope)

        # pfc_targ_t = t.loc[('rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg'),['rACC','sgACC']].T
        # pfc_targ_p = p.loc[('rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg'),['rACC','sgACC']].T
        pfc_targ_t = t.loc[('rh_hpc'), ['rACC', 'sgACC']].T
        pfc_targ_p = p.loc[('rh_hpc'), ['rACC', 'sgACC']].T

        sns.heatmap(pfc_targ_t,
                    ax=ax1[i],
                    annot=pfc_targ_p,
                    square=True,
                    fmt='',
                    cmap='PRGn',
                    center=0,
                    vmin=-3,
                    vmax=3,
                    cbar_ax=cbar1[i],
                    cbar_kws={'orientation': 'horizontal'})
        ax1[i].set_title(group + ' ext vs. acq')

        pfc_seed_t = t.loc[('rACC', 'sgACC'),
                           ['rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg']].T
        pfc_seed_p = p.loc[('rACC', 'sgACC'),
                           ['rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg']].T

        sns.heatmap(pfc_seed_t,
                    ax=ax2[i],
                    annot=pfc_seed_p,
                    square=True,
                    fmt='',
                    cmap='PRGn',
                    center=0,
                    vmin=-3,
                    vmax=3,
                    cbar_ax=cbar2[i],
                    cbar_kws={'orientation': 'horizontal'})
        ax2[i].set_title(group + ' ext vs. acq')

    ####encoding!####
    edf = pd.read_csv('extracted_encode_gPPI.csv')
    edf = edf.set_index(['cope', 'phase', 'seed', 'target', 'subject'])
    edf = (edf.loc['csp', 'extinction'] -
           edf.loc['csp', 'acquisition']).reset_index()
    edf['group'] = edf.subject.apply(lgroup)
    edf = edf.set_index(['group', 'seed', 'target'])

    estats = pd.DataFrame(columns=['t', 'p'],
                          index=pd.MultiIndex.from_product(
                              [groups, ROIS, ROIS],
                              names=['group', 'seed', 'target']))

    for seed in ROIS:
        for group in groups:
            for target in ROIS:
                etres = pg.ttest(edf.loc[(group, seed, target), 'conn'].values,
                                 0,
                                 tail='two-sided')
                estats.loc[(group, seed, target),
                           ['t', 'p']] = etres.loc['T-test', ['T', 'p-val']].values

    fig3, (ax3, cbar3) = plt.subplots(2,
                                      2,
                                      gridspec_kw={
                                          'height_ratios': (.9, .05),
                                          'hspace': .5
                                      })
    fig4, (ax4, cbar4) = plt.subplots(2,
                                      2,
                                      gridspec_kw={
                                          'height_ratios': (.9, .05),
                                          'hspace': .5
                                      })

    for i, group in enumerate(groups):
        t = estats.loc[(group),
                       't'].unstack(level=-1).astype(float).loc[ROIS][ROIS]
        p = estats.loc[(group), 'p'].apply(pconvert).unstack(
            level=-1).astype(str).loc[ROIS][ROIS]

        # sns.heatmap(t,mask=mask,ax=ax[i],square=True,
        #             annot=p,fmt='',cmap='PRGn',center=0,vmin=-3,vmax=3,
        #             cbar_ax=cbar[i],cbar_kws={'orientation':'horizontal'})
        # # ax[i].set_title(group + '_' + cope)

        pfc_targ_t = t.loc[('rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg'),
                           ['rACC', 'sgACC']].T
        pfc_targ_p = p.loc[('rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg'),
                           ['rACC', 'sgACC']].T

        sns.heatmap(pfc_targ_t,
                    ax=ax3[i],
                    annot=pfc_targ_p,
                    square=True,
                    fmt='',
                    cmap='PRGn',
                    center=0,
                    vmin=-3,
                    vmax=3,
                    cbar_ax=cbar3[i],
                    cbar_kws={'orientation': 'horizontal'})
        ax3[i].set_title(group + ' ext vs. acq')

        pfc_seed_t = t.loc[('rACC', 'sgACC'),
                           ['rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg']].T
        pfc_seed_p = p.loc[('rACC', 'sgACC'),
                           ['rh_hpc', 'lh_hpc', 'rh_amyg', 'lh_amyg']].T

        sns.heatmap(pfc_seed_t,
                    ax=ax4[i],
                    annot=pfc_seed_p,
                    square=True,
                    fmt='',
                    cmap='PRGn',
                    center=0,
                    vmin=-3,
                    vmax=3,
                    cbar_ax=cbar4[i],
                    cbar_kws={'orientation': 'horizontal'})
        ax4[i].set_title(group + ' ext vs. acq')
Example #11
def graph_gPPI_better():

    from fg_config import lgroup
    from pysurfer import bnsurf

    import matplotlib.pyplot as plt
    import pingouin as pg
    import seaborn as sns
    # sns.set_context('talk')

    df = pd.read_csv('extracted_mem_gPPI.csv')
    df['group'] = df.subject.apply(lgroup)

    vm = (df.target == 'sgACC')
    d = (df.target == 'rACC')
    pfc = df[d | vm]
    pfc = pfc[pfc.seed != 'rh_hpc']
    pfc.target = pfc.target.apply(lambda x: 'vmPFC'
                                  if x == 'sgACC' else 'dACC')
    for cope in pfc.cope.unique():
        g = sns.catplot(data=pfc[pfc.cope == cope],
                        x='target',
                        y='conn',
                        hue='seed',
                        col='group',
                        kind='bar',
                        palette='mako',
                        hue_order=[
                            'amyg_cem', 'amyg_bla', 'hc_head', 'hc_body',
                            'hc_tail'
                        ],
                        sharey=False,
                        height=10,
                        aspect=1.2)
        plt.subplots_adjust(top=0.9)
        g.fig.suptitle(cope)
        plt.savefig('plots/roi_conn/%s.png' % cope, format='png')

    # seeds = ['rh_hpc','hc_tail','hc_body','hc_head','amyg_bla','amyg_cem']
    seeds = ['hc_tail', 'hc_body', 'hc_head', 'amyg_bla', 'amyg_cem']
    targets = [
        'A32sg', 'A32p', 'A24cd', 'A24rv', 'A14m', 'A11m', 'A13', 'A10m',
        'A9m', 'A8m', 'A6m'
    ]
    # targets = ['rh_hpc','hc_tail','hc_body','hc_head','amyg_bla','amyg_cem','sgACC','rACC','A32sg','A32p','A24cd','A24rv','A14m','A11m','A13','A10m','A9m','A8m','A6m']

    copes = ['ext_acq', 'ext_csp_csm', 'acq_csp_csm']
    groups = ['healthy', 'ptsd']

    df = df.set_index(['cope', 'group', 'seed', 'target', 'subject'])

    stats = pd.DataFrame(columns=['t', 'p'],
                         index=pd.MultiIndex.from_product(
                             [groups, seeds, copes, targets],
                             names=['group', 'seed', 'cope', 'target']))
    for seed in seeds:
        for group in groups:
            for cope in copes:
                for target in targets:
                    tres = pg.ttest(df.loc[(cope, group, seed, target),
                                           'conn'].values,
                                    0,
                                    tail='two-sided')
                    stats.loc[(group, seed, cope, target),
                              ['t', 'p']] = tres.loc['T-test', ['T', 'p-val']].values

            # stats.loc[(group,seed,cope),'p'] = pg.multicomp(list(stats.loc[(group,seed,cope),'p'].values),method='fdr_bh')[1]

    stats['p_mask'] = stats.p.apply(lambda x: 0 if x > .05 else 1)
    stats['t_disp'] = stats.t * stats.p_mask

    for group in groups:
        for seed in seeds:
            for cope in copes:
                disp = stats.loc[group, seed, cope]
                if disp.t_disp.min() == 0 and disp.t_disp.max() == 0:
                    pass
                else:
                    if disp.t_disp.max() > 0:
                        cmap = 'Reds'
                        tail = 'greater'
                    else:
                        cmap = 'Blues_r'
                        tail = 'less'
                    bnsurf(disp,
                           't_disp',
                           cmap,
                           tail=tail,
                           out='conn/%s_%s_%s' % (group, seed, cope))
    #bnsurf(data,val,cmap,tail='greater',out=None):

    stats = stats.reset_index()
    stats.loc[np.where(stats.p < 0.05)[0]]
Example #12
def stats(model, quantity, data, targets, tw, rm, nd):
    if model == 'absolute':
        data = data.drop(['NormQuant'], axis=1)
        data['NormMean'] = data['NormMean'].astype(float)
        mean = 'NormMean'
    else:
        data = data.drop(['rq'], axis=1)
        data['rqMean'] = data['rqMean'].astype(float)
        mean = 'rqMean'

    # prepare data from intermediate dataframe
    data = data[data['Outliers'].eq(False)]
    data = data.drop_duplicates(keep='first')

    # t-test and anova for normally distributed data
    if nd == 'True':
        if quantity == 2:
            # T-Test between 2 groups
            stats_dfs = pandas.DataFrame()
            posthoc_dfs = pandas.DataFrame()
            group = data['Group'].dropna()
            group = group.drop_duplicates(keep='first').values.tolist()
            for item in targets:
                df = data[data['Target Name'].eq(item)]
                group1 = df[df['Group'].eq(group[0])][mean]
                group2 = df[df['Group'].eq(group[1])][mean]
                t_test = ttest(group1, group2, paired=(rm == 'True'))

                if rm == 'True':
                    t_test['paired'] = 'TRUE'
                else:
                    t_test['paired'] = 'FALSE'
                t_test['Target Name'] = item
                if stats_dfs.empty:
                    stats_dfs = t_test
                else:
                    stats_dfs = pandas.concat([stats_dfs, t_test], ignore_index=True)
            # reformat output table
            stats_dfs = stats_dfs.rename(columns={
                'cohen-d': 'effect size',
                'BF10': 'Bayes factor',
                'dof': 'DF'
            })
            cols = [
                'Target Name', 'DF', 'T', 'tail', 'paired', 'p-val',
                'effect size', 'power', 'Bayes factor'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)
        elif quantity >= 3:
            # ANOVA test
            stats_dfs = pandas.DataFrame()
            posthoc_dfs = pandas.DataFrame()
            # tukey_dfs = pandas.DataFrame()
            pvals = []
            for item in targets:
                if rm == 'True':
                    # one-way
                    if tw == 'False':
                        # repeated measure anova
                        aov = pg.rm_anova(
                            dv=mean,
                            data=data[data['Target Name'].eq(item)],
                            within='Group',
                            subject='Sample Name',
                            detailed=True)
                        pvals.append(aov['p-unc'][0])
                        aov = aov.drop([1])
                        aov['measures'] = ['dependent']
                        aov['Target Name'] = item
                    # two-way
                    else:
                        aov = pg.rm_anova(
                            dv=mean,
                            data=data[data['Target Name'].eq(item)],
                            within=['Group1', 'Group2'],
                            subject='Sample Name',
                            detailed=True)
                        reject_tw, pval_corr_tw = pg.multicomp(list(
                            aov['p-unc']),
                                                               alpha=0.05,
                                                               method='bonf')
                        aov['p-value corrected'] = pval_corr_tw
                        aov['measures'] = ['dependent'] * 3
                        aov['Target Name'] = [item] * 3
                    aov = aov.drop(columns=['eps'], errors='ignore')
                    ph = pairwise_ttests(
                        data=data[data['Target Name'].eq(item)],
                        dv=mean,
                        within='Group',
                        subject='Sample Name',
                        padjust='fdr_bh')
                    ph['Target Name'] = item
                    ph['Test'] = 'T-Test'
                else:
                    # one-way
                    if tw == 'False':
                        aov = pg.anova(dv=mean,
                                       between='Group',
                                       data=data[data['Target Name'].eq(item)],
                                       detailed=True)
                        pvals.append(aov['p-unc'][0])
                        aov = aov.drop([1])
                        aov['measures'] = ['independent']
                        aov['Target Name'] = item
                        ph = pairwise_ttests(
                            data=data[data['Target Name'].eq(item)],
                            dv=mean,
                            between='Group',
                            padjust='fdr_bh')
                        ph['Test'] = 'T-Test'
                    # two-way
                    else:
                        aov = pg.anova(dv=mean,
                                       between=['Group1', 'Group2'],
                                       data=data[data['Target Name'].eq(item)],
                                       detailed=False)
                        aov = aov.drop([3])
                        reject_tw, pval_corr_tw = pg.multicomp(list(
                            aov['p-unc']),
                                                               alpha=0.05,
                                                               method='bonf')
                        aov['p-value corrected'] = pval_corr_tw
                        aov['measures'] = ['independent'] * 3
                        aov['Target Name'] = [item] * 3
                        ph = pairwise_ttests(
                            data=data[data['Target Name'].eq(item)],
                            dv=mean,
                            between=['Group1', 'Group2'],
                            padjust='fdr_bh')
                        ph['Test'] = 'T-Test'
                    ph['Target Name'] = item
                if stats_dfs.empty:
                    stats_dfs = aov
                else:
                    stats_dfs = pandas.concat([stats_dfs, aov], ignore_index=True)
                if posthoc_dfs.empty:
                    posthoc_dfs = ph
                else:
                    posthoc_dfs = pandas.concat([posthoc_dfs, ph], ignore_index=True)

            reject, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')

            # reformat output tables
            stats_dfs = stats_dfs.rename(columns={
                'p-unc': 'p-value',
                'np2': 'effect size'
            })
            if tw == 'False':
                stats_dfs['p-value corrected'] = pvals_corr
                stats_dfs['distribution'] = ['parametric'] * len(targets)
                stats_dfs['test'] = ['ANOVA'] * len(targets)
                stats_dfs['statistic'] = ['NA'] * len(targets)
            else:
                stats_dfs['distribution'] = ['parametric'] * (len(targets) * 3)
                stats_dfs['test'] = ['ANOVA'] * (len(targets) * 3)
                stats_dfs['statistic'] = ['NA'] * (len(targets) * 3)
            cols = [
                'Target Name', 'Source', 'DF', 'F', 'MS', 'SS', 'p-value',
                'p-value corrected', 'measures', 'distribution', 'test',
                'statistic', 'effect size'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)
            if tw == 'False':
                posthoc_dfs = posthoc_dfs.drop(['Contrast', 'T'], axis=1)
            else:
                posthoc_dfs = posthoc_dfs.drop(['T'], axis=1)
            posthoc_dfs = posthoc_dfs.rename(
                columns={
                    'hedges': 'effect size',
                    'p-corr': 'p-value corrected',
                    'p-unc': 'p-value',
                    'p-adjust': 'correction method',
                    'BF10': 'Bayes factor',
                    'dof': 'DF'
                })
            if tw == 'False':
                cols2 = [
                    'Target Name', 'A', 'B', 'DF', 'p-value corrected',
                    'p-value', 'correction method', 'Paired', 'Parametric',
                    'Test', 'effect size', 'Bayes factor'
                ]
            else:
                cols2 = [
                    'Target Name', 'Contrast', 'Group1', 'A', 'B', 'DF',
                    'p-value corrected', 'p-value', 'correction method',
                    'Paired', 'Parametric', 'Test', 'effect size',
                    'Bayes factor'
                ]
            posthoc_dfs = posthoc_dfs.reindex(columns=cols2)

    # nonparametric tests for not normally distributed data
    else:
        if quantity == 2:
            stats_dfs = pandas.DataFrame()
            posthoc_dfs = pandas.DataFrame()
            group = data['Group'].dropna()
            group = group.drop_duplicates(keep='first').values.tolist()
            for item in targets:
                df = data[data['Target Name'].eq(item)]
                group1 = df[df['Group'].eq(group[0])][mean]
                group2 = df[df['Group'].eq(group[1])][mean]
                if rm == 'True':
                    # Wilcoxon signed-rank test for paired samples
                    test = wilcoxon(group1, group2)
                    test = pandas.DataFrame(
                        {
                            'Target Name': item,
                            'pvalue': test.pvalue,
                            'statistic': test.statistic
                        },
                        index=[0])
                else:
                    # Mann-Whitney U test for independent samples
                    test = mannwhitneyu(group1, group2)
                    test = pandas.DataFrame(
                        {
                            'Target Name': item,
                            'pvalue': test.pvalue,
                            'statistic': test.statistic
                        },
                        index=[0])
                if stats_dfs.empty:
                    stats_dfs = test
                else:
                    stats_dfs = pandas.concat([stats_dfs, test], ignore_index=True)

        elif quantity >= 3:
            stats_dfs = pandas.DataFrame()
            posthoc_dfs = pandas.DataFrame()

            pvals = []
            for item in targets:
                if rm == 'True':
                    # friedman test for repeated measurements
                    df = pg.friedman(dv=mean,
                                     within='Group',
                                     subject='Sample Name',
                                     data=data[data['Target Name'].eq(item)])
                    pvals.append(df['p-unc'][0])
                    df['test'] = ['Friedman Q']
                    df['measures'] = ['dependent']
                    df = df.rename(columns={'Q': 'statistic'})
                    df['Target Name'] = item
                    df['DF'] = 'NA'
                    ph = pairwise_ttests(
                        data=data[data['Target Name'].eq(item)],
                        dv=mean,
                        within='Group',
                        subject='Sample Name',
                        padjust='fdr_bh',
                        parametric=False)
                    ph['Target Name'] = item
                    ph['DF'] = 'NA'
                    ph['Bayes factor'] = 'NA'
                    ph['Test'] = 'Wilcoxon'
                else:
                    # Kruskal-Wallis H test
                    df = pg.kruskal(dv=mean,
                                    between='Group',
                                    data=data[data['Target Name'].eq(item)])
                    pvals.append(df['p-unc'][0])
                    df['test'] = ['Kruskal-Wallis H']
                    df['measures'] = ['independent']
                    df = df.rename(columns={'H': 'statistic'})
                    df['Target Name'] = item
                    df['DF'] = 'NA'
                    ph = pairwise_ttests(
                        data=data[data['Target Name'].eq(item)],
                        dv=mean,
                        between='Group',
                        padjust='fdr_bh',
                        parametric=False)
                    ph['Target Name'] = item
                    ph['DF'] = 'NA'
                    ph['Bayes factor'] = 'NA'
                    ph['Test'] = 'Mann-Whitney U'
                if stats_dfs.empty:
                    stats_dfs = df
                else:
                    stats_dfs = pandas.concat([stats_dfs, df], ignore_index=True)
                if posthoc_dfs.empty:
                    posthoc_dfs = ph
                else:
                    posthoc_dfs = pandas.concat([posthoc_dfs, ph], ignore_index=True)

            reject, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')
            # reformat output tables
            stats_dfs = stats_dfs.rename(columns={
                'dof': 'DF',
                'p-unc': 'p-value'
            })
            stats_dfs['p-value corrected'] = pvals_corr
            stats_dfs['distribution'] = ['non-parametric'] * len(targets)
            stats_dfs['MS'] = ['NA'] * len(targets)
            stats_dfs['SS'] = ['NA'] * len(targets)
            stats_dfs['effect size'] = ['NA'] * len(targets)
            cols = [
                'Target Name', 'DF', 'MS', 'SS', 'p-value',
                'p-value corrected', 'measures', 'distribution', 'test',
                'statistic', 'effect size'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)

            posthoc_dfs = posthoc_dfs.drop(['Contrast'], axis=1)
            posthoc_dfs = posthoc_dfs.rename(
                columns={
                    'hedges': 'effect size',
                    'p-corr': 'p-value corrected',
                    'p-unc': 'p-value',
                    'p-adjust': 'correction method',
                    'BF10': 'Bayes factor'
                })
            cols2 = [
                'Target Name', 'A', 'B', 'DF', 'p-value corrected', 'p-value',
                'correction method', 'Paired', 'Parametric', 'Test',
                'effect size', 'Bayes factor'
            ]
            posthoc_dfs = posthoc_dfs.reindex(columns=cols2)

    return stats_dfs, posthoc_dfs
Example #13
def test_posthoc(df, dep_var, ind_vars, is_non_normal=None):
    # print(f'\n{dep_var}')
    ind_vars = sorted(ind_vars)

    if is_non_normal is None:
        normality_p = test_normality(df, dep_var, ind_vars)
        significants = [p for p in normality_p if p < 0.01]
        is_non_normal = len(significants) > 0

    iv_combinations = []

    for iv in ind_vars:
        for iv1 in ind_vars:
            if (iv != iv1) and ((iv, iv1) not in iv_combinations) and (
                (iv1, iv) not in iv_combinations):
                iv_combinations.append((iv, iv1))

    for comb in iv_combinations:
        x = df.loc[df['Condition number'] == comb[0]][dep_var]
        y = df.loc[df['Condition number'] == comb[1]][dep_var]

        try:
            if is_non_normal:
                # s, p = wilcoxon(x, y)
                results = pg.wilcoxon(x, y, alternative='two-sided')
                results = results.round(4)

                t = list(results['W-val'])[0]
                p = list(results['p-val'])[0]

                prefix = '   '

                if p < .05:
                    prefix = '*  '
                if p < .01:
                    prefix = '** '
                if p < .001:
                    prefix = '***'

                print(
                    f'{prefix}{comb} Wilco: W={round(t, 2)}, p={round(p, 3)}')
            else:
                paired = len(x) == len(y)
                results = pg.ttest(x,
                                   y,
                                   paired=paired,
                                   alternative='two-sided')
                results = results.round(4)

                t = list(results['T'])[0]
                p = list(results['p-val'])[0]

                prefix = '   '

                if p < .05:
                    prefix = '*  '
                if p < .01:
                    prefix = '** '
                if p < .001:
                    prefix = '***'

                print(
                    f'{prefix}{comb} Ttest: t={round(t, 2)}, p={round(p, 3)}')

        except Exception as e:
            print(f'Error in {comb}: {e}')

    return
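# A hypothetical usage sketch for test_posthoc() above: a small long-format frame with a
# 'Condition number' column; passing is_non_normal explicitly skips the test_normality()
# helper, which is not shown in this snippet.
import numpy as np
import pandas as pd
import pingouin as pg

rng = np.random.default_rng(1)
demo = pd.DataFrame({
    'Condition number': np.repeat([1, 2, 3], 20),
    'Reaction time': np.concatenate([rng.normal(m, 0.1, 20) for m in (0.50, 0.55, 0.70)]),
})
test_posthoc(demo, 'Reaction time', ind_vars=[1, 2, 3], is_non_normal=False)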
Example #14
    ax2 = pg.qqplot(x2, ax=ax2)
    st.pyplot(fig)

    st.success("Levene test for homoscedasticity of variances")
    homoscedasticity = pg.homoscedasticity(df, dv=x_var, group=y_var)
    st.write(homoscedasticity)

    if param_vs_nonparam == "Parametric tests (Student, Welch)":
        if homoscedasticity.loc["levene", "pval"] < 0.05:
            test_message = "Welch test results:"
        else:
            test_message = "Student t-test results:"

        st.success(test_message)

        t = pg.ttest(x1, x2)
        st.write(t)

    else:
        test_message = "Mann-Whitney test results:"
        st.success(test_message)

        mw = pg.mwu(x1, x2)
        st.write(mw)

    md = markdown.Markdown()
    ipsum_path = Path('Md/student_help.md')

    data = ipsum_path.read_text(encoding='utf-8')
    html = md.convert(data)
    # help_markdown = util.read_markdown_file("help.md")
Example #15
fig, axes = plt.subplots(2, 2, figsize=(9, 4))
metric_types = ['magnitude', 'n_spindles', 'amplitude', 'duration']

p_all = np.zeros((4, 4))
for j_metric_type, metric_type in enumerate(metric_types):

    df_metric_type = stats_df_all.query(
        'metric_type=="{}"'.format(metric_type))
    for j_fb_type, fb_type in enumerate(fb_types):
        ax = axes[j_metric_type // 2, j_metric_type % 2]
        df = df_metric_type.query('fb_type=="{}"'.format(fb_type))

        pd.set_option('display.max_columns', 500)
        res = ttest(df.query('baseline=="After"')['metric'],
                    df.query('baseline=="Before"')['metric'],
                    paired=True)
        # res = pairwise_ttests(df, dv='metric', within='baseline', subject='subj_id')
        p = res['p-val'].values[0]
        p_all[j_fb_type, j_metric_type] = p
        res_str = '$p_u$={:.3f}\n'.format(
            p) + r'$Diff_{CI95}$=' + '[{}, {}]'.format(*res['CI95%'].values[0])

        x_before = df.query('baseline=="Before"')['metric'].values
        x_after = df.query('baseline=="After"')['metric'].values
        for j in range(len(x_before)):
            pair = np.array([x_before[j], x_after[j]])
            ax.plot(np.array([0, 2]) + 3 * j_fb_type,
                    pair,
                    '--o',
                    color='C3' if p < 0.05 else 'k',
Example #16
    'Error': rmaPTER,
    'AbsError': rmaAbsPTER,
    'RT': rmaRT
}

df_PT = pd.DataFrame(data=d2)
df_PT = df_PT[df_PT.AbsError.notnull()]  # no nan
assert (df_PT.PT.isnull().sum() == 0)

df_mean_nn = df_mean[df_mean.AbsError.notnull()]  # dropping null values
####################################
# # Running Statistical Tests
######################################

# t-test for comparison to Sven's analysis
ttestSNR = pingouin.ttest(loSNR, hiSNR, paired=True)  # correction='auto'

# rm_anova for SNR on Error
rm_SNR = pingouin.rm_anova(data=df_mean_nn,
                           dv='AbsError',
                           within=['SNR'],
                           subject='Sub')
print(rm_SNR)

# # MLM for SNR on Error
# mlm_SNR = smf.mixedlm("AbsError ~ SNR", df_mean_nn, groups=df_mean_nn["Sub"])
# mdf_SNR = mlm_SNR.fit()
# print(mdf_SNR.summary())
#
# A = np.identity(len(mdf_SNR.params))
# A = A[1:,:]
Example #17
fa_IM_B = 1 - Data_IM_B[(Data_IM_B['SameDifferent'] == 'D')].groupby(
    ['Participant'])['isCorrect'].mean()
d_IM_B = SDT(hit_IM_B.tolist(), fa_IM_B.tolist())

## one-sample t test, whether d prime is different from zero
# in all conditions, the d prime was significant larger than zero, except p8!!!
#t1, p1 = stats.ttest_1samp(d_CA_T,0.0)
#t2, p2 = stats.ttest_1samp(d_CM_T,0.0)
#t3, p3 = stats.ttest_1samp(d_IA_T,0.0)
#t4, p4 = stats.ttest_1samp(d_IM_T,0.0)
#t5, p5 = stats.ttest_1samp(d_CA_B,0.0)
#t6, p6 = stats.ttest_1samp(d_CM_B,0.0)
#t7, p7 = stats.ttest_1samp(d_IA_B,0.0)
#t8, p8 = stats.ttest_1samp(d_IM_B,0.0)

e1 = ttest(d_CA_T, 0.0)
e2 = ttest(d_CM_T, 0.0)
e3 = ttest(d_IA_T, 0.0)
e4 = ttest(d_IM_T, 0.0)
[e1['p-val'], e2['p-val'], e3['p-val'], e4['p-val']]
[e1['cohen-d'], e2['cohen-d'], e3['cohen-d'], e4['cohen-d']]

e5 = ttest(d_CA_B, 0.0)
e6 = ttest(d_CM_B, 0.0)
e7 = ttest(d_IA_B, 0.0)
e8 = ttest(d_IM_B, 0.0)
[e5['p-val'], e6['p-val'], e7['p-val'], e8['p-val']]
[e5['cohen-d'], e6['cohen-d'], e7['cohen-d'], e8['cohen-d']]

dprime = pd.DataFrame({
    'Congruent_Aligned_Top': d_CA_T,
Example #18
def gen_histograms(plot_type="histogram"):

    df_pg = pg.ttest(df_stats["valid_wake"],
                     df_stats['valid_sleep'],
                     paired=True)
    df_pg["Variable"] = ["Valid-Invalid"]

    if plot_type == "histogram":
        fig, axes = plt.subplots(2, 3, figsize=(10, 6))
        plt.subplots_adjust(left=.05, top=.95, hspace=.25)

        bins = np.arange(0, 1.05, .1)
        axes[0][0].hist(df_stats["n_valid"],
                        color='green',
                        alpha=.5,
                        edgecolor='black',
                        bins=bins)
        axes[0][0].set_title("% valid (all)")

        axes[1][0].hist(df_stats["n_invalid"],
                        color='red',
                        alpha=.5,
                        edgecolor='black',
                        bins=bins)
        axes[1][0].set_title("% invalid (all)")

        axes[0][1].hist(df_stats["valid_wake"],
                        color='green',
                        alpha=.5,
                        edgecolor='black',
                        bins=bins)
        axes[0][1].set_title("% valid wake")

        axes[0][2].hist(df_stats["invalid_wake"],
                        color='red',
                        alpha=.5,
                        edgecolor='black',
                        bins=bins)
        axes[0][2].set_title("% invalid wake")

        axes[1][1].hist(df_stats["valid_sleep"],
                        color='green',
                        alpha=.5,
                        edgecolor='black',
                        bins=bins)
        axes[1][1].set_title("% valid sleep")

        axes[1][2].hist(df_stats["invalid_sleep"],
                        color='red',
                        alpha=.5,
                        edgecolor='black',
                        bins=bins)
        axes[1][2].set_title("% invalid sleep")

    if plot_type == 'barplot':

        df_desc = df_stats.describe()

        fig, ax = plt.subplots(1, figsize=(10, 6))
        ax.bar(x=df_desc.columns,
               height=df_desc.loc['mean'],
               yerr=df_desc.loc["std"],
               capsize=4,
               color=['green', 'red'],
               edgecolor='black',
               alpha=.5)
        ax.set_title("Mean ± SD")

    if plot_type == 'boxplot':

        fig, ax = plt.subplots(1, figsize=(10, 6))
        df_stats.boxplot(grid=False, ax=ax)

    if plot_type == "scatter":
        plt.scatter(df_stats["valid_wake"],
                    df_stats["valid_sleep"],
                    edgecolors='black',
                    color='red')
        plt.ylabel("valid_sleep")
        plt.xlabel("valid_wake")
        plt.plot(np.arange(0, 1.1, .1),
                 np.arange(0, 1.1, .1),
                 color='black',
                 linestyle='dashed')

    return df_pg
Example #19
 def quantUnpaired(imgDir, sheetName, sheetDf, showDf=False, silent=True):
     print("######################################## ", sheetName,
           " ########################################"
           ) if not silent else None
     print(sheetDf.describe()) if not silent else None
     statDf = pd.DataFrame(columns=[
         'COMPARISON', 'TEST', 'STATISTICS', 'P-VALUE', 'EFFECT SIZE'
     ])
     if len(sheetDf.columns) > 2:
         print(sheetDf) if showDf else None
         aov = pg.rm_anova(sheetDf)
         statistic = aov['F'].values[0]
         pvalue = aov['p-GG-corr'].values[
             0] if 'p-GG-corr' in aov.columns.values else aov[
                 'p-unc'].values[0]
         effsize = aov['np2'].values[0]
         print(sheetDf.columns.str.cat(sep=' | '), " -> ANOVA (statistic:",
               statistic, " p-value: ", pvalue, ")") if not silent else None
         statDf = pd.concat([statDf, pd.DataFrame([{
             'COMPARISON': 'ALL',
             'TEST': "ANOVA",
             'STATISTICS': statistic,
             'P-VALUE': pvalue,
             'EFFECT SIZE': effsize
         }])], ignore_index=True)
     for i in range(len(sheetDf.columns.values)):
         for j in range(i + 1, len(sheetDf.columns.values)):
             try:
                 df = sheetDf[[
                     sheetDf.columns.values[i], sheetDf.columns.values[j]
                 ]]
                 print(df) if showDf else None
                 statistic, pvalue = stats.ttest_ind(*[
                     df.loc[~np.isnan(df[factor]), factor]
                     for factor in df.columns.values
                 ])
                 ttest_stats = pg.ttest(df[df.columns[0]],
                                        df[df.columns[1]],
                                        paired=False)
                 statistic = ttest_stats['T'].values[0]
                 pvalue = ttest_stats['p-val'].values[0]
                 effsize = ttest_stats['cohen-d'].values[0]
                 print(sheetDf.columns.values[i], '|',
                       sheetDf.columns.values[j],
                       " -> Student (statistic: ", statistic, ", p-value: ",
                       pvalue, ")") if not silent else None
                 statDf = pd.concat([statDf, pd.DataFrame([{
                     'COMPARISON': sheetDf.columns.values[i] + '|' + sheetDf.columns.values[j],
                     'TEST': "Student",
                     'STATISTICS': statistic,
                     'P-VALUE': pvalue,
                     'EFFECT SIZE': effsize
                 }])], ignore_index=True)
             except ValueError as StudentError:
                 print(sheetDf.columns.values[i], '|',
                       sheetDf.columns.values[j], " -> Student (",
                       StudentError, ")") if not silent else None
                 statDf = pd.concat([statDf, pd.DataFrame([{
                     'COMPARISON': sheetDf.columns.values[i] + '|' + sheetDf.columns.values[j],
                     'TEST': "Student",
                     'STATISTICS': -1,
                     'P-VALUE': -1,
                     'EFFECT SIZE': -1
                 }])], ignore_index=True)
     BoxPlotter.BoxPlotter(filename=imgDir + '/' + sheetName + '.png',
                           title=sheetName,
                           sheetDf=sheetDf,
                           statDf=statDf)
Example #20
    # statDF['sem'] = df.groupby(['participantsType', 'decisionSteps'])["avoidCommitPercent"].apply(calculateSE)

    statDF = statDF[statDF['participantsType'] == 'Human']
    # statDF = statDF[statDF['participantsType'] == 'RL Agent']

    # statDF = statDF[statDF['decisionSteps'] == 1]
    #
    # print(statDF)
    # dfExpTrail.to_csv('dfExpTrail.csv')

    # Compute the two-way mixed-design ANOVA
    calAnova = 1
    if calAnova:
        import pingouin as pg
        pd.set_option('max_columns', 8)
        stats = pg.ttest(statDF['ShowCommitmentPercent'], 0.5)
        print(stats)
        print('mean:', np.mean(statDF['ShowCommitmentPercent']))
        # print(stats['p-val'])
        # print(stats['CI95%'])

        from scipy import stats
        pop_mean = 0.5
        t, p_twotail = stats.ttest_1samp(statDF['ShowCommitmentPercent'],
                                         pop_mean)
        print('t=', t, 'p=', p_twotail)

        # from scipy import stats
        # a = stats.ttest_1samp(statDF['ShowCommitmentPercent'], 0.5)
        # print(a)
Example #21
                                  y='model_uncertainty',
                                  subject='image')

            print(dataset + "/" + model, float(corr_res['r']),
                  float(corr_res['pval']))

    print('\nged, p-values, each pair ged(model1) < ged(model2)')
    for dataset in data_images['dataset'].unique():
        data_set = data_images[data_images['dataset'] == dataset]
        for modeli in data_set['model'].unique():
            model_datai = data_set[data_set['model'] == modeli]
            for modelj in data_set['model'].unique():
                if modeli == modelj: continue
                model_dataj = data_set[data_set['model'] == modelj]
                ged_res = pg.ttest(model_datai['ged'],
                                   model_dataj['ged'],
                                   tail='less')
                print(dataset + "/" + modeli + "/" + modelj,
                      float(ged_res['p-val']))

    print('\ncorrelation uncertainty, agreement')
    for dataset in data_samples['dataset'].unique():
        data_set = data_samples[data_samples['dataset'] == dataset]
        for model in data_set['model'].unique():
            model_data = data_set[data_set['model'] == model]
            corr_res = pg.rm_corr(model_data,
                                  x='annot_agreement',
                                  y='model_uncertainty',
                                  subject='image')

            print(dataset + "/" + model, float(corr_res['r']),
print("AL Runs Scored Variance: {}".format(al_rs.var()))

# box plot
fig, ax = plt.subplots(figsize=(8, 8))
sns.boxplot(x="League",
            y="RS",
            data=batting_df,
            palette="Set1",
            boxprops=dict(alpha=0.5))
ax.set(title="Runs Scored Distribution by League")
plt.show()

# Pooled two-sample t-test
test_result = pg.ttest(al_rs,
                       nl_rs,
                       paired=False,
                       alternative='greater',
                       correction=False).round(3)
print("------- Pooled two-sample t-test result -------")
print(test_result.to_string())
# given the p-value is approximately 0,
# we reject H0 and have a strong evidence that AL teams scored more than NL teams on average
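# Optional cross-check with scipy (assumes scipy >= 1.6 for the 'alternative' argument);
# al_rs and nl_rs are the two series compared above.
from scipy import stats as sp_stats
t_stat, p_val = sp_stats.ttest_ind(al_rs, nl_rs, equal_var=True, alternative='greater')
print("scipy pooled t-test cross-check: t = {:.3f}, p = {:.4f}".format(t_stat, p_val))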

# 'RS' histogram and QQ plot
fig, axes = plt.subplots(1, 2, figsize=(20, 8))

sns.histplot(batting_df['RS'], kde=True, ax=axes[0], color="navy")
axes[0].set_title('Team RS Histogram')
stats.probplot(batting_df['RS'], plot=axes[1])
axes[1].set_title('Team RS QQ Plot')
plt.show()
Example #23
import matplotlib.patheffects as mpatheffects

from matplotlib import rcParams
rcParams['savefig.dpi'] = 300
rcParams['interactive'] = True
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = 'Arial'
rcParams['axes.spines.top'] = False
rcParams['axes.spines.right'] = False

EXPORT_FNAME = '../results/sri-phenoll2_plot-GRANT.png'

# from excel sheet
lo = [13, 16, 14, 11, 31, 23, 25, 13, 12, 10, 11, 19]
hi = [30, 40, 35, 15, 35, 31, 36, 11, 10, 23, 17, 19]
ttest = pg.ttest(lo, hi, paired=True)

yvals, yerr = zip(*[(np.mean(vals), sem(vals)) for vals in [lo, hi]])

xvals = [0, 1]
COLORS = dict(lo='gainsboro', hi='cornflowerblue')

color_seq = [COLORS[c] for c in ['lo', 'hi']]

# legend
STROKE_WIDTH = .6
FONT_SIZE = 15
LEFT_PAD = .02
# FONT_PAD = .055

BARWIDTH = .7
Example #24
vara_alle
vara_musik
vara_sound

stda_alle
stda_music
stda_sound

# plot of one group
get_ipython().magic(u'matplotlib inline')
plt.plot(mean_w)
plt.xlabel('Time points')
plt.ylabel('Cortisol nmol/L')
plt.title("Mean cortisol measurements in the sound group")

# plot of the two groups against each other
get_ipython().magic(u'matplotlib inline')
fig, ax = plt.subplots()
ax.plot(mean_m, label='music')
ax.plot(mean_w, label='sound')
plt.xlabel('Time points')
plt.ylabel('Cortisol nmol/L')
plt.title("Mean cortisol, both groups")
plt.legend()

'''Normally distributed? - No if p is below alpha'''
stats.shapiro(mean_w)
ttest(mean_w, mean_m, paired=False)
'''Mann-Whitney U test, assuming non-parametric data'''
pg.mwu(mean_w, mean_m)
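# A small sketch of the decision the comments above describe: use the unpaired t-test when
# both groups look normally distributed (Shapiro p > alpha), otherwise fall back to the
# Mann-Whitney U test. mean_w and mean_m are the two groups from above.
alpha = 0.05
_, p_w = stats.shapiro(mean_w)
_, p_m = stats.shapiro(mean_m)
if p_w > alpha and p_m > alpha:
    res = ttest(mean_w, mean_m, paired=False)
else:
    res = pg.mwu(mean_w, mean_m)
print(res)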
Example #25
         color="blue",
         label="AM",
         linestyle="none")
plt.xlabel("sujet")
plt.ylabel("différence de cadence de modulation (%)")
plt.xticks(subject)
plt.legend(loc=0)
plt.savefig(os.path.join(path_fig, "seuils_discrimination.png"))
plt.show()

#%% ANOVA of the adaptive thresholds
import pingouin as pg

data_adapt = pd.read_csv("seuils_adaptatifs.txt", index_col=0)
data_discr = pd.read_csv("seuils_discrimination.txt", index_col=0)

adapt_am = data_adapt[data_adapt.modulation_type == "AM"]
adapt_fm = data_adapt[data_adapt.modulation_type == "FM"]

aov_adapt_am = pg.anova(data=adapt_am, dv="seuil", between="subject")
aov_adapt_fm = pg.anova(data=adapt_fm, dv="seuil", between="subject")

pg.print_table(aov_adapt_am)
pg.print_table(aov_adapt_fm)

#%% t-test of the discrimination thresholds

discr_t_test = pg.ttest(x=am_discr, y=fm_discr, paired=True, tail="one-sided")
pg.print_table(discr_t_test)

#discr_t_test.to_excel("t_test_seuils_discrimination.xlsx")
Example #26
fig2 = alt.Chart(data_line).mark_line().encode(
    x='x', y=alt.Y('y', scale=alt.Scale(domain=(-30, 30))))

#%% defining reference line on y=0
vline = pd.DataFrame([{"x": 0}])
fig3 = (alt.Chart(vline).mark_rule(color="black",
                                   opacity=1.0,
                                   strokeDash=[3, 5]).encode(x="x:Q"))

fig4 = alt.Chart(pd.DataFrame({'y':
                               [0]})).mark_rule(color="black",
                                                opacity=1.0,
                                                strokeDash=[3,
                                                            5]).encode(y='y')

#%% drawing graphs
fig5 = fig1 + fig2 + fig3 + fig4
with col4:
    st.write("####")
    st.altair_chart(fig5, use_container_width=True)

#%% t test
with col4:
    eq1 = r"y = b_{0} + b_{1}x"
    st.latex(eq1)
    eq2 = r"y = b_{0} + b_{1}x"
    eq2 = eq2.replace("b_{0}", f"{mean}").replace("b_{1}x", "0")
    st.latex(eq2)
    st.write("T-test results")
    ttest = pg.ttest(points, 0).round(2).drop(['tail', 'CI95%'], axis=1)
    st.write(ttest)
Example #27
df.groupby('grouping').describe()
display(df.describe())
print('---')

# normal
stats.shapiro(male)
stats.shapiro(female)
display(stats.shapiro(male))
display(stats.shapiro(female))
print('If p < 0.05 --> the data are not normally distributed.')
print('---')

# boxplot
sns.boxplot(x='grouping', y='percent', data=df)
plt.savefig('boxplot.png')
# plt.show()

# homogeneity of variance
stats.levene(male, female)
display(stats.levene(male, female))
# the null hypothesis assumes homogeneous variances - which is not the case at this p-value
print('If p < 0.05 --> the variances are not homogeneous.')
print('---')

# two-samples t-test
res = pg.ttest(male, female, correction=False)
display(res)

# note! hard-coded
# print("The null hypothesis of Shapiro-Wilk assumes a normally distributed group. Use Mann-Whitney!")