Example #1
0
def rm_one_way_anova(dataset: Dataset, design, combined_data: CombinedData):
    """Fit a one-way repeated-measures ANOVA and return the fitted result.

    Parameters
    ----------
    dataset : Dataset
        Supplies the data via ``dataset.data`` and the subject identifier
        column name via ``dataset.pid_col_name``.
    design : mapping
        Only the ``"within subjects"`` entry is consulted; it is compared
        against each explanatory variable's ``metadata[name]``.
    combined_data : CombinedData
        Supplies the explanatory variables and the single explained variable.

    Returns
    -------
    The object returned by ``AnovaRM(...).fit()``.  Earlier revisions
    computed this value and then dropped it, so the function returned None.

    Notes
    -----
    ``name`` is a metadata key defined outside this block.
    Between-subject factors are not passed on: the original author noted
    that ``AnovaRM`` does not implement them.
    """
    data = dataset.data
    xs = combined_data.get_explanatory_variables()
    ys = combined_data.get_explained_variables()

    assert (len(ys) == 1)
    y = ys[0]

    # Keep only the explanatory variables declared as within-subject factors.
    within_subjs = [
        x.metadata[name] for x in xs
        if "within subjects" in design
        and design["within subjects"] == x.metadata[name]
    ]

    # Named subject_col rather than `id` so the builtin is not shadowed.
    subject_col = dataset.pid_col_name
    aovrm2way = AnovaRM(data,
                        depvar=y.metadata[name],
                        subject=subject_col,
                        within=within_subjs)
    return aovrm2way.fit()
def analyzeData(results2):
    """Print a repeated-measures ANOVA and pairwise tests per measure.

    For each of the ``'Accuracy'`` and ``'Reaction Time'`` columns of
    ``results2`` this prints the measure name, the fitted ``AnovaRM`` result
    (subject column ``'Subject'``, within factor ``'Condition'``, repeated
    cells aggregated with ``'mean'``) and the first element returned by
    ``MultiComparison.allpairtest`` using ``sci.ttest_rel`` with the
    ``'bonf'`` method.
    """
    for measure in ('Accuracy', 'Reaction Time'):
        print(measure)

        rm_model = AnovaRM(data=results2,
                           depvar=measure,
                           subject='Subject',
                           within=['Condition'],
                           aggregate_func='mean')
        print(rm_model.fit())

        pairwise = MultiComparison(results2[measure], results2['Condition'])
        outcome = pairwise.allpairtest(sci.ttest_rel, method='bonf')
        print(outcome[0])
Example #3
0
def three_sample_test(sample1, sample2, sample3, test):
    """Run a three-group significance test and return ``(statistic, p_value)``.

    Parameters
    ----------
    sample1, sample2, sample3 : sequence of numbers
        Observations for the three groups.  For ``"rm-anova"`` the i-th entry
        of each sample is treated as the same subject, so the three samples
        must have equal length.
    test : str
        One of ``"anova"`` (parametric, between-subjects), ``"rm-anova"``
        (parametric, within-subjects), ``"kruskal-wallis"`` (nonparametric,
        between-subjects) or ``"friedman"`` (nonparametric, within-subjects).

    Returns
    -------
    tuple
        ``(test_stat, p_val)``.

    Raises
    ------
    ValueError
        If ``test`` is not a supported name (previously this surfaced as an
        ``UnboundLocalError`` at the return statement), or if the samples
        differ in length for ``"rm-anova"``.
    """
    if test == "anova":  # parametric, between-subjects
        test_stat, p_val = scipy.stats.f_oneway(sample1, sample2, sample3)
    elif test == "rm-anova":  # parametric, within-subjects
        if not (len(sample1) == len(sample2) == len(sample3)):
            raise ValueError(
                "rm-anova needs one observation per subject in every group; "
                "the three samples must have the same length")
        # Long format: one row per (subject, group) observation.
        rows = [
            (response, subject, group)
            for subject, triple in enumerate(zip(sample1, sample2, sample3))
            for group, response in zip("ABC", triple)
        ]
        df = pd.DataFrame(rows, columns=["response", "id", "group"])
        res = AnovaRM(df, depvar="response", subject="id",
                      within=["group"]).fit()
        # The table is indexed by factor name, so select the single row by
        # position explicitly; `series[0]` relied on a deprecated fallback.
        test_stat = res.anova_table['F Value'].iloc[0]
        p_val = res.anova_table['Pr > F'].iloc[0]
    elif test == "kruskal-wallis":  # nonparametric, between-subjects
        test_stat, p_val = scipy.stats.kruskal(sample1, sample2, sample3)
    elif test == "friedman":  # nonparametric, within-subjects
        test_stat, p_val = scipy.stats.friedmanchisquare(
            sample1, sample2, sample3)
    else:
        raise ValueError(
            f"unknown test {test!r}; expected 'anova', 'rm-anova', "
            "'kruskal-wallis' or 'friedman'")
    return test_stat, p_val
Example #4
0
def continuous_paired_group_repeated_measures_anova(**kwargs):
    """Run a repeated-measures ANOVA over one combined condition factor.

    Keyword arguments
    -----------------
    data_frame : DataFrame
        Must contain the dependent variable, the condition column(s) and a
        ``test_index`` column that is used as the subject identifier.
    dependable_variable : str
        Name of the dependent-variable column.
    conditions : str or list[str]
        Condition column name(s).  Several columns are merged into a single
        ``"condition"`` factor by joining their values with ``"_"``.

    Returns
    -------
    tuple
        Always ``(True, 100)``; the fitted result is only printed.

    Raises
    ------
    ValueError
        If ``conditions`` is an empty list.

    Notes
    -----
    A ``"condition"`` column is written onto the caller's ``data_frame``, as
    before.  Previously that column was seeded with ``np.empty`` and stayed
    uninitialised whenever fewer than two condition columns were given, so
    the ANOVA ran on garbage; it is now always filled from the data.
    """
    data_frame = kwargs["data_frame"]
    dependable_variable = kwargs["dependable_variable"]
    conditions = kwargs["conditions"]

    # make one condition out of multiple, otherwise not supported by AnovaRM
    if isinstance(conditions, list):
        if not conditions:
            raise ValueError("conditions must name at least one column")
        if len(conditions) > 1:
            # str-cast first so non-string condition values can be joined.
            combined = data_frame[conditions].astype(str).agg("_".join,
                                                              axis=1)
        else:
            combined = data_frame[conditions[0]]
    else:
        combined = data_frame[conditions]
    data_frame["condition"] = combined

    # The original columns are left in place: AnovaRM is only told about the
    # dependent variable, the subject column and "condition".  The former
    # `data_frame.drop(columns=conditions)` discarded its result and so
    # never had any effect.
    # todo: list in conditions not supported map to single condition required, reduce subject size otherwise
    aovrm = AnovaRM(data_frame[data_frame["test_index"] < 1000],
                    dependable_variable,
                    'test_index',
                    within=["condition"],
                    aggregate_func=np.mean)
    res = aovrm.fit()

    print(res)
    # todo: how to read pvalue res.summary()...
    return True, 100
Example #5
0
    def rm(self, data, dep_var, subject, within, aggregate_func=None):
        """
        Run a repeated-measures ANOVA and hand back its table.

        Parameters:
        ----------
        data: DataFrame
            Long-format data holding the dependent-variable column, the
            subject column and the factor column(s).
        dep_var: str
            Column name of the dependent variable.
        subject: str
            Column name of the subject identifier.
        within: a list of strings
            Column names of the within-subject factor(s).
        aggregate_func:
            Forwarded unchanged to ``AnovaRM``; defaults to None.

        Return:
        ------
        aov_table: DataFrame
            The ``anova_table`` attribute of the fitted result.
        """
        fitted = AnovaRM(data,
                         depvar=dep_var,
                         subject=subject,
                         within=within,
                         aggregate_func=aggregate_func).fit()
        return fitted.anova_table
Example #6
0
def anova(diff1, diff2, recall, within_factors):
    """Run a repeated-measures ANOVA on two sets of performance differences.

    Both inputs are passed through ``rearange`` (defined outside this block)
    with the ``'short'`` argument, concatenated, and analysed with
    ``AnovaRM`` using ``'performance'`` as the dependent variable,
    ``'sub_id'`` as the subject column and cell means as aggregation.
    The fitted result is printed.

    Parameters
    ----------
    diff1, diff2 :
        Inputs accepted by ``rearange``.
    recall : bool
        Forwarded to ``rearange``; also selects the label ("recall" or
        "recognition") used in the printed header.
    within_factors : list[str]
        Within-subject factor column names.

    Returns
    -------
    tuple
        ``(diff1, diff2, diffs_for_anova)``: the two rearranged inputs and
        their concatenation.
    """
    r = 'recall' if recall else 'recognition'

    diff1 = rearange(diff1, 'short', within_factors=within_factors,
                     recall=recall)
    diff2 = rearange(diff2, 'short', within_factors=within_factors,
                     recall=recall)

    diffs_for_anova = pd.concat([diff1, diff2])

    # perform anova
    anovarm = AnovaRM(diffs_for_anova, 'performance', 'sub_id',
                      within=within_factors, aggregate_func='mean')
    res = anovarm.fit()

    # The rounded p-value that used to be computed here was never used, and
    # it relied on the deprecated positional `Series[0]` lookup, so it has
    # been removed.
    print(f'ANOVA ON DIFFERENCES in memory performance - {r}', res)

    return diff1, diff2, diffs_for_anova
Example #7
0
def rm_one_way(xs, y, key, df):
    """Return the text form of a repeated-measures ANOVA fit.

    ``df`` is analysed with ``y`` as the dependent variable, ``key`` as the
    subject column and ``xs`` as the within-subject factors; repeated cells
    are aggregated with ``'mean'``.  The fitted result is converted with
    ``str`` before being returned.
    """
    fitted = AnovaRM(df,
                     depvar=y,
                     subject=key,
                     within=xs,
                     aggregate_func='mean').fit()
    return str(fitted)
Example #8
0
def rm_one_way(xs, y, key, df):
    """Fit a repeated-measures ANOVA and return its string representation.

    Parameters
    ----------
    xs : list[str]
        Within-subject factor column names.
    y : str
        Dependent-variable column name.
    key : str
        Subject identifier column name.
    df : DataFrame
        Data to analyse; repeated cells are aggregated with ``'mean'``.

    Returns
    -------
    str
        ``str()`` of the fitted result.
    """
    # The unused between_subjs / within_subjs lists and the leftover pdb
    # comment were dead code and have been dropped.
    aovrm2way = AnovaRM(df, depvar=y, subject=key, within=xs,
                        aggregate_func='mean')
    res2way = aovrm2way.fit()
    return str(res2way)
Example #9
0
def test_repeated_measures_aggregation():
    """Aggregating fully duplicated data must reproduce the plain result.

    The module-level ``data`` frame is stacked on itself, so every cell
    holds two identical observations; averaging them with ``np.mean`` should
    give the same ANOVA table as fitting ``data`` directly.
    """
    df1 = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent way to stack the frame on itself.
    double_data = pd.concat([data, data], axis=0)
    df2 = AnovaRM(double_data,
                  'DV',
                  'id',
                  within=['A', 'B', 'D'],
                  aggregate_func=np.mean).fit()

    assert_frame_equal(df1.anova_table, df2.anova_table)
Example #10
0
def test_repeated_measures_aggregation_one_subject_duplicated():
    """Duplicating one subject's rows must not change the aggregated result.

    Only the rows whose ``id`` equals ``'1'`` are appended a second time;
    after aggregation with ``np.mean`` the ANOVA table should equal the one
    obtained from the module-level ``data`` frame alone.
    """
    df1 = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent way to add the duplicated rows.
    with_duplicate = pd.concat([data, data.loc[data['id'] == '1', :]])
    df2 = AnovaRM(with_duplicate.reset_index(),
                  'DV',
                  'id',
                  within=['A', 'B', 'D'],
                  aggregate_func=np.mean).fit()

    assert_frame_equal(df1.anova_table, df2.anova_table)
Example #11
0
def test_repeated_measures_aggregation():
    """Check that mean-aggregating doubled data matches the plain fit.

    The module-level ``data`` frame is concatenated with itself and fitted
    with ``aggregate_func=np.mean``; the resulting ANOVA table must equal
    the table from fitting ``data`` on its own.
    """
    baseline = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()

    duplicated = pd.concat([data, data], axis=0)
    aggregated_model = AnovaRM(duplicated,
                               'DV',
                               'id',
                               within=['A', 'B', 'D'],
                               aggregate_func=np.mean)
    aggregated = aggregated_model.fit()

    assert_frame_equal(baseline.anova_table, aggregated.anova_table)
Example #12
0
def AnovaRM_with_post_hoc(data, dep_var, subject, within, only_significant = False):
    """Print a repeated-measures ANOVA, then run paired t-test post-hocs.

    The ANOVA is fitted with ``AnovaRM(data, dep_var, subject, within)`` and
    its result printed.  Afterwards ``pairwise_ttest_rel`` (defined outside
    this block) is called with the same data, dependent variable and
    ``within`` value, plus the ``only_significant`` flag.  Nothing is
    returned.
    """
    # One within
    fitted = AnovaRM(data, dep_var, subject, within).fit()
    print(fitted)

    # Post-hoc with ttest
    pairwise_ttest_rel(data,
                       dep_var,
                       within=within,
                       only_significant=only_significant)
Example #13
0
def rm_one_way_anova(dataset: Dataset, predictions, design,
                     combined_data: CombinedData):
    """Fit a one-way repeated-measures ANOVA and wrap it in a ``TestResult``.

    Parameters
    ----------
    dataset : Dataset
        Supplies the data via ``dataset.data`` and the subject identifier
        column name via ``dataset.pid_col_name``.
    predictions : sequence or None
        If non-empty, the first prediction is reported; when the first item
        is itself a list, its first element is used.
    design : mapping
        Only the ``"within subjects"`` entry is consulted; it is compared
        against each explanatory variable's ``metadata[name]``.
    combined_data : CombinedData
        Supplies the explanatory variables, the single explained variable
        and ``alpha``.

    Returns
    -------
    TestResult
        Carries the F value, p-value and degrees of freedom of the
        within-subject factor together with the full ANOVA table.

    Raises
    ------
    ValueError
        If no explanatory variable matches the declared within-subject
        factor, or if the ANOVA table has no row for it.

    Notes
    -----
    ``name`` and ``rm_one_way_anova_name`` are defined outside this block.
    The reported ``x`` is now the within-subject variable itself.  Earlier
    revisions reused whatever variable the selection loop visited last and
    hit an ``UnboundLocalError`` when that was not the within-subject
    factor.  Between-subject factors are not passed on: the original author
    noted that ``AnovaRM`` does not implement them.
    """
    data = dataset.data
    xs = combined_data.get_explanatory_variables()
    ys = combined_data.get_explained_variables()

    assert (len(ys) == 1)
    y = ys[0]

    # Variables declared as within-subject factors by the design.
    within_vars = [
        var for var in xs
        if "within subjects" in design
        and design["within subjects"] == var.metadata[name]
    ]
    if not within_vars:
        raise ValueError(
            "no explanatory variable matches the design's "
            "'within subjects' factor")
    within_subjs = [var.metadata[name] for var in within_vars]
    x = within_vars[-1]

    if not predictions:
        prediction = None
    elif isinstance(predictions[0], list):
        prediction = predictions[0][0]
    else:
        prediction = predictions[0]

    key = dataset.pid_col_name
    aovrm2way = AnovaRM(data,
                        depvar=y.metadata[name],
                        subject=key,
                        within=within_subjs,
                        aggregate_func='mean')
    res2way = aovrm2way.fit()
    result_df = res2way.anova_table

    col_name = x.metadata[name]
    if col_name not in result_df.index:
        raise ValueError(
            f"ANOVA table has no row for factor {col_name!r}")
    row_data = result_df.loc[col_name]
    test_statistic = row_data['F Value']
    p_val = row_data['Pr > F']
    dof = (row_data['Num DF'], row_data['Den DF'])

    test_result = TestResult(name=rm_one_way_anova_name,
                             test_statistic=test_statistic,
                             p_value=p_val,
                             prediction=prediction,
                             dof=dof,
                             alpha=combined_data.alpha,
                             table=result_df,
                             x=x,
                             y=y)

    return test_result
Example #14
0
def rm_anova(data=None, subject=None, within=None, between=None, dv=None):
    """
    Fit ``AnovaRM`` and return the resulting ANOVA table.

    ``dv`` is passed as the dependent variable (``depvar``); ``data``,
    ``subject``, ``within`` and ``between`` are forwarded under their own
    names.  The ``anova_table`` attribute of the fitted result is returned.
    """
    model = AnovaRM(data=data,
                    depvar=dv,
                    subject=subject,
                    within=within,
                    between=between)
    return model.fit().anova_table
Example #15
0
def getRMAnova(dataSet, labels, verbose=False):
    """Fit a one-factor repeated-measures ANOVA across several samples.

    ``dataSet`` is a sequence of samples and ``labels[j]`` names the j-th
    one.  The samples are concatenated into a long frame with columns
    ``'id'`` (position of the value inside its sample), ``'rt'`` (the
    value) and ``'cond'`` (the sample's label), then analysed with ``'rt'``
    as dependent variable, ``'id'`` as subject and ``'cond'`` as the
    within-subject factor.  When ``verbose`` is true the result summary is
    printed.  The fitted result is returned.
    """
    cond_labels = np.concatenate(
        [[labels[j]] * len(sample) for j, sample in enumerate(dataSet)])
    values = np.concatenate(dataSet)
    subject_ids = np.concatenate(
        [np.arange(len(sample)) for sample in dataSet])

    long_frame = pd.DataFrame(
        {'id': subject_ids, 'rt': values, 'cond': cond_labels})
    result = AnovaRM(long_frame, 'rt', 'id', within=['cond']).fit()

    if verbose:
        print (result.summary())
    return result
Example #16
0
def test_repeated_measures_aggregate_func():
    """Exercise the ``aggregate_func`` argument on duplicated data.

    Checks that constructing ``AnovaRM`` on the doubled module-level
    ``data`` frame without an aggregation function raises ``ValueError``,
    that models built with ``np.mean`` and ``np.median`` keep different
    ``aggregate_func`` attributes, and that both produce the same ANOVA
    table.
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent way to stack the frame on itself.
    double_data = pd.concat([data, data], axis=0)

    assert_raises(ValueError, AnovaRM, double_data, 'DV', 'id',
                  within=['A', 'B', 'D'])

    m1 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.mean)
    m2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.median)

    assert_raises(AssertionError, assert_equal,
                  m1.aggregate_func, m2.aggregate_func)
    assert_frame_equal(m1.fit().anova_table, m2.fit().anova_table)
Example #17
0
def test_repeated_measures_aggregate_func_mean():
    """``aggregate_func='mean'`` must resolve to the same callable as ``np.mean``.

    Two models are built on the doubled module-level ``data`` frame, one
    with ``np.mean`` and one with the string ``'mean'``; their
    ``aggregate_func`` attributes are compared for equality.
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent way to stack the frame on itself.
    double_data = pd.concat([data, data], axis=0)
    m1 = AnovaRM(double_data,
                 'DV',
                 'id',
                 within=['A', 'B', 'D'],
                 aggregate_func=np.mean)

    m2 = AnovaRM(double_data,
                 'DV',
                 'id',
                 within=['A', 'B', 'D'],
                 aggregate_func='mean')

    assert_equal(m1.aggregate_func, m2.aggregate_func)
Example #18
0
def rank_multiple_normal_homoscedastic(data, alpha, verbose, order,
                                       effect_size, force_mode):
    """
    Analyzes data using repeated measures ANOVA and Tukey HSD.

    ``data`` is stacked into long format (columns ``id``, ``treatment``,
    ``result``), a repeated-measures ANOVA with ``treatment`` as the
    within-subject factor yields the p-value, and Tukey HSD half-widths
    are used to put a confidence interval around each column mean.

    Parameters
    ----------
    data : DataFrame
        Presumably one column per population/treatment and one row per
        paired observation -- TODO confirm against callers.
    alpha : float
        Threshold the ANOVA p-value is compared against; only used for
        the verbose messages in this function.
    verbose : bool
        If true, print whether the null hypothesis is rejected.
    order, effect_size, force_mode :
        Forwarded unchanged to ``_create_result_df_skeleton``.

    Returns
    -------
    _ComparisonResult
        Built from the ranking frame (with ``ci_lower`` / ``ci_upper``
        filled in), the ANOVA p-value, the labels ``'anova'`` and
        ``'tukeyhsd'``, and the effect-size method and reorder position
        reported by ``_create_result_df_skeleton``.
    """
    # Wide -> long: stack() produces a two-level index that reset_index()
    # exposes as 'level_0' / 'level_1', with the values in column 0.
    stacked_data = data.stack().reset_index()
    stacked_data = stacked_data.rename(columns={
        'level_0': 'id',
        'level_1': 'treatment',
        0: 'result'
    })
    anova = AnovaRM(stacked_data, 'result', 'id', within=['treatment'])
    # First row of the table; 'treatment' is the only factor passed in.
    pval = anova.fit().anova_table['Pr > F'].iat[0]
    if verbose:
        if pval >= alpha:
            print(
                "Fail to reject null hypothesis that there is no difference between the distributions (p=%f)"
                % pval)
        else:
            print(
                "Rejecting null hypothesis that there is no difference between the distributions (p=%f)"
                % pval)
            print(
                "Using Tukey HSD post hoc test.",
                "Differences are significant if the confidence intervals of the mean values are not overlapping."
            )

    # The Tukey HSD step runs regardless of the ANOVA outcome above.
    multicomp = MultiComparison(stacked_data['result'],
                                stacked_data['treatment'])
    tukey_res = multicomp.tukeyhsd()
    # must create plot to get confidence intervals
    tukey_res.plot_simultaneous()
    # delete plot instead of showing
    plt.close()

    rankdf, effsize_method, reorder_pos = _create_result_df_skeleton(
        data,
        None,
        True,
        order,
        effect_size=effect_size,
        force_mode=force_mode)
    for population in rankdf.index:
        mean = data.loc[:, population].mean()
        # NOTE(review): indexes halfwidths by the column's position in
        # `data`; assumes Tukey's group order matches data.columns -- confirm.
        ci_range = tukey_res.halfwidths[data.columns.get_loc(population)]
        lower, upper = mean - ci_range, mean + ci_range
        rankdf.at[population, 'ci_lower'] = lower
        rankdf.at[population, 'ci_upper'] = upper
    return _ComparisonResult(rankdf, pval, None, 'anova', 'tukeyhsd',
                             effsize_method, reorder_pos)
Example #19
0
def test_repeated_measures_aggregate_func_mean():
    """Verify the ``'mean'`` shortcut equals passing ``np.mean`` directly.

    Both models are built on the module-level ``data`` frame concatenated
    with itself; only their ``aggregate_func`` attributes are compared.
    """
    duplicated = pd.concat([data, data], axis=0)

    with_callable = AnovaRM(duplicated, 'DV', 'id',
                            within=['A', 'B', 'D'],
                            aggregate_func=np.mean)
    with_shortcut = AnovaRM(duplicated, 'DV', 'id',
                            within=['A', 'B', 'D'],
                            aggregate_func='mean')

    assert_equal(with_callable.aggregate_func, with_shortcut.aggregate_func)
    def anovaRM(self, depvar, subject, within=None, between=None, aggregate_func=None):
        """
        Repeated measures Anova using least squares regression.

        The full model regression residual sum of squares is used to compare
        with the reduced model for calculating the within-subject effect sum
        of squares. Currently, only fully balanced within-subject designs are
        supported. Calculation of between-subject effects and corrections for
        violation of sphericity are not yet implemented.

        The fitted result is printed and also returned.

        Parameters
        ----------
        depvar : str
            The dependent variable in data
        subject : str
            Specify the subject id
        within : list[str]
            The within-subject factors
        between : list[str]
            The between-subject factors, this is not yet implemented
        aggregate_func : {None, 'mean', callable}
            If the data set contains more than a single observation per
            subject and cell of the specified model, this function will be
            used to aggregate the data before running the Anova. None (the
            default) will not perform any aggregation; 'mean' is a shortcut
            to numpy.mean. An exception will be raised if aggregation is
            required, but no aggregation function was specified.

        Returns
        -------
        AnovaResults instance
            Previously this was documented but the method returned None.

        Notes
        -----
        This implementation currently only supports fully balanced designs.
        If the data contain more than one observation per subject and cell of
        the design, these observations need to be aggregated into a single
        observation before the Anova is calculated, either manually or by
        passing an aggregation function via the aggregate_func keyword
        argument. Note that if the input data set was not balanced before
        performing the aggregation, the implied heteroscedasticity of the
        data is ignored.

        References
        ----------
        Rutherford, Andrew. Anova and ANCOVA: a GLM approach. John Wiley & Sons, 2011.

        """
        res = AnovaRM(self.__data, depvar, subject, within, between, aggregate_func)
        res = res.fit()
        print(res)
        return res
    def run_anova(self):
        """Run a repeated-measures ANOVA on ``self.df_long`` with Tukey follow-up.

        Fits ``AnovaRM`` with ``"Minutes"`` as dependent variable, ``"ID"``
        as subject and ``"Group"`` as the within-subject factor, storing the
        model in ``self.aov`` and the fit in ``self.aov_results``, and
        prints the ANOVA table.  ``self.tukey`` is set to ``"n.s."`` unless
        the p-value in the table's first row is at most 0.05, in which case
        it holds the Tukey HSD result and its summary is printed.
        """
        alpha = 0.05  # shared by the ANOVA gate and the Tukey HSD call

        self.aov = AnovaRM(self.df_long, depvar="Minutes", subject="ID", within=["Group"])
        self.aov_results = self.aov.fit()

        print("\n" + "======================================== MAIN EFFECTS ========================================")
        print("\n", self.aov_results.anova_table)

        self.tukey = "n.s."

        # The table is indexed by factor name, so take the first row by
        # position explicitly; `series[0]` relied on a deprecated fallback.
        if self.aov_results.anova_table["Pr > F"].iloc[0] <= alpha:
            print("")
            tukey_data = MultiComparison(self.df_long["Minutes"], self.df_long["Group"])
            self.tukey = tukey_data.tukeyhsd(alpha=alpha)
            print("============================================ POST HOC ===========================================")
            print("\n", self.tukey.summary())
Example #22
0
def test_repeated_measures_aggregate_compare_with_ezANOVA():
    """Compare the aggregated three-factor fit with stored ezANOVA values.

    The reference numbers are meant to reproduce R's `ezANOVA` (library
    ez).  The module-level ``data`` frame is doubled and aggregated with
    ``np.mean`` before fitting.
    """
    column_order = ['F Value', 'Num DF', 'Den DF', 'Pr > F']
    effects = pd.Index(['A', 'B', 'D', 'A:B', 'A:D', 'B:D', 'A:B:D'])

    f_values = [
        8.7650709, 8.4985785, 20.5076546, 0.8457797, 21.7593382,
        6.2416695, 5.4253359
    ]
    p_values = [
        0.021087505, 0.003833921, 0.002704428, 0.450021759,
        0.002301792, 0.011536846, 0.018010647
    ]
    expected = pd.DataFrame(
        {
            'F Value': f_values,
            'Num DF': [1, 2, 1, 2, 1, 2, 2],
            'Den DF': [7, 14, 7, 14, 7, 14, 14],
            'Pr > F': p_values,
        },
        index=effects)
    expected = expected[column_order]

    duplicated = pd.concat([data, data], axis=0)
    model = AnovaRM(duplicated,
                    'DV',
                    'id',
                    within=['A', 'B', 'D'],
                    aggregate_func=np.mean)
    observed = model.fit().anova_table

    assert_frame_equal(expected, observed, check_dtype=False)
Example #23
0
    def fit(self,
            data,
            depvar,
            subject,
            within=None,
            between=None,
            aggregate_func=None):
        """Fit a repeated-measures ANOVA and keep the result on the instance.

        Parameters
        ----------
        data : DataFrame
        depvar : str
            The dependent variable in `data`
        subject : str
            Specify the subject id
        within : list[str]
            The within-subject factors
        between : list[str]
            The between-subject factors, this is not yet implemented
        aggregate_func : {None, 'mean', callable}
            Used to collapse multiple observations per subject and cell
            into one before the Anova is run. `None` (the default) performs
            no aggregation; 'mean' is a shortcut to `numpy.mean`. An
            exception will be raised if aggregation is required, but no
            aggregation function was specified.

        Returns
        -------
        None
            The fitted AnovaResults instance is stored in `self._results`.

        Raises
        ------
        ValueError
            If the data need to be aggregated, but `aggregate_func` was not
            specified.

        """
        model_kwargs = dict(data=data,
                            depvar=depvar,
                            subject=subject,
                            within=within,
                            between=between,
                            aggregate_func=aggregate_func)
        self._results = AnovaRM(**model_kwargs).fit()
Example #24
0
def calculate_anova(df):
    """Collect one repeated-measures ANOVA p-value per element of ``df``.

    Each element is passed through ``calculate_avg_across_models`` (defined
    outside this block), extended with a subject-number column (1..9),
    labelled ``bert`` / ``baseline`` / ``opennmt`` / ``subject`` and melted
    to long format with columns ``subject``, ``model`` and ``corr``.  The
    p-value is read from the ``"subject"`` row of the first summary table.

    Returns the list of p-values in iteration order.
    """
    num_subjs = 9
    collected = []
    for vox in tqdm(df):
        averaged = calculate_avg_across_models(vox)
        subject_column = np.reshape(
            np.array(list(range(1, num_subjs + 1))), (num_subjs, 1))
        wide = pd.DataFrame(
            np.append(averaged, subject_column, 1),
            columns=['bert', 'baseline', 'opennmt', 'subject'])
        long_form = wide.melt(id_vars=["subject"],
                              var_name="model",
                              value_name="corr")
        # NOTE(review): "model" is passed as the subject column and
        # "subject" as the within factor -- looks swapped; confirm intent.
        fitted = AnovaRM(long_form, "corr", "model",
                         within=["subject"]).fit()
        first_table = fitted.summary().tables[0]
        collected.append(first_table["Pr > F"]["subject"])
    return collected
Example #25
0
def test_single_factor_repeated_measures_anova():
    """
    Single-factor repeated measures anova on the first 16 rows of ``data``.
    Expected values reproduce R's `ezANOVA` function from library ez.
    """
    fitted = AnovaRM(data.iloc[:16, :], 'DV', 'id', within=['B']).fit()
    expected = [[1, 7, 22.4, 0.002125452]]
    # Reorder the table columns by position to match the reference layout.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)
Example #26
0
def anova(data):
    """Run a one-factor repeated-measures ANOVA on wide-format data.

    ``data`` is melted on ``'sub_id'`` so that every other column becomes a
    level of the ``'cond'`` factor with its values in ``'performance'``.
    The F value and p-value of the table's first row are rounded to two
    decimals, printed, and returned.

    Parameters
    ----------
    data : DataFrame
        Wide frame with a ``'sub_id'`` column plus one column per condition.

    Returns
    -------
    list
        ``[[F, p]]`` with both numbers rounded to two decimals.
    """
    data = pd.melt(data,
                   id_vars='sub_id',
                   var_name='cond',
                   value_name='performance')

    # perform anova
    anovarm = AnovaRM(data, 'performance', 'sub_id', within=['cond'])
    res = anovarm.fit()

    # The table is indexed by factor name, so take the first row by
    # position explicitly; `series[0]` relied on a deprecated fallback.
    p = round(res.anova_table['Pr > F'].iloc[0], 2)
    f_value = round(res.anova_table['F Value'].iloc[0], 2)
    print(f_value, p)

    return [[f_value, p]]
Example #27
0
def test_two_factors_repeated_measures_anova():
    """
    Two-factor repeated measures anova on the first 48 rows of ``data``.
    Expected values reproduce R's `ezANOVA` function from library ez.
    """
    fitted = AnovaRM(data.iloc[:48, :], 'DV', 'id', within=['A', 'B']).fit()
    expected = [
        [1, 7, 40.14159, 3.905263e-04],
        [2, 14, 29.21739, 1.007549e-05],
        [2, 14, 17.10545, 1.741322e-04],
    ]
    # Reorder the table columns by position to match the reference layout.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)
Example #28
0
def test_three_factors_repeated_measures_anova():
    """
    Three-factor repeated measures anova on the full ``data`` frame.
    Expected values reproduce R's `ezANOVA` function from library ez.
    """
    fitted = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()
    expected = [
        [1, 7, 8.7650709, 0.021087505],
        [2, 14, 8.4985785, 0.003833921],
        [1, 7, 20.5076546, 0.002704428],
        [2, 14, 0.8457797, 0.450021759],
        [1, 7, 21.7593382, 0.002301792],
        [2, 14, 6.2416695, 0.011536846],
        [2, 14, 5.4253359, 0.018010647],
    ]
    # Reorder the table columns by position to match the reference layout.
    observed = fitted.anova_table.iloc[:, [1, 2, 0, 3]].values
    assert_array_almost_equal(observed, expected, decimal=5)
Example #29
0
def calculate_anova(args, all_corrs):
    """Fill a 3-D array with one repeated-measures ANOVA p-value per cell.

    ``all_corrs`` is indexed as ``all_corrs[subj][layer][i][j][k]`` for 9
    subjects and 12 layers; the output shape is taken from
    ``all_corrs[0][0].shape``.  For every ``(i, j, k)`` the 108 values are
    placed in a frame with columns ``voxel`` (all ones), ``corr``,
    ``subject`` and ``layer``, and the p-value of the ``"subject:layer"``
    row of the first summary table is stored.  ``args`` is not used.

    Returns the array of p-values.
    """
    dims = all_corrs[0][0].shape
    pvals = np.zeros((dims[0], dims[1], dims[2]))
    num_layers = 12
    num_subjs = 9
    print("LEN: " + str(len(all_corrs)))
    print("DIMS: " + str(all_corrs[0][0].shape))

    for i in tqdm(range(dims[0])):
        for j in range(dims[1]):
            for k in range(dims[2]):
                # Subject-major, layer-minor order, matching the
                # repeat/tile columns built below.
                cell_values = [
                    all_corrs[subj][layer][i][j][k]
                    for subj in range(num_subjs)
                    for layer in range(num_layers)
                ]

                # make dataframe
                frame = pd.DataFrame({
                    'voxel': np.ones(len(cell_values)),
                    'corr': cell_values,
                    'subject': np.repeat(list(range(1, num_subjs + 1)),
                                         num_layers),
                    'layer': np.tile(list(range(1, num_layers + 1)),
                                     num_subjs),
                })

                # NOTE(review): the constant 'voxel' column is passed as the
                # dependent variable and 'corr' as the subject column --
                # looks unintended; confirm before relying on these p-values.
                fitted = AnovaRM(frame,
                                 'voxel',
                                 'corr',
                                 within=['subject', 'layer']).fit()
                first_table = fitted.summary().tables[0]
                pvals[i, j, k] = first_table["Pr > F"]["subject:layer"]
    return pvals
Example #30
0
def anova_group(means, recog):
    """Run a one-factor repeated-measures ANOVA and return its p-value.

    ``means`` is melted on ``'sub_id'`` so that every other column becomes
    a level of the ``'cond'`` factor with its values in ``'performance'``.
    The fitted result is printed with a header naming the task.

    Parameters
    ----------
    means : DataFrame
        Wide frame with a ``'sub_id'`` column plus one column per condition.
    recog : bool
        Selects the label used in the printed header: ``'recog'`` when
        true, ``'recall'`` otherwise.

    Returns
    -------
    float
        The p-value of the table's first row, rounded to four decimals.
    """
    t = 'recog' if recog else 'recall'

    # melt df
    means = pd.melt(means,
                    id_vars='sub_id',
                    var_name='cond',
                    value_name='performance')

    anovarm = AnovaRM(means, 'performance', 'sub_id', within=['cond'])
    res = anovarm.fit()

    # The table is indexed by factor name, so take the first row by
    # position explicitly; `series[0]` relied on a deprecated fallback.
    p = round(res.anova_table['Pr > F'].iloc[0], 4)

    print(f'reaction times anova ({t})', res)

    return p
Example #31
0
def rlrlRMANOVA(mes):
    """Print repeated-measures ANOVAs for three controller metrics.

    ``mes`` is expanded with ``expandEvals`` (defined outside this block).
    A 1-based ``s_id`` subject column is derived from the frame's index,
    and ``error``, ``rise_time`` and ``energy`` are each analysed with
    ``model`` as the within-subject factor.  Each fit is printed under its
    own banner; nothing is returned.

    The banners for ``rise_time`` and ``energy`` previously repeated the
    "Error" title verbatim, a copy-paste slip that made the three outputs
    indistinguishable.
    """
    # RL-RL ANOVA RM
    aexps = expandEvals(mes)

    # Subject ids are index + 1; this was recomputed before every fit but
    # never changes, so it is assigned once.
    aexps['s_id'] = (np.array(aexps.index.values.tolist()) + 1).tolist()

    metrics = (
        ('error', 'Error'),
        ('rise_time', 'Rise Time'),
        ('energy', 'Energy'),
    )
    for column, title in metrics:
        print(f'********** RL Controller {title} RMANOVA **********')
        rma = AnovaRM(aexps, column, 's_id', within=['model']).fit()
        print(rma)