Esempio n. 1
0
def test__lmer_get_summaries_df(epoch_id, time):
    """Scrape summaries from an lmer grid and run them through the checker.

    Fits a random-intercept lmer on epochs built with the given
    ``epoch_id`` and ``time`` column names, collects the summary
    dataframe with ``_lmer_get_summaries_df`` and hands it, together with
    the fitted grid, to ``_check_summary_df``.
    """

    epochs_fg = _get_epochs_fg(epoch_id=epoch_id, time=time)

    lmer_kwargs = {
        "RHS": "1 + continuous + (1 | categorical)",
        "parallel": PARALLEL,
        "n_cores": N_CORES,
    }
    grid = fitgrid.lmer(epochs_fg, **lmer_kwargs)

    summary = fitgrid.utils.summary
    scraped = summary._lmer_get_summaries_df(grid)
    summary._check_summary_df(scraped, grid)
Esempio n. 2
0
def get_lmer_dfbetas(epochs, factor, **kwargs):
    r"""Compute DFBETAS by refitting lmer with each factor level held out.

    Parameters
    ----------
    epochs : Epochs
        Epochs object
    factor : str
        name of the column whose levels are left out one at a time
    **kwargs
        keyword arguments forwarded to ``fitgrid.lmer``, e.g., ``RHS``

    Returns
    -------
    dfbetas : pandas.DataFrame
        dataframe containing DFBETAS values

    Examples
    --------
    Example calculation showing how to pass in model fitting parameters::

        dfbetas = fitgrid.utils.lmer.get_lmer_dfbetas(
            epochs=epochs,
            factor='subject_id',
            RHS='x + (x|a)'
        )

    Notes
    -----
    DFBETAS is computed according to the following formula [NieGroPel2012]_:

    .. math::

       DFBETAS_{ij} = \frac{\hat{\gamma}_i - \hat{\gamma}_{i(-j)}}{se\left(\hat{\gamma}_{i(-j)}\right)}

    for parameter :math:`i` and level :math:`j` of ``factor``.


    """

    def _coefs_without(level):
        # refit on an epochs object that excludes all rows of this level
        reduced = fitgrid.epochs_from_dataframe(
            table[table[factor] != level],
            time=epochs.time,
            epoch_id=epochs.epoch_id,
            channels=epochs.channels,
        )
        return fitgrid.lmer(reduced, **kwargs).coefs

    def _select_stat(coefs, stat):
        # keep the rows for one statistic, then drop the last index level
        # (the one holding the statistic label) so frames align on the rest
        rows = coefs.loc[pd.IndexSlice[:, :, stat], :]
        rows.index = rows.index.droplevel(level=-1)
        return rows

    # re-index the epochs table by (epoch_id, time) and find the levels
    table = epochs.table.reset_index().set_index(
        [epochs.epoch_id, epochs.time]
    )
    levels = table[factor].unique()

    # leave-one-level-out fits, side by side with the left-out level as
    # the outer column key; the generator is consumed by pd.concat
    held_out_coefs = pd.concat(
        (_coefs_without(level) for level in levels), keys=levels, axis=1
    )
    held_out_estimates = _select_stat(held_out_coefs, 'Estimate')
    held_out_se = _select_stat(held_out_coefs, 'SE')

    # reference fit with every level included
    full_estimates = _select_stat(
        fitgrid.lmer(epochs, **kwargs).coefs, 'Estimate'
    )

    # (all_levels_estimate - level_excluded_estimate) / level_excluded_se
    difference = full_estimates.sub(held_out_estimates, level=1)
    scaled = difference.div(held_out_se, level=1)

    return scaled.stack(level=0)
Esempio n. 3
0
def test_summarize_lmer_kwargs(kw, est, aic):
    """Compare ``summarize`` output for lmer with a directly fitted lmer grid.

    The same epochs, LHS, RHS and extra keyword arguments are given to
    ``fitgrid.lmer`` and to ``fitgrid.utils.summary.summarize``; for each
    summary key that also appears in the grid coefs or as a grid attribute
    the two sets of values are compared.

    Parameters
    ----------
    kw : dict
        extra keyword arguments unpacked into both ``fitgrid.lmer`` and
        ``summarize``
    est
        value(s) compared with the summary rows keyed ``'Estimate'``
    aic
        value(s) compared with the summary rows keyed ``'AIC'``

    Notes
    -----
    The three arguments are presumably supplied by a pytest
    parametrization outside this view -- TODO confirm.
    """

    epochs_fg = _get_epochs_fg(seed=0)  # freeze data to test values

    LHS = epochs_fg.channels
    RHS = "1 + (1 | categorical)"  # for fitgrid.lmer

    # what the fitgrid modeler returns
    lmer_fit = fitgrid.lmer(epochs_fg, LHS=LHS, RHS=RHS, **kw)
    lmer_fit_betas = lmer_fit.coefs
    # rename the index levels so the last one can be queried as 'key' below
    lmer_fit_betas.index.names = [lmer_fit.time, 'beta', 'key']

    # what the summarize wrapper scrapes from the grid
    summaries_df = fitgrid.utils.summary.summarize(
        epochs_fg, "lmer", LHS=LHS, RHS=RHS, **kw
    )
    fitgrid.utils.summary._check_summary_df(summaries_df, lmer_fit)

    # compare results
    summary_keys = set(summaries_df.index.unique('key'))
    lmer_fit_betas_keys = set(lmer_fit_betas.index.unique('key'))

    # from the grid.params ... lmer specific
    shared_keys = summary_keys.intersection(lmer_fit_betas_keys)

    # other grid.attr: summary keys that are also attribute names of the
    # grid's ``tester`` object
    attr_keys = [key for key in summary_keys if key in dir(lmer_fit.tester)]
    for key in shared_keys.union(attr_keys):

        # these come from the coefs dataframe
        if key in shared_keys:
            modeler_vals = lmer_fit_betas.query("key==@key").reset_index(
                drop=True
            )

            summarize_vals = summaries_df.query(
                "model==@RHS and key==@key"
            ).reset_index(drop=True)

        # these come from grid attributes
        elif key in attr_keys:
            modeler_vals = getattr(lmer_fit, key).reset_index(drop=True)
            summarize_vals = summaries_df.query("key==@key").reset_index(
                drop=True
            )
        else:
            raise ValueError(f"unknown key: {key}")

        # NOTE(review): the result of all() is discarded, so this only fails
        # when the == comparison itself raises (e.g., mismatched labels).
        # If both operands are DataFrames, as .query() suggests, built-in
        # all() iterates the column labels rather than the compared values,
        # so asserting it as written would not check the values either.
        # Something like (a == b).all().all() may be intended -- confirm.
        try:
            all(modeler_vals == summarize_vals)
        except Exception as fail:
            # report which parametrization and key failed, then re-raise
            msg = f"kwargs: {kw} key: {key}"
            print(msg)
            raise fail

        # smoke test that the REML=True v. False is not changing Estimate
        # and is changing AIC
        # NOTE(review): same concern as above -- if the comparison yields a
        # DataFrame, all() sees column labels and these asserts presumably
        # pass regardless of est / aic; verify before relying on them.
        if key == 'Estimate':
            assert all(summarize_vals == est)

        if key == 'AIC':
            assert all(summarize_vals == aic)

    pass
Esempio n. 4
0
def fit_lmers(fg_epochs, LHS, RHSs, parallel=True, n_cores=4, save_as=None):
    """Fit a set of lmer models and return rERPs, AICs, and lmer warnings

    Parameters
    ----------
    fg_epochs : epochs object accepted by ``fitgrid.lmer``

    LHS : fitgrid.lmer LHS specification

    RHSs : list of fitgrid.lmer RHS specifications
        one model is fit per entry, in order

    parallel : bool
        passed through to ``fitgrid.lmer``

    n_cores : int
        passed through to ``fitgrid.lmer``

    save_as : 2-ple of (filename, key), optional
        when given, the result is also written with
        ``pandas.DataFrame.to_hdf``; a failure to save is reported as a
        warning and the dataframe is still returned

    Returns
    -------
    lmer_coefs : multi-indexed pandas.DataFrame
       Time, model, param, key x LHS

    Raises
    ------
    ValueError
        if ``RHSs`` contains no models

    Warns
    -----
    FutureWarning
        on every successful call, the output format is subject to change
    UserWarning
        if ``save_as`` was given and saving failed

    Examples
    --------
    ::

        LHS = ['MiPf', 'MiCe', 'MiPa', 'MiOc', 'cproi']
        LHS = ['cproi']

        RHSs = [
            'a_cloze_c + (a_cloze_c | sub_id) + (a_cloze_c | m_item_id)',
            'a_cloze_c + (a_cloze_c | sub_id) + (1 | m_item_id)',
            'a_cloze_c + (1 | sub_id) + (a_cloze | m_item_id)',
            'a_cloze_c + (1 | sub_id) + (1 | m_item_id)'
        ]

    """

    # 1-D fit attributes scraped alongside the coefficients
    attribs = ['AIC', 'has_warning']

    # pieces are collected here and concatenated once at the end;
    # DataFrame.append was removed in pandas 2.0 and re-copied the
    # accumulated frame on every call
    pieces = []
    for rhs in RHSs:
        fg_lmer = fitgrid.lmer(
            fg_epochs, LHS=LHS, RHS=rhs, parallel=parallel, n_cores=n_cores
        )

        # coef estimates and stats ... these are 2-D. The index names are
        # set on the working copy so the labels used by reset_index below
        # do not depend on ``coefs`` returning the same object twice.
        coefs_df = fg_lmer.coefs.copy()
        coefs_df.index.names = ['Time', 'param', 'key']
        coefs_df.insert(0, 'model', rhs)
        coefs_df.set_index('model', append=True, inplace=True)
        coefs_df.reset_index(['key', 'param'], inplace=True)
        pieces.append(coefs_df)

        params = coefs_df['param'].unique()

        # scrape AIC and other useful 1-D fit attributes
        for attrib in attribs:
            attrib_df = getattr(fg_lmer, attrib).copy()
            attrib_df.insert(0, 'model', rhs)
            attrib_df.insert(1, 'key', attrib)
            attrib_df.set_index('model', append=True, inplace=True)

            # propagate attributes to each param ... wasteful but tidy
            for param in params:
                param_attrib = attrib_df.copy()
                param_attrib.insert(0, 'param', param)
                pieces.append(param_attrib)

        # drop the reference to the grid before fitting the next model
        del fg_lmer

    if not pieces:
        raise ValueError("RHSs must contain at least one model formula")

    lmer_coefs = pd.concat(pieces)

    # refresh index
    lmer_coefs.set_index(['param', 'key'], append=True, inplace=True)
    lmer_coefs.sort_index(inplace=True)

    if save_as is not None:
        # saving is best-effort: the fits are expensive, so any failure
        # here is downgraded to a warning and the dataframe is returned
        try:
            fname, group = save_as
            lmer_coefs.to_hdf(fname, key=group)
        except Exception as fail:
            warnings.warn(
                f"save_as={save_as} failed: {fail}. You can try to "
                "save the returned dataframe with pandas.to_hdf()"
            )

    # actually issue the warning; constructing FutureWarning(...) alone
    # does nothing
    warnings.warn(
        'lmer_coefs are in early days, subject to change',
        FutureWarning,
        stacklevel=2,
    )
    return lmer_coefs