def test__lmer_get_summaries_df(epoch_id, time):
    """Fit an lmer grid and run the summary checker on its scraped summaries.

    The epochs come from ``_get_epochs_fg`` using the ``epoch_id`` and
    ``time`` arguments supplied to the test; the fitted grid is scraped with
    ``_lmer_get_summaries_df`` and the result is handed to
    ``_check_summary_df`` together with the grid.
    """
    epochs_fg = _get_epochs_fg(epoch_id=epoch_id, time=time)
    lmer_options = {
        "RHS": "1 + continuous + (1 | categorical)",
        "parallel": PARALLEL,
        "n_cores": N_CORES,
    }
    grid = fitgrid.lmer(epochs_fg, **lmer_options)

    summary = fitgrid.utils.summary
    scraped_df = summary._lmer_get_summaries_df(grid)
    summary._check_summary_df(scraped_df, grid)
def get_lmer_dfbetas(epochs, factor, **kwargs):
    r"""Fit lmers leaving out factor levels one by one, compute DFBETAS.

    Parameters
    ----------
    epochs : Epochs
        Epochs object
    factor : str
        column name of the factor of interest
    **kwargs
        keyword arguments to pass on to ``fitgrid.lmer``, like ``RHS``

    Returns
    -------
    dfbetas : pandas.DataFrame
        dataframe containing DFBETAS values

    Examples
    --------
    Example calculation showing how to pass in model fitting parameters::

        dfbetas = fitgrid.utils.lmer.get_lmer_dfbetas(
            epochs=epochs,
            factor='subject_id',
            RHS='x + (x|a)'
        )

    Notes
    -----
    DFBETAS is computed according to the following formula [NieGroPel2012]_:

    .. math::

       DFBETAS_{ij} = \frac{\hat{\gamma}_i - \hat{\gamma}_{i(-j)}}{se\left(\hat{\gamma}_{i(-j)}\right)}

    for parameter :math:`i` and level :math:`j` of ``factor``.

    """
    # epochs data re-indexed by (epoch_id, time), and the levels to leave out
    table = epochs.table.reset_index().set_index(
        [epochs.epoch_id, epochs.time]
    )
    levels = table[factor].unique()

    def _coefs_without(level):
        # fit the model on the epochs with one factor level removed
        reduced_epochs = fitgrid.epochs_from_dataframe(
            table[table[factor] != level],
            time=epochs.time,
            epoch_id=epochs.epoch_id,
            channels=epochs.channels,
        )
        return fitgrid.lmer(reduced_epochs, **kwargs).coefs

    # the generator keeps the leave-one-out fits lazy: each one runs only
    # when pd.concat consumes it
    looo_coefs = pd.concat(
        (_coefs_without(level) for level in levels), keys=levels, axis=1
    )

    estimate_rows = pd.IndexSlice[:, :, 'Estimate']
    se_rows = pd.IndexSlice[:, :, 'SE']

    # coefficient estimates and standard errors from the leave-one-out fits
    looo_estimates = looo_coefs.loc[estimate_rows, :]
    looo_se = looo_coefs.loc[se_rows, :]

    # coefficient estimates from the regular fit (all levels included)
    full_fit_coefs = fitgrid.lmer(epochs, **kwargs).coefs
    all_levels_estimates = full_fit_coefs.loc[estimate_rows, :]

    # drop the last index level, which the slicing above left constant
    for frame in (looo_estimates, looo_se, all_levels_estimates):
        frame.index = frame.index.droplevel(level=-1)

    # (all_levels_estimate - level_excluded_estimate) / level_excluded_se
    differences = all_levels_estimates.sub(looo_estimates, level=1)
    dfbetas = differences.div(looo_se, level=1)

    return dfbetas.stack(level=0)
def test_summarize_lmer_kwargs(kw, est, aic):
    """Compare ``summarize(..., "lmer", ...)`` with a direct ``fitgrid.lmer`` fit.

    The same epochs, LHS, RHS and extra keyword arguments ``kw`` are given
    to ``fitgrid.lmer`` and to ``fitgrid.utils.summary.summarize``. For each
    summary key that is also a coefs key or an attribute of the grid's
    ``tester``, the two sets of values are compared, and the ``Estimate`` and
    ``AIC`` rows are additionally compared with ``est`` and ``aic``.
    """
    epochs_fg = _get_epochs_fg(seed=0)  # fixed seed freezes the data

    LHS = epochs_fg.channels
    # the names RHS and key must stay as they are: the query strings below
    # refer to them as @RHS and @key
    RHS = "1 + (1 | categorical)"  # for fitgrid.lmer

    # what the fitgrid modeler returns
    fit = fitgrid.lmer(epochs_fg, LHS=LHS, RHS=RHS, **kw)
    fit_betas = fit.coefs
    fit_betas.index.names = [fit.time, 'beta', 'key']

    # what the summarize wrapper scrapes from the grid
    summaries_df = fitgrid.utils.summary.summarize(
        epochs_fg, "lmer", LHS=LHS, RHS=RHS, **kw
    )
    fitgrid.utils.summary._check_summary_df(summaries_df, fit)

    summary_keys = set(summaries_df.index.unique('key'))

    # keys found in both the summary and the coefs dataframe ... lmer specific
    shared_keys = summary_keys & set(fit_betas.index.unique('key'))

    # summary keys that name an attribute of the grid's tester
    attr_keys = [k for k in summary_keys if k in dir(fit.tester)]

    for key in shared_keys.union(attr_keys):
        from_coefs = key in shared_keys
        if not from_coefs and key not in attr_keys:
            raise ValueError(f"unknown key: {key}")

        if from_coefs:
            # these come from the coefs dataframe
            modeler_vals = fit_betas.query("key==@key").reset_index(drop=True)
            summarize_vals = summaries_df.query(
                "model==@RHS and key==@key"
            ).reset_index(drop=True)
        else:
            # these come from grid attributes
            modeler_vals = getattr(fit, key).reset_index(drop=True)
            summarize_vals = summaries_df.query("key==@key").reset_index(
                drop=True
            )

        # NOTE(review): if these are DataFrames, all() iterates the column
        # labels rather than the values, and the result here is discarded,
        # so this presumably only checks that the comparison can be
        # evaluated -- confirm intent before tightening
        try:
            all(modeler_vals == summarize_vals)
        except Exception as fail:
            print(f"kwargs: {kw} key: {key}")
            raise fail

        # smoke test that the REML=True v. False is not changing Estimate
        # and is changing AIC
        if key == 'Estimate':
            assert all(summarize_vals == est)

        if key == 'AIC':
            assert all(summarize_vals == aic)
def fit_lmers(fg_epochs, LHS, RHSs, parallel=True, n_cores=4, save_as=None):
    """Fit a set of lmer models and return rERPs, AICs, and lmer warnings

    Parameters
    ----------
    fg_epochs : fitgrid epochs
        epochs data, passed as the first argument to ``fitgrid.lmer``
    LHS : fitgrid.lmer LHS specification
    RHSs : list of fitgrid.lmer RHS specifications
        one model is fit per entry, at least one is required
    parallel : bool
        passed on to ``fitgrid.lmer``
    n_cores : int
        passed on to ``fitgrid.lmer``
    save_as : 2-ple of (filename, group) or None
        if not None, the result is also written with
        ``lmer_coefs.to_hdf(filename, key=group)``; a failure to save is
        reported as a warning and the dataframe is still returned

    Returns
    -------
    lmer_coefs : multi-indexed pandas.DataFrame
        Time, model, param, key x LHS

    Raises
    ------
    ValueError
        if ``RHSs`` is empty

    Warns
    -----
    FutureWarning
        always, the layout of ``lmer_coefs`` is subject to change
    UserWarning
        if ``save_as`` is given and saving fails

    Examples
    --------
    ::

        LHS = ['MiPf', 'MiCe', 'MiPa', 'MiOc', 'cproi']
        LHS = ['cproi']
        RHSs = [
            'a_cloze_c + (a_cloze_c | sub_id) + (a_cloze_c | m_item_id)',
            'a_cloze_c + (a_cloze_c | sub_id) + (1 | m_item_id)',
            'a_cloze_c + (1 | sub_id) + (a_cloze_c | m_item_id)',
            'a_cloze_c + (1 | sub_id) + (1 | m_item_id)'
        ]

    """
    # 1-D fit attributes scraped alongside the coefficients
    attribs = ['AIC', 'has_warning']

    # pieces of the result, concatenated once at the end; DataFrame.append
    # was removed in pandas 2.0 and re-copies the data on every call
    frames = []

    for rhs in RHSs:
        fg_lmer = fitgrid.lmer(
            fg_epochs, LHS=LHS, RHS=rhs, parallel=parallel, n_cores=n_cores
        )

        # coef estimates and stats ... these are 2-D. Name the index levels
        # on the copy so the names are in place regardless of whether the
        # grid hands back the same coefs object on every access.
        coefs_df = fg_lmer.coefs.copy()
        coefs_df.index.names = ['Time', 'param', 'key']
        coefs_df.insert(0, 'model', rhs)
        coefs_df.set_index('model', append=True, inplace=True)
        coefs_df.reset_index(['key', 'param'], inplace=True)
        frames.append(coefs_df)

        params = coefs_df['param'].unique()

        # scrape AIC and other useful 1-D fit attributes
        for attrib in attribs:
            attrib_df = getattr(fg_lmer, attrib).copy()
            attrib_df.insert(0, 'model', rhs)
            attrib_df.insert(1, 'key', attrib)
            attrib_df.set_index('model', append=True, inplace=True)

            # propagate attributes to each param ... wasteful but tidy
            for param in params:
                param_attrib = attrib_df.copy()
                param_attrib.insert(0, 'param', param)
                frames.append(param_attrib)

        # release the fitted grid before fitting the next model
        del fg_lmer

    if not frames:
        raise ValueError("RHSs must contain at least one model formula")

    lmer_coefs = pd.concat(frames)

    # refresh index
    lmer_coefs.set_index(['param', 'key'], append=True, inplace=True)
    lmer_coefs.sort_index(inplace=True)

    if save_as is not None:
        # saving is best effort: the fits are expensive, so a failed save
        # must not discard the returned dataframe
        try:
            fname, group = save_as
            lmer_coefs.to_hdf(fname, key=group)
        except Exception as fail:
            warnings.warn(
                f"save_as={save_as} failed: {fail}. You can try to "
                "save the returned dataframe with pandas.to_hdf()"
            )

    warnings.warn(
        'lmer_coefs are in early days, subject to change',
        FutureWarning,
        stacklevel=2,
    )
    return lmer_coefs