def _hopkins_ems(regression_model, v=False):
    """
    Find the MS denominator for the F-test of each effect from the table of
    expected mean squares, E(MS) (after Hopkins, 1976).

    :arg regression_model: model whose effects are examined; must satisfy
        ``ismodel``
    :arg bool v: print the E(MS) component table and, for each effect, the
        rows that match its denominator (False by default)

    :returns: list with one entry for each effect in
        ``regression_model.effects``: the index of the effect whose E(MS) row
        equals this effect's row with the effect's own component removed, or
        ``None`` if there is no such effect. If any factor of the model is a
        ``var``, every entry is ``None``.
    :raises NotImplementedError: if more than one effect matches

    """
    assert ismodel(regression_model)
    effects = regression_model.effects

    # check that no var is in model (isinstance also catches var subclasses)
    if any(isinstance(f, var) for f in regression_model.factors):
        return [None] * len(effects)

    # E(MS) table (after Hopkins, 1976): entry [i, j] is True when effect j
    # is a component of the E(MS) of effect i
    E_MS = np.array(
        [[all(f in e or f.random for f in e2.factors) and
          all(f in e2 or e2.nestedin(f) for f in e.factors)
          for e2 in effects]
         for e in effects],
        dtype=bool)

    if v:
        # single-argument print() gives the same output on Python 2 and 3
        print("E(MS) component table:\n%s" % (E_MS,))

    # read MS denominator for F tests from table
    MS_denominators = []
    for i, effect in enumerate(effects):
        # the denominator has the same E(MS) except for the tested effect
        # itself; copy the row so that the table is not modified
        e_ms_den = E_MS[i].copy()
        e_ms_den[i] = False
        match = np.all(E_MS == e_ms_den, axis=1)
        if v:
            print("%s : %s" % (effect.name, match))

        n_matches = match.sum()
        if n_matches == 1:
            MS_denominators.append(np.where(match)[0][0])
        elif n_matches == 0:
            MS_denominators.append(None)
        else:
            raise NotImplementedError("too many matches")
    return MS_denominators
def correlations(Y, Xs, cat=None, levels=(.05, .01, .001), diff=None,
                 sub=None, pmax=None, nan=True):
    """
    Create a table with the correlations of ``Y`` with one or several
    variables.

    :arg var Y: first variable
    :arg var Xs: second variable (or list of variables)
    :arg cat: show correlations separately for different groups in the
        data. Can be a ``factor`` (the correlation for each level is shown
        separately) or an array of ``bool`` values (e.g. from a comparison
        like ``Stim==1``). Must satisfy ``iscategorial``.
    :arg levels: significance levels to mark (sequence of float)
    :arg diff: (factor, cat_1, cat_2); not implemented, any value other
        than ``None`` raises ``NotImplementedError``
    :arg sub: use only a subset of the data (index applied to ``Y``, to each
        X and, if it is a model or factor, to ``cat``)
    :arg pmax: (None) don't show correlations with p>pmax
    :arg nan: ``True``: display correlation which yield NAN;
        ``False``: hide NANs but mention occurrence in summary (not
        implemented); ``None``: don't mention NANs
    :rtype: Table

    """
    levels = np.array(levels)
    if isvar(Xs):
        Xs = [Xs]

    # SUB
    if sub is not None:
        Y = Y[sub]
        Xs = [X[sub] for X in Xs]
        if ismodel(cat) or isfactor(cat):
            cat = cat[sub]

    if diff is not None:
        raise NotImplementedError

    if cat is None:
        headers = ('Variable', 'r', 'p', 'n')
    else:
        assert iscategorial(cat)
        headers = ('Variable', 'Category', 'r', 'p', 'n')
    table = fmtxt.Table('l' * len(headers))
    table.cells(*headers)
    table.midrule()
    table.title("Correlations with %s" % (Y.name))

    # NAN counter stored on the table; presumably incremented by
    # _corr_to_table -- it is only read back here for the summary row
    table._my_nan_count = 0

    for X in Xs:
        if cat is None:
            _corr_to_table(table, Y, X, None, levels, pmax=pmax, nan=nan)
            continue

        # printXname stays True until a call has added something to the
        # table for this X
        print_name = True
        for cell in cat.cells:
            n_before = len(table)
            # the mask is passed directly so that the ``sub`` argument is
            # not overwritten
            _corr_to_table(table, Y, X, cat == cell, levels, pmax=pmax,
                           nan=nan, printXname=print_name,
                           label=cell_label(cell))
            if len(table) > n_before:
                print_name = False

    # last row: notes on pmax and on the NAN count
    notes = []
    if pmax is not None:
        notes.append('all other p>{p}'.format(p=pmax))
    if nan is False and table._my_nan_count > 0:
        notes.append('%s NANs' % table._my_nan_count)
    if notes:
        table.cell("(%s)" % ', '.join(notes))
    return table