Example #1
0
def _hopkins_ems(regression_model, v=False):
    """
    Find the MS denominator for the F test of each effect in a model

    A boolean E(MS) component table (after Hopkins, 1976) with one row and
    one column per effect is built, and the denominator for each effect is
    read from it: it is the effect whose row equals the row of the tested
    effect with the tested effect's own component removed.

    :arg regression_model: model to analyze (must satisfy ``ismodel``); its
        ``effects`` and ``factors`` are used
    :arg bool v: print the E(MS) component table and, for each effect, the
        row matches (False by default)
    :returns: list with one entry per effect in ``regression_model.effects``:
        the index of the effect providing the denominator, or ``None`` if no
        row matches. If any factor of the model is a ``var``, all entries
        are ``None``.
    :raises NotImplementedError: if more than one row matches for an effect

    """
    assert ismodel(regression_model)
    effects = regression_model.effects

    # check that no var is in model
    if any(isinstance(f, var) for f in regression_model.factors):
        return [None] * len(effects)

    # E(MS) table (after Hopkins, 1976): the entry in row e, column e2 is
    # True if every factor of e2 is contained in e or is random, and every
    # factor of e is contained in e2 or has e2 nested in it
    E_MS_table = []
    for e in effects:
        E_MS_row = []
        for e2 in effects:
            in_e_or_random = np.all([(f in e or f.random)
                                     for f in e2.factors])
            in_e2_or_nested = np.all([(f in e2 or e2.nestedin(f))
                                      for f in e.factors])
            E_MS_row.append(bool(in_e_or_random and in_e2_or_nested))
        E_MS_table.append(E_MS_row)
    E_MS = np.array(E_MS_table, dtype=bool)

    if v:
        # single-argument form gives the same output with the Python 2
        # print statement and the Python 3 print function
        print("E(MS) component table:\n%s" % (E_MS,))

    # read MS denominator for F tests from table
    MS_denominators = []
    for i, e in enumerate(effects):
        # target row: the effect's own row without its own component
        e_ms_den = E_MS[i].copy()
        e_ms_den[i] = False
        match = np.all(E_MS == e_ms_den, axis=1)
        if v:
            print("%s : %s" % (e.name, match))

        n_matches = match.sum()
        if n_matches == 1:
            MS_denominators.append(np.where(match)[0][0])
        elif n_matches == 0:
            MS_denominators.append(None)
        else:
            raise NotImplementedError("too many matches")

    return MS_denominators
Example #2
0
def correlations(Y, Xs, cat=None, levels=(.05, .01, .001), diff=None, sub=None,
                 pmax=None, nan=True):
    """
    Build a table of the correlations of ``Y`` with one or several variables

    :arg var Y: first variable
    :arg Xs: second variable (or list of variables)
    :arg cat: show correlations separately for different groups in the
        data: one row is requested for each cell in ``cat.cells``.
        NOTE(review): earlier documentation also mentioned arrays of ``bool``
        values (e.g. from a comparison like ``Stim==1``), but this function
        asserts ``iscategorial(cat)`` and iterates ``cat.cells`` -- confirm
        whether bool arrays are actually supported.
    :arg levels: significance levels to mark (converted to an array and
        passed on to the row builder)
    :arg diff: (factor, cat_1, cat_2); not implemented, any value other
        than ``None`` raises :class:`NotImplementedError`
    :arg sub: use only a subset of the data (applied to ``Y``, every
        variable in ``Xs`` and ``cat``)
    :arg pmax: (None) don't show correlations with p>pmax; if set, the
        final row of the table mentions that all other p-values exceed it
    :arg nan: ``True``: display correlation which yield NAN;
        ``False``: hide NANs but mention their number in the final row of
        the table;
        ``None``: don't mention NANs
    :rtype: Table
    :raises NotImplementedError: if ``diff`` is not ``None``

    """
    if diff is not None:
        raise NotImplementedError

    levels = np.array(levels)

    if isvar(Xs):
        Xs = [Xs]

    # SUB
    if sub is not None:
        Y = Y[sub]
        Xs = [X[sub] for X in Xs]
        # subset every kind of cat, so that the per-cell index built below
        # always has the same length as the subset data
        if cat is not None:
            cat = cat[sub]

    if cat is None:
        table = fmtxt.Table('l' * 4)
        table.cells('Variable', 'r', 'p', 'n')
    else:
        assert iscategorial(cat)
        table = fmtxt.Table('l' * 5)
        table.cells('Variable', 'Category', 'r', 'p', 'n')

    table.midrule()
    table.title("Correlations with %s" % (Y.name))

    # NAN counter read for the final row; presumably incremented by
    # _corr_to_table -- TODO confirm
    table._my_nan_count = 0

    for X in Xs:
        if cat is None:
            _corr_to_table(table, Y, X, cat, levels, pmax=pmax, nan=nan)
            continue

        # request the name of X only until a row has actually been added
        printXname = True
        for cell in cat.cells:
            n_before = len(table)
            cell_index = (cat == cell)
            _corr_to_table(table, Y, X, cell_index, levels, pmax=pmax,
                           nan=nan, printXname=printXname,
                           label=cell_label(cell))
            if len(table) > n_before:
                printXname = False

    # last row: notes about omitted correlations
    notes = []
    if pmax is not None:
        notes.append('all other p>{p}'.format(p=pmax))
    if nan is False and table._my_nan_count > 0:
        notes.append('%s NANs' % table._my_nan_count)
    if notes:
        table.cell("(%s)" % ', '.join(notes))
    return table