Esempio n. 1
0
 def __str__(self):
     """
     Return a plain-text summary of the names stored on this object.

     The text lists, alphabetically sorted, the names of all entries in
     ``self._factors`` accepted by ``_vsl.isvar`` (under ``Variables:``) and
     by ``_vsl.isfactor`` (under ``Factors:``).  If the instance has a
     ``_stats`` attribute, the names of its values are appended, unsorted,
     under a ``SEGMENTS:`` heading.
     """
     entries = list(self._factors.values())
     factor_names = [entry.name for entry in entries if _vsl.isfactor(entry)]
     variable_names = [entry.name for entry in entries if _vsl.isvar(entry)]

     parts = [
         'Variables:',
         ', '.join(sorted(variable_names)),
         'Factors:',
         ', '.join(sorted(factor_names)),
     ]
     text = '\n'.join(parts)

     if hasattr(self, '_stats'):
         segment_names = ', '.join(s.name for s in self._stats.values())
         text = text + '\n\nSEGMENTS:\n' + segment_names
     return text
Esempio n. 2
0
def correlations(Y, Xs, cat=None, levels=[.05, .01, .001], diff=None, sub=None,
         pmax=None, nan=True):
    """
    Build a table with the correlations between ``Y`` and each variable in
    ``Xs``.

    :arg var Y: first variable; its ``name`` is used in the table title
    :arg Xs: second variable, or a list of variables (a single variable, as
        recognized by ``isvar``, is wrapped in a list)
    :arg cat: show correlations separately for different groups in the
        data: one row is requested for each entry of ``cat.cells``.
        NOTE(review): earlier documentation also allowed an array of
        ``bool`` values here, but the code asserts ``iscategorial(cat)``
        and iterates ``cat.cells`` -- confirm before relying on that.
    :arg list levels: significance levels to mark (converted to an array
        and handed to ``_corr_to_table``)
    :arg diff: (factor, cat_1, cat_2); not implemented -- any value other
        than ``None`` raises ``NotImplementedError``
    :arg sub: use only a subset of the data; applied as an index to ``Y``,
        to every variable in ``Xs`` and, if it is a model or factor, to
        ``cat``
    :arg pmax: (None) don't show correlations with p>pmax; when set, a
        closing ``(all other p>...)`` note is added to the table
    :arg nan: forwarded to ``_corr_to_table``.  With ``False``, the closing
        note also reports ``table._my_nan_count`` if it is greater than 0.
    :rtype: Table

    """
    levels = np.array(levels)

    if isvar(Xs):
        Xs = [Xs]

    # restrict every input to the requested subset
    if sub is not None:
        Y = Y[sub]
        Xs = [X[sub] for X in Xs]
        if ismodel(cat) or isfactor(cat):
            cat = cat[sub]

    if diff is not None:
        raise NotImplementedError

    # table skeleton: an extra 'Category' column when grouping by cat
    grouped = cat is not None
    if grouped:
        assert iscategorial(cat)
        headings = ('Variable', 'Category', 'r', 'p', 'n')
    else:
        headings = ('Variable', 'r', 'p', 'n')
    table = fmtxt.Table('l' * len(headings))
    table.cells(*headings)
    table.midrule()
    table.title("Correlations with %s" % (Y.name))

    # counter stored on the table; read back for the closing note below
    table._my_nan_count = 0

    for X in Xs:
        if not grouped:
            _corr_to_table(table, Y, X, cat, levels, pmax=pmax, nan=nan)
            continue

        # ask for the variable name only until the table has grown once
        show_name = True
        for cell in cat.cells:
            len_before = len(table)
            in_cell = (cat == cell)
            _corr_to_table(table, Y, X, in_cell, levels, pmax=pmax, nan=nan,
                           printXname=show_name, label=cell_label(cell))
            if len(table) > len_before:
                show_name = False

    # last row: optional note about hidden correlations and NANs
    notes = []
    if pmax is not None:
        notes.append('all other p>{p}'.format(p=pmax))
    if nan is False and table._my_nan_count > 0:
        notes.append('%s NANs' % table._my_nan_count)
    if notes:
        table.cell("(%s)" % ', '.join(notes))
    return table
Esempio n. 3
0
def data(Y, X=None, match=None, cov=[], sub=None, fmt=None, labels=True, 
          showcase=True):
    """
    return a textab.table (printed as tsv table by default)
    
    parameters
    ----------
    Y: variable to display (can be model with several dependents)

    X: categories defining cells (factorial model)

    match: factor to match values on and return repeated-measures table

    cov: covariate to report (WARNING: only works with match, where each value
         on the matching variable corresponds with one value in the covariate)

    sub: boolean array specifying which values to include (generate e.g. 
         with 'sub=T==[1,2]')

    fmt: Format string  
            
    labels: display labels for nominal variables (otherwise display codes)

    """
    if hasattr(Y, '_items'): # dataframe
        Y = Y._items
    Y = _data.asmodel(Y)
    if _data.isfactor(cov) or _data.isvar(cov):
        cov = [cov]
    
    data = []
    names_yname = [] # names including Yi.name for matched table headers
    ynames = [] # names of Yi for independent measures table headers
    within_list = []
    for Yi in Y.effects:
        _data, datalabels, names, _within = _data._split_Y(Yi, X, match=match, 
                                                     sub=sub, datalabels=match)
        data += _data
        names_yname += ['({c})'.format(c=n) for n in names]
        ynames.append(Yi.name)
        within_list.append(_within)
    within = within_list[0]
    assert all([w==within for w in within_list])
    
    # table
    n_dependents = len(Y.effects)
    n_cells = int(len(data) / n_dependents)
    if within:
        n, k = len(data[0]), len(data)
        table = textab.Table('l' * (k + showcase + len(cov)))
        
        # header line 1
        if showcase:
            table.cell(match.name)
            case_labels = datalabels[0]
            assert all([np.all(case_labels==l) for l in datalabels[1:]])
        for i in range(n_dependents):
            for name in names:        
                table.cell(name.replace(' ','_'))
        for c in cov:
            table.cell(c.name)
        
        # header line 2
        if n_dependents > 1:
            if showcase:
                table.cell()
            for name in ynames:
                [table.cell('(%s)'%name) for i in range(n_cells)]
            for c in cov:
                table.cell()
        
        # body
        table.midrule()
        for i in range(n):
            case = case_labels[i]
            if showcase:
                table.cell(case)
            for j in range(k):
                table.cell(data[j][i], fmt=fmt)
            # covariates
            indexes = match==case
            for c in cov:
                # test it's all the same values
                case_cov = c[indexes]
                if len(np.unique(case_cov.x)) != 1: 
                    msg = 'covariate for case "%s" has several values'%case
                    raise ValueError(msg)
                # get value
                first_i = np.nonzero(indexes)[0][0]
                cov_value = c[first_i]
                if _data.isfactor(c) and labels:
                    cov_value = c.cells[cov_value]
                table.cell(cov_value, fmt=fmt)
    else:
        table = textab.Table('l'*(1 + n_dependents))
        table.cell(X.name)
        [table.cell(y) for y in ynames]
        table.midrule()
        # data is now sorted: (cell_i within dependent_i)
        # sort data as (X-cell, dependent_i)
        data_sorted = []
        for i_cell in range(n_cells):
            data_sorted.append([data[i_dep*n_cells + i_cell] for i_dep in \
                               range(n_dependents)])
        # table
        for name, cell_data in zip(names, data_sorted):
            for i in range(len(cell_data[0])):
                table.cell(name)
                for dep_data in cell_data:
                    v = dep_data[i]
                    table.cell(v, fmt=fmt)
    return table