Exemple #1
0
def oneway(Y, X, match=None, sub=None, par=True, title=None):
    "data: for should iter over groups/treatments"
    ct = celltable(Y, X, match=match, sub=sub)
    test = _oneway(ct, parametric=par)
    template = "{test}: {statistic}={value}{stars}, p={p}"
    out = template.format(**test)
    return out
Exemple #2
0
def _resample(Y, unit=None, replacement=True, samples=1000):
    """
    Generator function to resample a dependent variable (Y) multiple times

    A single output object is created up front and yielded on every
    iteration; its data are overwritten between iterations, so each sample
    has to be consumed (or copied) before the generator is advanced.

    Y: variable to resample. If ``isvar(Y)`` is false, Y is first converted
       with ``var``.
    unit: factor specifying unit of measurement (e.g. subject). If unit is
          specified, resampling proceeds by first resampling the categories of
          unit (with or without replacement) and then shuffling the values
          within units (no replacement).
    replacement: whether random samples should be drawn with replacement or
                 without
    samples: number of samples to yield

    """
    if isvar(Y):
        Yout = Y.copy('_resampled')
    else:
        Y = var(Y)
        Yout = var(Y.copy(), name="Y resampled")

    if unit:
        ct = celltable(Y, unit)
        unit_data = ct.get_data(out=list)
        unit_indexes = ct.data_indexes.values()
        x_out = Yout.x

        if replacement:
            n = len(ct.indexes)
            for _ in xrange(samples):
                # pick, with replacement, which unit supplies the data for
                # each target unit
                source_ids = np.random.randint(n, size=n)
                for index, source_index in zip(unit_indexes, source_ids):
                    data = unit_data[source_index]
                    np.random.shuffle(data)  # shuffles within the unit, in place
                    x_out[index] = data
                yield Yout

        else:
            for _ in xrange(samples):
                # permute the units, then shuffle the values within each unit
                random.shuffle(unit_data)
                for index, data in zip(unit_indexes, unit_data):
                    np.random.shuffle(data)
                    x_out[index] = data
                yield Yout

    else:
        if replacement:
            N = Y.N
            for _ in xrange(samples):
                # draw N case indexes with replacement; a single scalar index
                # (``randint(N)``) would select one value, not a sample
                index = np.random.randint(N, size=N)
                Yout.x = Y.x[index]
                yield Yout
        else:
            for _ in xrange(samples):
                # permutation: shuffle the copied data in place
                np.random.shuffle(Yout.x)
                yield Yout
Exemple #3
0
def frequencies(Y, X, sub=None, title="{Yname} Frequencies"):
    """
    Tabulate how often each category of Y occurs in each cell defined by X.

    Y: factor whose frequencies are of interest
    X: model defining cells for which frequencies are displayed
    sub: forwarded to the celltable
    title: title template; ``{Yname}`` is replaced with the capitalized name
           of Y. A false value suppresses the title.

    Returns the table, with one row per cell of X and one column per
    category of Y.

    """
    Y = _data.asfactor(Y)
    X = _data.asfactor(X)
    ct = _structure.celltable(Y, X, sub=sub)

    category_ids = ct.Y.cells.keys()
    n_categories = len(category_ids)

    # one label column plus one column per category of Y
    table = textab.Table('l' * (1 + n_categories))

    # header line 1: name of Y centered above the category columns
    table.cell()
    table.cell(Y.name, width=n_categories, just='c')
    table.midrule(span=(2, n_categories + 1))

    # header line 2: name of X followed by the category labels
    table.cell(X.name)
    for category in category_ids:
        table.cell(ct.Y.cells[category])
    table.midrule()

    # body: count the occurrences of every category within each cell
    for cell in ct.indexes:
        table.cell(ct.cells[cell])
        cell_data = ct.data[cell]
        for category in category_ids:
            table.cell(np.sum(category == cell_data))

    if title:
        table.title(title.format(Yname=Y.name.capitalize()))

    return table
Exemple #4
0
def _resample(Y, unit=None, replacement=True, samples=1000):
    """
    Generator function to resample a dependent variable (Y) multiple times

    Yields ``(i, Yout)`` tuples, where ``i`` is the sample number (starting
    at 0) and ``Yout`` is the resampled variable.

    Y : var | ndvar
        Variable which is to be resampled. One copy of ``Y`` is created up
        front and the same object is yielded in each iteration with its data
        overwritten, so each sample has to be consumed (or copied) before the
        generator is advanced.

    unit : categorial
        factor specifying unit of measurement (e.g. subject). If unit is
        specified, resampling proceeds by first resampling the categories of
        unit (with or without replacement) and then shuffling the values
        within units (no replacement).

    replacement : bool
        whether random samples should be drawn with replacement or
        without

    samples : int
        number of samples to yield

    Raises TypeError if ``Y`` is neither a var nor an ndvar, and ValueError
    if it is an ndvar without cases.

    """
    if not isvar(Y):
        if not isndvar(Y):
            raise TypeError("need var or ndvar")
        if not Y.has_case:
            raise ValueError("Need ndvar with cases")

    Yout = Y.copy('{name}_resampled')

    # NOTE(review): the original author marked this branch "not implemented";
    # presumably it is untested (e.g. for ndvar) -- verify before relying on it
    if unit:
        ct = celltable(Y, unit)
        unit_data = ct.get_data(out=list)
        unit_indexes = ct.data_indexes.values()
        x_out = Yout.x

        if replacement:
            n = len(ct.indexes)
            for i in xrange(samples):
                # pick, with replacement, which unit supplies the data for
                # each target unit
                source_ids = np.random.randint(n, size=n)
                for index, source_index in zip(unit_indexes, source_ids):
                    data = unit_data[source_index]
                    np.random.shuffle(data)  # shuffles within the unit, in place
                    x_out[index] = data
                yield i, Yout

        else:
            for i in xrange(samples):
                # permute the units, then shuffle the values within each unit
                random.shuffle(unit_data)
                for index, data in zip(unit_indexes, unit_data):
                    np.random.shuffle(data)
                    x_out[index] = data
                yield i, Yout
    else:
        if replacement:
            N = len(Y)
            for i in xrange(samples):
                # draw N case indexes with replacement; ``randint(N, N)``
                # would request the empty range [N, N) and raise ValueError
                index = np.random.randint(N, size=N)
                Yout.x = Y.x[index]
                yield i, Yout
        else:
            for i in xrange(samples):
                # permutation: shuffle the copied data in place
                np.random.shuffle(Yout.x)
                yield i, Yout
Exemple #5
0
def pairwise(Y, X, match=None, sub=None,  # data in
             par=True, corr='Hochberg', trend=True,  # stats
             title='{desc}', mirror=False,  # layout
             ):
    """
    pairwise comparison according to factor structure

    Builds a table in which every pair of cells of X is compared on Y.

    Y, X, match, sub: forwarded to ``celltable``
    par: forwarded to ``_pairwise`` as ``parametric``
    corr: multiple comparison correction; forwarded to ``_pairwise`` and, if
          true, used with ``mcp_adjust`` to compute corrected p-values
    trend: forwarded to ``_pairwise``; also raises the maximum number of
           stars from 3 to 4
    title: title template; ``{desc}`` is replaced by a description of the
           test
    mirror: if False, only the upper triangle is filled and (when ``corr``
            is set) each comparison gets a third sub-row with the corrected
            p-value. If True, the table gets one more row and column and
            the corrected p-values are shown in the lower triangle instead.

    Returns the table.

    """
    ct = celltable(Y, X, match=match, sub=sub)
    test = _pairwise(ct.get_data(), within=ct.all_within, parametric=par,
                     corr=corr, trend=trend)

    # extract test results
    k = len(ct)
    indexes = test['pw_indexes']
    statistic = test['statistic']
    _K = test[statistic]
    _P = test['p']
    if corr:
        # only defined (and only used below) when a correction is requested
        _Pc = mcp_adjust(_P, corr)
    _df = test['df']
    _NStars = test['stars']
    symbols = test['symbols']

    # create TABLE
    table = fmtxt.Table('l' + 'l' * (k - 1 + mirror))
    title_desc = "Pairwise {0}".format(test['test'])
    table.title(title.format(desc=title_desc))
    table.caption(test['caption'])

    # headings
    table.cell()
    cell_labels = ct.cell_labels()
    for name in cell_labels[1 - mirror:]:
        table.cell(name)
    table.midrule()

    # sub-rows per comparison: statistic, p, and (unless mirrored) corrected p
    if corr and not mirror:
        subrows = range(3)
    else:
        subrows = range(2)

    for row in range(0, k - 1 + mirror):
        for subrow in subrows:  # contains t/p
            # names column
            if subrow == 0:
                table.cell(cell_labels[row], r"\textbf")
            else:
                table.cell()
            # rows
            for col in range(1 - mirror, k):
                if row == col:
                    table.cell()
                elif col > row:
                    # upper triangle: test statistic and p-values
                    index = indexes[(row, col)]
                    if subrow == 0:
                        tex_cell = fmtxt.eq(statistic, _K[index], df=_df[index],
                                             stars=symbols[index],
                                             of=3 + trend)
                    elif subrow == 1:
                        tex_cell = fmtxt.eq('p', _P[index], fmt='%.3f', drop0=True)
                    elif subrow == 2:
                        tex_cell = fmtxt.eq('p', _Pc[index], df='c',
                                             fmt='%.3f', drop0=True)
                    table.cell(tex_cell)
                else:
                    # lower triangle: corrected p-value when mirrored
                    if mirror and corr and subrow == 0:
                        index = indexes[(col, row)]
                        p = _Pc[index]
                        table.cell(p, fmt='%.3f', drop0=True)
                    else:
                        table.cell()
    return table
Exemple #6
0
def test(Y, X=None, against=0, match=None, sub=None,
         par=True, corr='Hochberg',
         title='{desc}'):
    """
    Test each cell of X against a constant or against a baseline cell.

    kwargs
    ------
    X: perform tests separately for all categories in X.
    against: can be
             - value: every cell is tested against this value with a
               one-sample t-test
             - string (category in X): every other cell is tested against
               the cell with this label, with a related-samples t-test where
               the celltable marks the two cells as within-subject and an
               independent-samples t-test otherwise
    match, sub: forwarded to ``celltable``
    par: only parametric tests (True) are implemented
    corr: multiple comparison correction used with ``mcp_adjust``; a false
          value omits the corrected p-value column
    title: title template; ``{desc}`` is replaced by a description of the
           tests

    Returns the results table.

    Raises NotImplementedError if ``par`` is false, TypeError if ``against``
    is neither a string nor a scalar, and ValueError if ``against`` is a
    string that matches no cell label.

    """
    ct = celltable(Y, X, match, sub)

    if par:
        title_desc = "t-tests against %s" % against
        statistic_name = 't'
    else:
        raise NotImplementedError

    names = []
    ts = []
    dfs = []
    ps = []

    if isinstance(against, str):
        k = len(ct.indexes) - 1
        assert against in ct.cells
        # locate the baseline cell by its label
        baseline_found = False
        for cell_id in ct.indexes:
            if against == ct.cells[cell_id]:
                baseline_id = cell_id
                baseline = ct.data[cell_id]
                baseline_found = True
        if not baseline_found:
            raise ValueError("against=%r does not match any cell label"
                             % (against,))

        for cell_id in ct.indexes:
            if cell_id == baseline_id:
                continue
            names.append(ct.cells[cell_id])
            if (ct.within is not False) and ct.within[cell_id, baseline_id]:
                t, p = scipy.stats.ttest_rel(baseline, ct.data[cell_id])
                df = len(baseline) - 1
            else:
                data = ct.data[cell_id]
                t, p = scipy.stats.ttest_ind(baseline, data)
                df = len(baseline) + len(data) - 2
            ts.append(t)
            dfs.append(df)
            ps.append(p)

    elif np.isscalar(against):
        k = len(ct.cells)

        for cell_id in ct.indexes:
            data = ct.data[cell_id]
            t, p = scipy.stats.ttest_1samp(data, against)
            names.append(ct.cells[cell_id])
            ts.append(t)
            dfs.append(len(data) - 1)
            ps.append(p)

    else:
        # without this, ``k`` stays unbound and an empty table (or a
        # NameError when ``corr`` is set) results
        raise TypeError("against needs to be a scalar or a string, got %r"
                        % (against,))

    if corr:
        ps_adjusted = mcp_adjust(ps, corr)
    else:
        # placeholder so the zip() below works; never displayed
        ps_adjusted = np.zeros(len(ps))
    stars = star(ps, out=str)  # , levels=levels, trend=trend, corr=corr
    # when all tests share the same df, show it once in the header
    df_in_header = len(np.unique(dfs)) == 1

    table = fmtxt.Table('l' + 'r' * (3 - df_in_header + bool(corr)))
    table.title(title.format(desc=title_desc))
    if corr:
        table.caption(_get_correction_caption(corr, k))

    # header
    table.cell("Effect")
    if df_in_header:
        table.cell([statistic_name,
                    fmtxt.texstr(dfs[0], property='_'),
                    ], mat=True)
    else:
        table.cell(statistic_name, mat=True)
        table.cell('df', mat=True)
    table.cell('p', mat=True)
    if corr:
        table.cell(fmtxt.symbol('p', df=corr))
    table.midrule()

    # body
    for name, t, mark, df, p, p_adj in zip(names, ts, stars, dfs, ps, ps_adjusted):
        table.cell(name)
        tex_stars = fmtxt.Stars(mark, of=3)
        tex_t = fmtxt.texstr(t, fmt='%.2f')
        table.cell([tex_t, tex_stars])
        if not df_in_header:
            table.cell(df)

        table.cell(fmtxt.p(p))
        if corr:
            table.cell(fmtxt.p(p_adj))
    return table
Exemple #7
0
def stats(Y, y, x=None, match=None, sub=None, fmt='%.4g', funcs=[np.mean]):
    """
    return a table with statistics per cell.

    y and x are models specifying cells. With ``x=None`` the table has one
    row per cell of y and one column per function in ``funcs``. With x
    given, rows are the cells of y, columns are the cells of x, and each
    table cell shows the statistics for the corresponding y-by-x cell.

    match, sub: forwarded to the celltable

    fmt is a %-format string. In the two-factor layout it must contain
    either a single specifier (applied to each function's value, with the
    results joined by ", ") or exactly one specifier per function (applied
    once to all values); otherwise ValueError is raised.

    funcs is a list of functions to show (all functions must return scalars)
    (the default list is never mutated here)

    """
    y_ids = sorted(y.cells.keys())

    if x is None:
        cells = _structure.celltable(Y, y, sub=sub, match=match)

        # table header
        n_disp = len(funcs)
        table = textab.Table('l'*(n_disp+1))
        table.cell('Condition', 'bf')
        for func in funcs:
            table.cell(func.__name__, 'bf')
        table.midrule()

        # table entries
        for y_id in y_ids:
            label = cells.cells[y_id]
            data = cells.data[y_id]
            table.cell(label)
            for func in funcs:
                table.cell(fmt % func(data))
    else:
        cells = _structure.celltable(Y, y%x, sub=sub, match=match)

        table = textab.Table('l'*(len(x.cells)+1))
        x_ids = sorted(x.cells.keys())

        # table header
        table.cell()
        table.cell(x.name, width=len(x_ids), just='c')
        table.midrule(span=(2, 1+len(x_ids)))
        table.newrow()
        table.cell()

        for x_id in x_ids:
            table.cell(x.cells[x_id])
        table.midrule()

        # table body
        fmt_n = fmt.count('%')
        if fmt_n == 1:
            fmt_once = False
        elif len(funcs) == fmt_n:
            fmt_once = True
        else:
            raise ValueError("fmt does not match funcs")

        for y_id in y_ids:
            table.cell(y.cells[y_id])
            y_part = y_id if isinstance(y_id, tuple) else (y_id,)
            for x_id in x_ids:
                # construct the address of the interaction cell
                x_part = x_id if isinstance(x_id, tuple) else (x_id,)
                data = cells.data[y_part + x_part]

                if fmt_once:
                    # the % operator needs a tuple to fill several
                    # specifiers; a generator would raise TypeError
                    txt = fmt % tuple(f(data) for f in funcs)
                else:
                    txt = ', '.join(fmt % f(data) for f in funcs)

                table.cell(txt)

    return table