def oneway(Y, X, match=None, sub=None, par=True, title=None):
    """Run a one-way test of ``Y`` over the cells of ``X``; return a summary.

    The data are arranged with ``celltable`` and handed to ``_oneway``; the
    mapping returned by ``_oneway`` is rendered into one line of text.

    Parameters
    ----------
    Y, X, match, sub :
        Forwarded unchanged to ``celltable``.
    par : bool
        Forwarded to ``_oneway`` as ``parametric``.
    title :
        Accepted but not used by this function.

    Returns
    -------
    str
        Text of the form ``"{test}: {statistic}={value}{stars}, p={p}"``,
        filled in from the keys of the ``_oneway`` result.
    """
    cells = celltable(Y, X, match=match, sub=sub)
    result = _oneway(cells, parametric=par)
    return "{test}: {statistic}={value}{stars}, p={p}".format(**result)
def _resample(Y, unit=None, replacement=True, samples=1000):
    """Generator that resamples a dependent variable (Y) multiple times.

    The same output object is modified in place and yielded on every
    iteration, so consumers must use (or copy) it before requesting the next
    sample.

    Parameters
    ----------
    Y :
        Variable to resample. If ``isvar(Y)`` is false, ``Y`` is first
        converted with ``var(Y)``.
    unit :
        Factor specifying the unit of measurement (e.g. subject). If given,
        resampling proceeds by first resampling the categories of ``unit``
        (with or without replacement) and then shuffling the values within
        each unit (no replacement).
    replacement : bool
        Whether random samples are drawn with replacement or without.
    samples : int
        Number of samples to yield.

    Yields
    ------
    The resampled variable (one shared object, updated for each sample).
    """
    if isvar(Y):
        Yout = Y.copy('_resampled')
    else:
        Y = var(Y)
        Yout = var(Y.copy(), name="Y resampled")

    if unit:
        ct = celltable(Y, unit)
        unit_data = ct.get_data(out=list)
        unit_indexes = ct.data_indexes.values()
        x_out = Yout.x
        if replacement:
            n = len(ct.indexes)
            for _ in xrange(samples):
                # pick, for every target unit, a source unit at random
                source_ids = np.random.randint(n, size=n)
                for index, source_index in zip(unit_indexes, source_ids):
                    data = unit_data[source_index]
                    np.random.shuffle(data)  # in-place shuffle within unit
                    x_out[index] = data
                yield Yout
        else:
            for _ in xrange(samples):
                # permute which unit's data goes where, then shuffle within
                random.shuffle(unit_data)
                for index, data in zip(unit_indexes, unit_data):
                    np.random.shuffle(data)
                    x_out[index] = data
                yield Yout
    elif replacement:
        N = Y.N
        for _ in xrange(samples):
            # Draw N indices (not a single scalar index) so that the
            # resampled data keeps the length of the original data.
            index = np.random.randint(N, size=N)
            Yout.x = Y.x[index]
            yield Yout
    else:
        for _ in xrange(samples):
            np.random.shuffle(Yout.x)
            yield Yout
def frequencies(Y, X, sub=None, title="{Yname} Frequencies"):
    """Tabulate how often each category of Y occurs in the cells defined by X.

    Parameters
    ----------
    Y :
        Factor whose frequencies are of interest (converted with
        ``_data.asfactor``).
    X :
        Model defining the cells for which frequencies are displayed
        (converted with ``_data.asfactor``).
    sub :
        Forwarded to ``_structure.celltable``.
    title : str
        Table title; ``{Yname}`` is replaced by the capitalized name of Y.
        A false value suppresses the title.

    Returns
    -------
    textab.Table
        One row per cell of X and one column per category of Y.
    """
    Y = _data.asfactor(Y)
    X = _data.asfactor(X)
    ct = _structure.celltable(Y, X, sub=sub)

    category_ids = ct.Y.cells.keys()
    n_categories = len(category_ids)
    n_columns = n_categories + 1

    table = textab.Table('l' * n_columns)

    # first header line: name of Y centered over the category columns
    table.cell()
    table.cell(Y.name, width=n_categories, just='c')
    table.midrule(span=(2, n_columns))

    # second header line: name of X followed by the category labels
    table.cell(X.name)
    for category in category_ids:
        table.cell(ct.Y.cells[category])
    table.midrule()

    # body: one row per cell, one count per category
    for cell in ct.indexes:
        table.cell(ct.cells[cell])
        cell_data = ct.data[cell]
        for category in category_ids:
            table.cell(np.sum(category == cell_data))

    if title:
        table.title(title.format(Yname=Y.name.capitalize()))

    return table
def _resample(Y, unit=None, replacement=True, samples=1000):
    """Generator that resamples a dependent variable (Y) multiple times.

    The same output object is modified in place and yielded on every
    iteration (it is not copied per sample), so consumers must use (or copy)
    it before requesting the next sample.

    Parameters
    ----------
    Y : var | ndvar
        Variable which is to be resampled. An ndvar must have cases.
    unit : categorial
        Factor specifying the unit of measurement (e.g. subject). If given,
        resampling proceeds by first resampling the categories of ``unit``
        (with or without replacement) and then shuffling the values within
        each unit (no replacement). The original code marks this branch as
        "not implemented".
    replacement : bool
        Whether random samples are drawn with replacement or without.
    samples : int
        Number of samples to yield.

    Yields
    ------
    i : int
        Index of the sample, counting from 0.
    Yout :
        The resampled variable (one shared object, updated for each sample).

    Raises
    ------
    ValueError
        If ``Y`` is an ndvar without cases.
    TypeError
        If ``Y`` is neither a var nor an ndvar.

    Notes
    -----
    Because this is a generator, the input checks run when the first sample
    is requested, not when the function is called.
    """
    if not isvar(Y):
        if not isndvar(Y):
            raise TypeError("need var or ndvar")
        if not Y.has_case:
            raise ValueError("Need ndvar with cases")

    Yout = Y.copy('{name}_resampled')

    if unit:  # marked "not implemented" in the original code
        ct = celltable(Y, unit)
        unit_data = ct.get_data(out=list)
        unit_indexes = ct.data_indexes.values()
        x_out = Yout.x
        if replacement:
            n = len(ct.indexes)
            for i in xrange(samples):
                # pick, for every target unit, a source unit at random
                source_ids = np.random.randint(n, size=n)
                for index, source_index in zip(unit_indexes, source_ids):
                    data = unit_data[source_index]
                    np.random.shuffle(data)  # in-place shuffle within unit
                    x_out[index] = data
                yield i, Yout
        else:
            for i in xrange(samples):
                # permute which unit's data goes where, then shuffle within
                random.shuffle(unit_data)
                for index, data in zip(unit_indexes, unit_data):
                    np.random.shuffle(data)
                    x_out[index] = data
                yield i, Yout
    elif replacement:
        N = len(Y)
        for i in xrange(samples):
            # N indices drawn from [0, N). The second positional argument of
            # randint is ``high``, not ``size``, so randint(N, N) would raise
            # ValueError (low >= high).
            index = np.random.randint(N, size=N)
            Yout.x = Y.x[index]
            yield i, Yout
    else:
        for i in xrange(samples):
            np.random.shuffle(Yout.x)
            yield i, Yout
def pairwise(Y, X, match=None, sub=None,  # data in
             par=True, corr='Hochberg', trend=True,  # stats
             title='{desc}', mirror=False,  # layout
             ):
    """Table of pairwise comparisons between the cells of ``X``.

    Parameters
    ----------
    Y, X, match, sub :
        Forwarded to ``celltable``.
    par : bool
        Forwarded to ``_pairwise`` as ``parametric``.
    corr :
        Multiple-comparison correction, forwarded to ``_pairwise`` and to
        ``mcp_adjust``. A false value disables the corrected p-values.
    trend :
        Forwarded to ``_pairwise``; also added to the ``of`` argument used
        when formatting the test statistic.
    title : str
        Table title; ``{desc}`` is replaced by ``"Pairwise <test name>"``.
    mirror : bool
        If false, only the upper triangle is shown and every row has
        sub-rows for the statistic, p and (if ``corr``) corrected p. If true,
        all cells appear as both rows and columns; the upper triangle shows
        the statistic and p, and the lower triangle shows the corrected p
        (if ``corr``).

    Returns
    -------
    fmtxt.Table
    """
    ct = celltable(Y, X, match=match, sub=sub)
    test = _pairwise(ct.get_data(), within=ct.all_within, parametric=par,
                     corr=corr, trend=trend)

    # extract test results
    k = len(ct)
    indexes = test['pw_indexes']
    statistic = test['statistic']
    _K = test[statistic]
    _P = test['p']
    # corrected p-values are only needed (and only computed) with corr
    _Pc = mcp_adjust(_P, corr) if corr else None
    _df = test['df']
    symbols = test['symbols']

    # create TABLE
    table = fmtxt.Table('l' + 'l' * (k - 1 + mirror))
    title_desc = "Pairwise {0}".format(test['test'])
    table.title(title.format(desc=title_desc))
    table.caption(test['caption'])

    # headings
    table.cell()
    cell_labels = ct.cell_labels()
    for name in cell_labels[1 - mirror:]:
        table.cell(name)
    table.midrule()

    # sub-row 0: statistic, 1: p, 2: corrected p (not needed when mirrored,
    # because corrected p then goes into the lower triangle)
    if corr and not mirror:
        subrows = range(3)
    else:
        subrows = range(2)

    for row in range(0, k - 1 + mirror):
        for subrow in subrows:
            # names column: label only on the first sub-row
            if subrow == 0:
                table.cell(cell_labels[row], r"\textbf")
            else:
                table.cell()

            # data columns
            for col in range(1 - mirror, k):
                if row == col:
                    table.cell()
                elif col > row:
                    # upper triangle
                    index = indexes[(row, col)]
                    if subrow == 0:
                        tex_cell = fmtxt.eq(statistic, _K[index],
                                            df=_df[index],
                                            stars=symbols[index],
                                            of=3 + trend)
                    elif subrow == 1:
                        tex_cell = fmtxt.eq('p', _P[index], fmt='%.3f',
                                            drop0=True)
                    elif subrow == 2:
                        tex_cell = fmtxt.eq('p', _Pc[index], df='c',
                                            fmt='%.3f', drop0=True)
                    table.cell(tex_cell)
                elif mirror and corr and subrow == 0:
                    # lower triangle: corrected p of the mirrored comparison
                    index = indexes[(col, row)]
                    p = _Pc[index]
                    table.cell(p, fmt='%.3f', drop0=True)
                else:
                    table.cell()
    return table
def test(Y, X=None, against=0, match=None, sub=None,
         par=True, corr='Hochberg', title='{desc}'):
    """One-sample tests, or tests of every cell against a baseline cell.

    Parameters
    ----------
    Y, X, match, sub :
        Forwarded (positionally) to ``celltable``. Tests are performed
        separately for all cells in ``X``.
    against : scalar | str
        - scalar: each cell is tested against this value with a one-sample
          t-test.
        - str: label of a cell in ``X``; every other cell is compared with
          that cell, using a related-samples t-test where ``ct.within``
          marks the pair as within, otherwise an independent-samples t-test.
    par : bool
        Only ``True`` (t-tests) is implemented.
    corr :
        Multiple-comparison correction passed to ``mcp_adjust``; a false
        value omits the corrected-p column and the caption.
    title : str
        Table title; ``{desc}`` is replaced by a description of the tests.

    Returns
    -------
    fmtxt.Table
        One row per test. The degrees of freedom go into the header when
        they are identical for all tests, otherwise into their own column.

    Raises
    ------
    NotImplementedError
        If ``par`` is false.
    TypeError
        If ``against`` is neither a str nor a scalar.
    ValueError
        If ``against`` is a str that matches no cell label.
    """
    ct = celltable(Y, X, match, sub)

    if par:
        title_desc = "t-tests against %s" % against
        statistic_name = 't'
    else:
        raise NotImplementedError

    names = []
    ts = []
    dfs = []
    ps = []

    if isinstance(against, str):
        k = len(ct.indexes) - 1
        assert against in ct.cells
        # locate the baseline cell by its label (last match wins)
        baseline_id = None
        baseline = None
        for cell_id in ct.indexes:
            if against == ct.cells[cell_id]:
                baseline_id = cell_id
                baseline = ct.data[cell_id]
        if baseline is None:
            raise ValueError("no cell with label %r" % (against,))

        for cell_id in ct.indexes:
            if cell_id == baseline_id:
                continue
            names.append(ct.cells[cell_id])
            if (ct.within is not False) and ct.within[cell_id, baseline_id]:
                t, p = scipy.stats.ttest_rel(baseline, ct.data[cell_id])
                df = len(baseline) - 1
            else:
                data = ct.data[cell_id]
                t, p = scipy.stats.ttest_ind(baseline, data)
                df = len(baseline) + len(data) - 2
            ts.append(t)
            dfs.append(df)
            ps.append(p)
    elif np.isscalar(against):
        k = len(ct.cells)
        for cell_id in ct.indexes:
            data = ct.data[cell_id]
            t, p = scipy.stats.ttest_1samp(data, against)
            names.append(ct.cells[cell_id])
            ts.append(t)
            dfs.append(len(data) - 1)
            ps.append(p)
    else:
        # Previously this case fell through with ``k`` undefined and no
        # results, failing later with an unrelated error.
        raise TypeError("against must be a str (cell label) or a scalar, "
                        "got %r" % (against,))

    if corr:
        ps_adjusted = mcp_adjust(ps, corr)
    else:
        # placeholder so that the zip() below has something to iterate
        ps_adjusted = np.zeros(len(ps))
    stars = star(ps, out=str)  # , levels=levels, trend=trend, corr=corr

    # show df in the header only if it is the same for every test
    df_in_header = len(np.unique(dfs)) == 1

    table = fmtxt.Table('l' + 'r' * (3 - df_in_header + bool(corr)))
    table.title(title.format(desc=title_desc))
    if corr:
        table.caption(_get_correction_caption(corr, k))

    # header
    table.cell("Effect")
    if df_in_header:
        table.cell([statistic_name,
                    fmtxt.texstr(dfs[0], property='_'),
                    ], mat=True)
    else:
        table.cell(statistic_name, mat=True)
        table.cell('df', mat=True)
    table.cell('p', mat=True)
    if corr:
        table.cell(fmtxt.symbol('p', df=corr))
    table.midrule()

    # body
    for name, t, mark, df, p, p_adj in zip(names, ts, stars, dfs, ps,
                                           ps_adjusted):
        table.cell(name)
        tex_stars = fmtxt.Stars(mark, of=3)
        tex_t = fmtxt.texstr(t, fmt='%.2f')
        table.cell([tex_t, tex_stars])
        if not df_in_header:
            table.cell(df)
        table.cell(fmtxt.p(p))
        if corr:
            table.cell(fmtxt.p(p_adj))
    return table
def stats(Y, y, x=None, match=None, sub=None, fmt='%.4g', funcs=[np.mean]):
    """Return a table with statistics per cell.

    Parameters
    ----------
    Y :
        Dependent variable, forwarded to ``_structure.celltable``.
    y :
        Model specifying the cells shown as rows.
    x :
        Optional model specifying the cells shown as columns. If given, the
        cells are defined by ``y % x``.
    match, sub :
        Forwarded to ``_structure.celltable``.
    fmt : str
        %-format for the values. Without ``x`` it is applied to each
        function result separately. With ``x`` it must contain either one
        ``%`` (applied to each value, results joined with ``', '``) or as
        many ``%`` as there are ``funcs`` (applied once to all values).
    funcs : list of callable
        Functions to show; each must return a scalar. The default list is
        never modified by this function.

    Returns
    -------
    textab.Table

    Raises
    ------
    ValueError
        If ``x`` is given and the number of ``%`` in ``fmt`` is neither 1 nor
        ``len(funcs)``.
    """
    y_ids = sorted(y.cells.keys())

    if x is None:
        cells = _structure.celltable(Y, y, sub=sub, match=match)

        # table header: one column per function
        n_disp = len(funcs)
        table = textab.Table('l' * (n_disp + 1))
        table.cell('Condition', 'bf')
        for func in funcs:
            table.cell(func.__name__, 'bf')
        table.midrule()

        # table entries
        for yid in y_ids:
            label = cells.cells[yid]
            data = cells.data[yid]
            table.cell(label)
            for func in funcs:
                table.cell(fmt % func(data))
    else:
        cells = _structure.celltable(Y, y % x, sub=sub, match=match)
        table = textab.Table('l' * (len(x.cells) + 1))
        x_ids = sorted(x.cells.keys())

        # table header
        table.cell()
        table.cell(x.name, width=len(x_ids), just='c')
        table.midrule(span=(2, 1 + len(x_ids)))
        table.newrow()
        table.cell()
        for xid in x_ids:
            table.cell(x.cells[xid])
        table.midrule()

        # decide whether fmt formats one value at a time or all at once
        fmt_n = fmt.count('%')
        if fmt_n == 1:
            fmt_once = False
        elif len(funcs) == fmt_n:
            fmt_once = True
        else:
            raise ValueError("fmt does not match funcs")

        # table body
        for yid in y_ids:
            table.cell(y.cells[yid])
            y_part = yid if isinstance(yid, tuple) else (yid,)
            for xid in x_ids:
                # address of the interaction cell: y ids followed by x ids
                x_part = xid if isinstance(xid, tuple) else (xid,)
                data = cells.data[y_part + x_part]

                # Must be a tuple: %-formatting with several fields needs a
                # tuple operand; a generator raises TypeError.
                values = tuple(f(data) for f in funcs)
                if fmt_once:
                    txt = fmt % values
                else:
                    txt = ', '.join(fmt % v for v in values)
                table.cell(txt)
    return table