def frequencies(Y, X, sub=None, title="{Yname} Frequencies"):
    """
    Tabulate how often each category of Y occurs within each cell of X.

    Y: factor whose category frequencies are counted (one table column
       per category)
    X: model defining the cells; one table row is produced per cell
    sub: forwarded unchanged to the cell table as ``sub=sub``
    title: format string for the table title; ``{Yname}`` is replaced by
           the capitalized name of Y. A false value omits the title.

    Returns the populated ``textab.Table``.
    """
    Y = _data.asfactor(Y)
    X = _data.asfactor(X)
    ct = _structure.celltable(Y, X, sub=sub)

    # ids of the Y categories, in the order the columns will appear
    category_ids = ct.Y.cells.keys()
    n_categories = len(category_ids)
    last_column = n_categories + 1  # one leading column for the X cell label

    out = textab.Table('l' * last_column)

    # first header row: empty corner cell, then Y's name centered over
    # all category columns
    out.cell()
    out.cell(Y.name, width=n_categories, just='c')
    out.midrule(span=(2, last_column))

    # second header row: X's name followed by one label per Y category
    out.cell(X.name)
    for category in category_ids:
        out.cell(ct.Y.cells[category])
    out.midrule()

    # one row per X cell: the cell label, then the count of each category
    for index in ct.indexes:
        out.cell(ct.cells[index])
        cell_data = ct.data[index]
        counts = [np.sum(category == cell_data) for category in category_ids]
        for count in counts:
            out.cell(count)

    if not title:
        return out

    out.title(title.format(Yname=Y.name.capitalize()))
    return out
def __init__(self, Y, X, match=None, sub=None, samples=1000, replacement=True, title="Bootstrapped Pairwise Tests"):
    """
    Resample Y and compute pairwise t statistics between the cells of X.

    Y: dependent variable; converted with ``asvar(Y, sub)``
    X: factor defining the groups to compare; converted with
       ``asfactor(X, sub)``
    match: factor identifying the matching unit. When it is false
           (including the default None) NotImplementedError is raised,
           so only the matched case is implemented here.
    sub: index applied to Y and X through asvar/asfactor, and to match
         by direct indexing
    samples: number of resamples requested from ``_resample``
    replacement: forwarded to ``_resample``
    title: stored on the instance as ``self.title``

    Raises AssertionError when Y, X and match differ in length, and
    NotImplementedError when no match is given.
    """
    Y = asvar(Y, sub)
    X = asfactor(X, sub)
    assert len(Y) == len(X), "dataset length mismatch"
    # NOTE(review): this is a truthiness test, not ``is not None`` --
    # confirm that is the intended check for the match object's type
    if match:
        if sub is not None:
            match = match[sub]
        assert len(match) == len(Y), "dataset length mismatch"

    # prepare data container: row 0 holds the original data, rows
    # 1..samples hold the resampled data
    resampled = np.empty((samples + 1, len(Y)))
    # sample X subject within category
    resampled[0] = Y.x
    # fill resampled
    for i, Y_resampled in _resample(Y, unit=match, samples=samples, replacement=replacement):
        resampled[i + 1] = Y_resampled.x
    self.resampled = resampled

    cells = X.cells
    n_groups = len(cells)

    # NOTE(review): the unmatched case is rejected only here, after the
    # resampling above has already been carried out
    if match:
        # if there are several values per X%match cell, take the average
        # T: indexes to transform Y.x to [X%match, value]-array
        # Rows of T are ordered X cell first, match cell second, so the
        # rows belonging to one X cell are contiguous.
        match_cell_ids = match.cells
        group_size = len(match_cell_ids)
        T = None; i = 0
        for X_cell in cells:
            for match_cell in match_cell_ids:
                source_indexes = np.where((X == X_cell) * (match == match_cell))[0]
                if T is None:
                    # T is allocated lazily: its width is taken from the
                    # number of cases found in the first X%match cell
                    n_cells = n_groups * group_size
                    T = np.empty((n_cells, len(source_indexes)), dtype=int)
                # NOTE(review): this assignment needs every X%match cell
                # to contain as many cases as the first one -- confirm
                # callers guarantee a balanced design
                T[i, :] = source_indexes
                i += 1

        if T.shape[1] == 1:
            # exactly one case per cell: plain column reordering
            T = T[:, 0]
            ordered = resampled[:, T]
        else:
            # several cases per cell: indexing with the 2-d T adds a
            # trailing axis, which is averaged away
            ordered = resampled[:, T].mean(axis=2)
        self.ordered = ordered

        # t-tests
        # one column of t per unordered pair of groups
        n_comparisons = sum(range(n_groups))
        t = np.empty((samples + 1, n_comparisons))
        comp_names = []
        # groups[k] holds the column indexes in ``ordered`` of group k
        one_group = np.arange(group_size)
        groups = [one_group + i * group_size for i in range(n_groups)]
        for i, (g1, g2) in enumerate(itertools.combinations(range(n_groups), 2)):
            group_1 = groups[g1]
            group_2 = groups[g2]
            # differences between the two groups, paired by match cell
            diffs = ordered[:, group_1] - ordered[:, group_2]
            # mean * sqrt(n) / sample standard deviation of the
            # differences, computed for every row at once
            t[:, i] = np.mean(diffs, axis=1) * np.sqrt(group_size) / np.std(diffs, axis=1, ddof=1)
            comp_names.append(' - '.join((cells[g1], cells[g2])))

        # NOTE(review): only the differences of the last comparison
        # remain in ``diffs`` at this point
        self.diffs = diffs
        # per resample (rows 1..samples), the largest absolute t across
        # all comparisons
        self.t_resampled = np.max(np.abs(t[1:]), axis=1)
        # from here on ``t`` is only row 0, i.e. the t values of the
        # original data
        self.t = t = t[0]
    else:
        raise NotImplementedError

    self._Y = Y
    self._X = X
    self._group_names = cells
    # per-group values of the original data (row 0 of ``ordered``)
    self._group_data = np.array([ordered[0, g] for g in groups])
    self._group_size = group_size
    self._df = group_size - 1
    self._match = match
    self._n_samples = samples
    self._replacement = replacement
    self._comp_names = comp_names
    # presumably test_param/test_boot read attributes assigned above, so
    # these calls are kept after them -- verify before reordering
    self._p_parametric = self.test_param(t)
    self._p_boot = self.test_boot(t)
    self.title = title