def test_chisquare_power():
    """Compare chisquare_power with every stored case in pwr_chisquare.

    Each case supplies the inputs (w, N, df, sig_level) together with the
    expected power; the computed power has to agree to 6 decimals.
    """
    from .results.results_power import pwr_chisquare

    for expected in itervalues(pwr_chisquare):
        # the third positional argument is the stored df plus one
        computed = chisquare_power(
            expected.w,
            expected.N,
            expected.df + 1,
            alpha=expected.sig_level,
        )
        # dump the whole case so a failure identifies its inputs
        case_dump = repr(vars(expected))
        assert_almost_equal(
            computed, expected.power, decimal=6, err_msg=case_dump)
def add_dict(self, d, ncols=2, align='l', float_format="%.4f"):
    '''Append the key/value pairs of a dict to the summary as a table

    Parameters
    ----------
    d : dict
        Every key and every value is passed through ``_formatter``
        together with `float_format`. Users are encouraged to format
        them before using add_dict.
    ncols : int
        Number of (key, value) column pairs placed side by side; the
        resulting array therefore has ``2 * ncols`` columns.
    align : str
        Data alignment (l/c/r), forwarded to ``add_array``.
    float_format : str
        Format forwarded to ``_formatter`` for each key and value.
    '''
    formatted_keys = [_formatter(k, float_format) for k in iterkeys(d)]
    formatted_vals = [_formatter(v, float_format) for v in itervalues(d)]
    pairs = np.array(lzip(formatted_keys, formatted_vals))

    # pad with empty rows so the rows split evenly into ncols chunks
    leftover = pairs.shape[0] % ncols
    if leftover != 0:
        filler = np.array((ncols - leftover) * [['', '']])
        pairs = np.vstack([pairs, filler])

    # lay the chunks next to each other, left to right
    chunks = np.split(pairs, ncols)
    table = chunks[0]
    for chunk in chunks[1:]:
        table = np.hstack([table, chunk])

    self.add_array(table, align=align)
def add_dict(self, d, ncols=2, align='l', float_format="%.4f"):
    '''Add the contents of a dict to the summary as a side-by-side table

    Parameters
    ----------
    d : dict
        Keys and values are each run through ``_formatter`` with
        `float_format`. Users are encouraged to format them before
        using add_dict.
    ncols : int
        How many (key, value) column pairs the output table holds next
        to each other (the array passed on has ``2 * ncols`` columns).
    align : string
        Data alignment (l/c/r); handed to ``add_array`` unchanged.
    float_format : string
        Format handed to ``_formatter`` for every key and value.
    '''
    key_cells = [_formatter(key, float_format) for key in iterkeys(d)]
    val_cells = [_formatter(val, float_format) for val in itervalues(d)]
    cells = np.array(lzip(key_cells, val_cells))

    n_rows = cells.shape[0]
    if n_rows % ncols != 0:
        # empty rows make the row count a multiple of ncols
        n_missing = ncols - (n_rows % ncols)
        blanks = np.array(n_missing * [['', '']])
        cells = np.vstack([cells, blanks])

    # split into ncols equal blocks and join them horizontally in order
    blocks = np.split(cells, ncols)
    joined = blocks[0]
    for block in blocks[1:]:
        joined = np.hstack([joined, block])

    self.add_array(joined, align=align)
def test_chisquare_power():
    """Check chisquare_power against each entry stored in pwr_chisquare.

    An entry carries the call inputs (w, N, df, sig_level) and the power
    that is expected; the comparison uses 6 decimals.
    """
    from .results.results_power import pwr_chisquare

    for ref in itervalues(pwr_chisquare):
        # third positional argument: the entry's df incremented by one
        df_plus_one = ref.df + 1
        result = chisquare_power(ref.w, ref.N, df_plus_one,
                                 alpha=ref.sig_level)
        # the failure message shows every attribute of the entry
        details = repr(vars(ref))
        assert_almost_equal(result, ref.power, decimal=6, err_msg=details)
def interactions(terms, order=(1, 2)):
    """
    Output the sum of all interactions of the given orders of a sequence
    of terms.

    The argument order is a sequence specifying which order
    of interactions should be generated -- the default
    creates main effects and two-way interactions. If order
    is an integer, it is changed to range(1,order+1), so
    order=3 is equivalent to order=[1,2,3], generating
    all one, two and three-way interactions.

    Entries of order smaller than 1 or greater than len(terms) do not
    contribute any interaction.

    Parameters
    ----------
    terms : sequence
        Objects supporting ``*`` and ``+``. They are combined with the
        non-mutating operators, so the items of `terms` are left
        untouched even if they implement in-place arithmetic.
    order : int or sequence of int
        Interaction orders to generate.

    Returns
    -------
    The sum over all generated products, accumulated in generation order
    (orders as listed in `order`, index combinations in lexicographic
    order within one order).

    Raises
    ------
    IndexError
        If no interaction is generated at all (e.g. empty `terms`).

    >>> print(interactions([Term(l) for l in ['a', 'b', 'c']]))
    <formula: a*b + a*c + b*c + a + b + c>
    >>>
    >>> print(interactions([Term(l) for l in ['a', 'b', 'c']], order=list(range(5))))
    <formula: a*b + a*b*c + a*c + b*c + a + b + c>
    >>>

    """
    # local import so the function does not depend on file-level imports
    from itertools import combinations

    n_terms = len(terms)

    # a scalar order k is shorthand for all orders 1..k
    if np.asarray(order).shape == ():
        order = list(range(1, int(order) + 1))

    # keyed by index tuple so that a repeated order does not add a
    # product twice; dicts keep the insertion order used for the sum
    values = {}
    for o in order:
        if o < 1:
            # an "interaction" of no terms is not defined
            continue
        # strictly increasing index tuples, in lexicographic order
        for combo in combinations(range(n_terms), o):
            v = terms[combo[0]]
            for j in combo[1:]:
                v = v * terms[j]
            values[combo] = v

    if not values:
        raise IndexError('no interactions generated for the given '
                         'terms and order')

    summands = iter(values.values())
    value = next(summands)
    for v in summands:
        value = value + v
    return value
def interactions(terms, order=(1, 2)):
    """
    Output the sum of all interactions of the given orders of a sequence
    of terms.

    The argument order is a sequence specifying which order
    of interactions should be generated -- the default
    creates main effects and two-way interactions. If order
    is an integer, it is changed to range(1,order+1), so
    order=3 is equivalent to order=[1,2,3], generating
    all one, two and three-way interactions.

    Entries of order that are below 1 or above len(terms) generate
    nothing.

    Parameters
    ----------
    terms : sequence
        Objects supporting ``*`` and ``+``. Only the non-mutating
        operators are used, so the items of `terms` are never modified,
        even when they implement in-place arithmetic.
    order : int or sequence of int
        Interaction orders to generate.

    Returns
    -------
    The sum of all generated products, added up in generation order
    (orders in the sequence given, index combinations lexicographically
    within each order).

    Raises
    ------
    IndexError
        If nothing is generated (for instance when `terms` is empty).

    >>> print(interactions([Term(l) for l in ['a', 'b', 'c']]))
    <formula: a*b + a*c + b*c + a + b + c>
    >>>
    >>> print(interactions([Term(l) for l in ['a', 'b', 'c']], order=list(range(5))))
    <formula: a*b + a*b*c + a*c + b*c + a + b + c>
    >>>

    """
    # imported here so the function needs nothing beyond numpy at file level
    from itertools import combinations

    n_terms = len(terms)

    # integer order k means "all orders from 1 up to k"
    if np.asarray(order).shape == ():
        order = list(range(1, int(order) + 1))

    # index tuple -> product; the dict both removes duplicates caused by
    # repeated orders and remembers the order of first insertion
    values = {}
    for o in order:
        if o < 1:
            # nothing to multiply for a non-positive order
            continue
        # combinations() yields exactly the sorted, duplicate-free index
        # tuples, without building the full n_terms**o grid
        for idx in combinations(range(n_terms), o):
            factors = [terms[j] for j in idx]
            v = factors[0]
            for factor in factors[1:]:
                v = v * factor
            values[idx] = v

    if not values:
        raise IndexError('no interactions generated for the given '
                         'terms and order')

    products = list(values.values())
    value = products[0]
    for v in products[1:]:
        value = value + v
    return value
def termcolumns(self, query_term, dict=False):
    """
    Look up the column indices associated with a given term.

    For every name returned by ``query_term.names()`` the position of
    that name in ``self._names`` is recorded.

    Parameters
    ----------
    query_term : term-like
        Object that ``self.hasterm`` accepts and that has ``names()``.
    dict : bool
        If true, return an OrderedDict mapping each name to its index
        instead of the plain list of indices.

    Raises
    ------
    ValueError
        If ``self.hasterm(query_term)`` is false.
    """
    if not self.hasterm(query_term):
        raise ValueError('term not in formula')

    columns = OrderedDict()
    for colname in query_term.names():
        columns[colname] = self._names.index(colname)

    if dict:
        return columns
    return list(itervalues(columns))
def update(self, params):
    """
    Update the global odds ratio based on the current value of
    params.

    Parameters
    ----------
    params : array_like
        Not read by this method; the expected values are taken from
        ``self.model.cached_means``.

    Notes
    -----
    For every group the 2x2 table of each key in ``self.cpp[i]`` is
    accumulated from the four joint-expectation matrices, the tables are
    pooled with ``self.pooled_odds_ratio`` and ``self.dep_params`` is
    rescaled by ``self.crude_or / pooled``. If the result is not finite,
    ``dep_params`` is reset to 1 and a ConvergenceWarning is issued.
    """
    cpp = self.cpp
    cached_means = self.model.cached_means

    # This will happen if all the clusters have only
    # one observation
    if len(cpp[0]) == 0:
        return

    # one table per key of the first group's index mapping
    tables = {ky: np.zeros((2, 2), dtype=np.float64) for ky in cpp[0]}

    for i in range(self.model.num_group):

        # second element of the cached pair is not needed here
        endog_expval, _ = cached_means[i]

        # presumably emat_ab[j, k] is the expected joint indicator of
        # (y_j == a, y_k == b) -- depends on get_eyy; verify there
        emat_11 = self.get_eyy(endog_expval, i)
        emat_10 = endog_expval[:, None] - emat_11
        emat_01 = -emat_11 + endog_expval
        emat_00 = 1. - (emat_11 + emat_10 + emat_01)

        for ky, ix in cpp[i].items():
            # ix holds (row, column) index pairs, one pair per row
            rows, cols = ix[:, 0], ix[:, 1]
            table = tables[ky]
            table[1, 1] += emat_11[rows, cols].sum()
            table[1, 0] += emat_10[rows, cols].sum()
            table[0, 1] += emat_01[rows, cols].sum()
            table[0, 0] += emat_00[rows, cols].sum()

    cor_expval = self.pooled_odds_ratio(list(tables.values()))

    self.dep_params *= self.crude_or / cor_expval
    if not np.isfinite(self.dep_params):
        self.dep_params = 1.
        warnings.warn("dep_params became inf, resetting to 1",
                      ConvergenceWarning)
def _statistical_coloring(data):
    """evaluate colors from the independence properties of the matrix

    Every key of the (normalized) data gets a color and a hatch derived
    from how many standard deviations its observed count lies away from
    the count expected under independence of the levels.

    It will encounter problems if one category has all zeros (the
    standard deviation of the expected count is then zero).

    Returns a dict mapping each key to
    ``{'color': [red, green, blue], 'hatch': hatch}``.
    """
    data = _normalize_data(data, None)
    categories_levels = _categories_level(list(iterkeys(data)))
    n_levels = len(categories_levels)
    total = 1.0 * sum(v for v in itervalues(data))

    # for every level, the share of all observations carried by each
    # label of that level
    levels_count = []
    for level_idx in range(n_levels):
        shares = {}
        for label in categories_levels[level_idx]:
            accumulated = 0.0
            for key, count in iteritems(data):
                if label == key[level_idx]:
                    accumulated += count
            shares[label] = accumulated / total
        levels_count.append(shares)

    props = {}
    for key in iterkeys(data):
        # probability of the cell under the hypothesis of independence
        base = 1.0
        for position, label in enumerate(key):
            base *= levels_count[position][label]

        # expected count and its standard deviation from a binomial
        # distribution
        mean = base * total
        std = np.sqrt(total * base * (1.0 - base))

        # distance from the expected value in standard deviations
        dev = (data[key] - mean) / std

        # excess turns the tile red, deficit turns it blue
        if dev < 0:
            red = 0.0
        else:
            red = dev / (1 + dev)
        if dev > 0:
            blue = 0.0
        else:
            blue = dev / (-1 + dev)
        green = (1.0 - red - blue) / 2.0

        # hatch only the cells more than two deviations away
        if dev > 2:
            hatch = 'x'
        elif dev < -2:
            hatch = 'o'
        else:
            hatch = ''

        props[key] = {'color': [red, green, blue], 'hatch': hatch}
    return props
def observed_crude_oddsratio(self):
    """Pool the observed data into one crude odds ratio.

    The crude odds ratio is obtained by pooling all data
    corresponding to a given pair of cut points (c,c'), then
    forming the inverse variance weighted average of these odds
    ratios to obtain a single OR. Since the covariate effects are
    ignored, this OR will generally be greater than the stratified
    OR.

    Returns whatever ``self.pooled_odds_ratio`` returns for the list
    of accumulated 2x2 tables.
    """
    cpp = self.cpp
    endog = self.model.endog_li

    # one 2x2 contingency table for each (c,c') of the first group
    tables = dict(
        (pair, np.zeros((2, 2), dtype=np.float64))
        for pair in iterkeys(cpp[0]))

    for i in range(len(endog)):

        # observed values of the current cluster; empty ones are skipped
        yvec = endog[i]
        if len(yvec) == 0:
            continue

        # observed joint values, keyed by the table cell they feed
        joint = [
            ((1, 1), np.outer(yvec, yvec)),
            ((1, 0), np.outer(yvec, 1 - yvec)),
            ((0, 1), np.outer(1 - yvec, yvec)),
            ((0, 0), np.outer(1 - yvec, 1 - yvec)),
        ]

        group_pairs = cpp[i]
        for pair in iterkeys(group_pairs):
            ix = group_pairs[pair]
            rows = ix[:, 0]
            cols = ix[:, 1]
            for cell, outer in joint:
                tables[pair][cell] += outer[rows, cols].sum()

    pooled_input = list(itervalues(tables))
    return self.pooled_odds_ratio(pooled_input)
def __init__(self, endog, exog, tree, paramsind):
    """Store the inputs and set up the parameter bookkeeping.

    Parameters
    ----------
    endog : object
        Stored unchanged as ``self.endog``.
    exog : object
        Stored unchanged as ``self.datadict``.
    tree : object
        Tree description; passed to ``getnodes`` to obtain the branches,
        the leaves and the degenerate branches.
    paramsind : dict
        Maps branch and leaf names to lists of coefficient names.
    """
    self.endog = endog
    self.datadict = exog
    self.tree = tree
    self.paramsind = paramsind

    self.branchsum = ''
    self.probs = {}
    self.probstxt = {}
    self.branchleaves = {}
    self.branchvalues = {}  # just to keep track of returns by branches
    self.branchsums = {}
    self.bprobs = {}
    self.branches, self.leaves, self.branches_degenerate = getnodes(tree)
    self.nbranches = len(self.branches)

    # copied over but not quite sure yet
    # unique parameter array names: the coefficient names sorted
    # alphabetically, followed by one 'tau_<branch>' per branch;
    # the order is/should be only internal
    coef_names = sorted(
        {name for names in itervalues(paramsind) for name in names})
    tau_names = ['tau_%s' % bname for bname in self.branches]
    self.paramsnames = coef_names + tau_names
    self.nparams = len(self.paramsnames)

    # mapping coefficient names to indices to unique/parameter array
    self.paramsidx = {
        name: idx for idx, name in enumerate(self.paramsnames)}

    # mapping branch and leaf names to index in parameter array
    self.parinddict = {
        k: [self.paramsidx[j] for j in v]
        for k, v in iteritems(self.paramsind)}

    # all parameters start at zero except the tau's (the trailing
    # nbranches entries), which start at one
    self.recursionparams = np.zeros(len(self.paramsnames))
    if self.nbranches:
        # guard: with no branches the slice [-0:] would select everything
        self.recursionparams[-self.nbranches:] = 1  # values for tau's
def __init__(self, endog, exog, tree, paramsind):
    """Keep the inputs and build the name/index tables for the parameters.

    Parameters
    ----------
    endog : object
        Kept as ``self.endog`` without modification.
    exog : object
        Kept as ``self.datadict`` without modification.
    tree : object
        Tree description; ``getnodes(tree)`` supplies the branches, the
        leaves and the degenerate branches.
    paramsind : dict
        Maps branch and leaf names to lists of coefficient names.
    """
    self.endog = endog
    self.datadict = exog
    self.tree = tree
    self.paramsind = paramsind

    self.branchsum = ''
    self.probs = {}
    self.probstxt = {}
    self.branchleaves = {}
    self.branchvalues = {}  # just to keep track of returns by branches
    self.branchsums = {}
    self.bprobs = {}
    self.branches, self.leaves, self.branches_degenerate = getnodes(tree)
    self.nbranches = len(self.branches)

    # copied over but not quite sure yet
    # unique, parameter array names,
    # sorted alphabetically, order is/should be only internal;
    # the 'tau_<branch>' names are appended after the sorted names
    unique_names = set()
    for names in itervalues(paramsind):
        unique_names.update(names)
    self.paramsnames = (sorted(unique_names) +
                        ['tau_%s' % bname for bname in self.branches])
    self.nparams = len(self.paramsnames)

    # mapping coefficient names to indices to unique/parameter array
    self.paramsidx = dict(
        (name, idx) for (idx, name) in enumerate(self.paramsnames))

    # mapping branch and leaf names to index in parameter array
    self.parinddict = dict(
        (k, [self.paramsidx[j] for j in v])
        for k, v in iteritems(self.paramsind))

    # zeros everywhere, ones for the tau's stored in the last
    # nbranches positions
    self.recursionparams = np.zeros(len(self.paramsnames))
    if self.nbranches > 0:
        # without this check, zero branches would turn the slice into
        # [-0:], i.e. the whole array
        self.recursionparams[-self.nbranches:] = 1  # values for tau's
def _create_labels(rects, horizontal, ax, rotation):
    """find the position of the label for each value of each category

    right now it supports only up to the four categories

    Parameters
    ----------
    rects : dict
        Maps each category key (a tuple of labels, one per level) to a
        rectangle ``(x, y, w, h)``.
    horizontal : bool
        Direction of the first split; selects which axis side every
        level is labelled on.
    ax : axes-like
        the axis on which the label should be applied; its ``twinx`` and
        ``twiny`` companions carry the labels of the opposite sides.
    rotation : sequence
        the rotation list for each side, indexed by level.

    Returns
    -------
    dict
        ``labels``; nothing in this function fills it, so it is always
        empty. The tick positions and labels are set on the axes instead.

    Raises
    ------
    ValueError
        If more than 4 category levels are present.
    """
    categories = _categories_level(list(iterkeys(rects)))
    if len(categories) > 4:
        msg = ("maximum of 4 level supported for axes labeling... and 4 "
               "is already a lot of levels, are you sure you need them all?")
        raise ValueError(msg)
    labels = {}
    # keep it fixed as will be used a lot of times
    items = list(iteritems(rects))
    vertical = not horizontal

    # get the axis ticks and labels locator to put the correct values!
    ax2 = ax.twinx()
    ax3 = ax.twiny()
    # this is the order of execution for horizontal disposition
    ticks_pos = [ax.set_xticks, ax.set_yticks, ax3.set_xticks, ax2.set_yticks]
    ticks_lab = [
        ax.set_xticklabels, ax.set_yticklabels,
        ax3.set_xticklabels, ax2.set_yticklabels
    ]
    # for the vertical one, rotate it by one
    if vertical:
        ticks_pos = ticks_pos[1:] + ticks_pos[:1]
        ticks_lab = ticks_lab[1:] + ticks_lab[:1]
    # clean them
    for pos, lab in zip(ticks_pos, ticks_lab):
        pos([])
        lab([])

    # for each level, for each value in the level, take the mean of all
    # the sublevel that correspond to that partial key
    for level_idx, level in enumerate(categories):
        # to which level it should refer to get the preceding
        # values of labels? it's rather a tricky question...
        # this is dependent on the side. It's a very crude management
        # but I couldn't think a more general way...
        # (depends only on the level, so it is computed once per level)
        if horizontal:
            if level_idx == 3:
                index_select = [-1, -1, -1]
            else:
                index_select = [+0, -1, -1]
        else:
            if level_idx == 3:
                index_select = [+0, -1, +0]
            else:
                index_select = [-1, -1, -1]
        # labels of the preceding levels that prefix every key looked up
        # at this level
        prefix = tuple(categories[i][index_select[i]]
                       for i in range(level_idx))
        # odd sides are labelled along y, even sides along x
        side = (level_idx + vertical) % 4

        # this dictionary keep the labels only for this level
        level_ticks = dict()
        for value in level:
            # now I create the base key name and append the current value
            # It will search on all the rects to find the corresponding one
            # and use them to evaluate the mean position
            basekey = prefix + (value, )
            vals = [v for k, v in items if basekey == k[:level_idx + 1]]
            # now I extract the center of all the tiles and make a weighted
            # mean of all these center on the area of the tile
            # this should give me the (more or less) correct position
            # of the center of the category
            W = sum(w * h for (x, y, w, h) in vals)
            x_lab = sum(_get_position(x, w, h, W) for (x, y, w, h) in vals)
            y_lab = sum(_get_position(y, h, w, W) for (x, y, w, h) in vals)
            # now base on the ordering, select which position to keep
            # needs to be written in a more general form of 4 level are
            # enough? should give also the horizontal and vertical alignment
            level_ticks[value] = y_lab if side % 2 else x_lab
        # now we add the labels of this level to the correct axis
        ticks_pos[level_idx](list(itervalues(level_ticks)))
        ticks_lab[level_idx](list(iterkeys(level_ticks)),
                             rotation=rotation[level_idx])
    return labels
'h': 7, 'top': 1000} ''' modru2 = RU2NMNL(endog, datadict2, tree2, paramsind2) modru2.recursionparams[-3] = 2 modru2.recursionparams[3] = 1 print('\n\nExample 2') print('---------\n') print(modru2.calc_prob(modru2.tree)) print('Tree') pprint(modru2.tree) print('\nmodru.probs') pprint(modru2.probs) print('sum of probs', sum(list(itervalues(modru2.probs)))) print('branchvalues') print(modru2.branchvalues) print(modru.branchvalues) print('branch probabilities') print(modru.bprobs) print('degenerate branches') print(modru.branches_degenerate) ''' >>> modru.bprobs {'Fly': [], 'top': [0.0016714179077931082, 0.99832858209220687], 'Ground': []} >>> modru2.bprobs {'top': [0.25000000000000006, 0.62499999999999989, 0.12500000000000003], 'B22': [], 'B21': [], 'B1': [], 'B2': [0.40000000000000008, 0.59999999999999998], 'B3': []} '''
''' modru2 = RU2NMNL(endog, datadict2, tree2, paramsind2) modru2.recursionparams[-3] = 2 modru2.recursionparams[3] = 1 print('\n\nExample 2') print('---------\n') print(modru2.calc_prob(modru2.tree)) print('Tree') pprint(modru2.tree) print('\nmodru.probs') pprint(modru2.probs) print('sum of probs', sum(list(itervalues(modru2.probs)))) print('branchvalues') print(modru2.branchvalues) print(modru.branchvalues) print('branch probabilities') print(modru.bprobs) print('degenerate branches') print(modru.branches_degenerate) ''' >>> modru.bprobs {'Fly': [], 'top': [0.0016714179077931082, 0.99832858209220687], 'Ground': []} >>> modru2.bprobs {'top': [0.25000000000000006, 0.62499999999999989, 0.12500000000000003], 'B22': [], 'B21': [], 'B1': [], 'B2': [0.40000000000000008, 0.59999999999999998], 'B3': []}
"B21": [], "c": ["consta", "p", "time"], "d": ["consta", "p", "time"], "B22": ["x22"], "e": ["conste", "p", "hince"], "f": ["constt", "p", "hincf"], "g": ["p", "hincg"], "B3": [], "h": ["consth", "p", "h"], "top": [], } # unique, parameter array names, # sorted alphabetically, order is/should be only internal paramsnames = sorted(set([i for j in itervalues(paramsind) for i in j])) # mapping coefficient names to indices to unique/parameter array paramsidx = dict((name, idx) for (idx, name) in enumerate(paramsnames)) # mapping branch and leaf names to index in parameter array inddict = dict((k, [paramsidx[j] for j in v]) for k, v in iteritems(paramsind)) """ >>> paramsnames ['const2', 'consta', 'constb', 'conste', 'consth', 'constt', 'h', 'hince', 'hincf', 'hincg', 'p', 'time', 'x2', 'x22'] >>> parmasidx {'conste': 3, 'consta': 1, 'constb': 2, 'h': 6, 'time': 11, 'consth': 4, 'p': 10, 'constt': 5, 'const2': 0, 'x2': 12, 'x22': 13, 'hince': 7, 'hincg': 9, 'hincf': 8}
'B21': [], 'c': ['consta', 'p', 'time'], 'd': ['consta', 'p', 'time'], 'B22': ['x22'], 'e': ['conste', 'p', 'hince'], 'f': ['constt', 'p', 'hincf'], 'g': ['p', 'hincg'], 'B3': [], 'h': ['consth', 'p', 'h'], 'top': [] } #unique, parameter array names, #sorted alphabetically, order is/should be only internal paramsnames = sorted(set([i for j in itervalues(paramsind) for i in j])) #mapping coefficient names to indices to unique/parameter array paramsidx = dict((name, idx) for (idx, name) in enumerate(paramsnames)) #mapping branch and leaf names to index in parameter array inddict = dict((k, [paramsidx[j] for j in v]) for k, v in iteritems(paramsind)) ''' >>> paramsnames ['const2', 'consta', 'constb', 'conste', 'consth', 'constt', 'h', 'hince', 'hincf', 'hincg', 'p', 'time', 'x2', 'x22'] >>> parmasidx {'conste': 3, 'consta': 1, 'constb': 2, 'h': 6, 'time': 11, 'consth': 4, 'p': 10, 'constt': 5, 'const2': 0, 'x2': 12, 'x22': 13, 'hince': 7, 'hincg': 9, 'hincf': 8} >>> inddict
def _create_labels(rects, horizontal, ax, rotation):
    """find the position of the label for each value of each category

    right now it supports only up to the four categories

    Parameters
    ----------
    rects : dict
        Maps each category key (a tuple of labels, one per level) to a
        rectangle ``(x, y, w, h)``.
    horizontal : bool
        Direction of the first split; decides on which axis side each
        level is labelled.
    ax : axes-like
        the axis on which the label should be applied; the axes returned
        by its ``twinx`` and ``twiny`` hold the labels of the opposite
        sides.
    rotation : sequence
        the rotation list for each side, indexed by level.

    Returns
    -------
    dict
        ``labels``; it is never filled in this function and is therefore
        always empty. Ticks and tick labels are set on the axes instead.

    Raises
    ------
    NotImplementedError
        If more than 4 category levels are present.
    """
    categories = _categories_level(list(iterkeys(rects)))
    if len(categories) > 4:
        msg = ("maximum of 4 level supported for axes labeling.. and 4 "
               "is already a lot of level, are you sure you need them all?")
        raise NotImplementedError(msg)
    labels = {}
    # keep it fixed as will be used a lot of times
    items = list(iteritems(rects))
    vertical = not horizontal

    # get the axis ticks and labels locator to put the correct values!
    ax2 = ax.twinx()
    ax3 = ax.twiny()
    # this is the order of execution for horizontal disposition
    ticks_pos = [ax.set_xticks, ax.set_yticks, ax3.set_xticks, ax2.set_yticks]
    ticks_lab = [ax.set_xticklabels, ax.set_yticklabels,
                 ax3.set_xticklabels, ax2.set_yticklabels]
    # for the vertical one, rotate it by one
    if vertical:
        ticks_pos = ticks_pos[1:] + ticks_pos[:1]
        ticks_lab = ticks_lab[1:] + ticks_lab[:1]
    # clean them
    for pos, lab in zip(ticks_pos, ticks_lab):
        pos([])
        lab([])

    # for each level, for each value in the level, take the mean of all
    # the sublevel that correspond to that partial key
    for level_idx, level in enumerate(categories):
        # to which level it should refer to get the preceding
        # values of labels? it's rather a tricky question...
        # this is dependent on the side. It's a very crude management
        # but I couldn't think a more general way...
        # (the choice is the same for every value of the level, so it is
        # made once here instead of inside the loop over values)
        if horizontal:
            index_select = [-1, -1, -1] if level_idx == 3 else [+0, -1, -1]
        else:
            index_select = [+0, -1, +0] if level_idx == 3 else [-1, -1, -1]
        # key prefix built from the selected labels of the earlier levels
        parent_key = tuple(categories[i][index_select[i]]
                           for i in range(level_idx))
        # odd sides take the y position, even sides the x position
        side = (level_idx + vertical) % 4
        depth = level_idx + 1

        # this dictionary keep the labels only for this level
        level_ticks = dict()
        for value in level:
            # now I create the base key name and append the current value
            # It will search on all the rects to find the corresponding one
            # and use them to evaluate the mean position
            basekey = parent_key + (value,)
            vals = [rect for key, rect in items if basekey == key[:depth]]
            # now I extract the center of all the tiles and make a weighted
            # mean of all these center on the area of the tile
            # this should give me the (more or less) correct position
            # of the center of the category
            W = sum(w * h for (x, y, w, h) in vals)
            x_lab = sum(_get_position(x, w, h, W) for (x, y, w, h) in vals)
            y_lab = sum(_get_position(y, h, w, W) for (x, y, w, h) in vals)
            # now base on the ordering, select which position to keep
            # needs to be written in a more general form of 4 level are
            # enough? should give also the horizontal and vertical alignment
            level_ticks[value] = y_lab if side % 2 else x_lab
        # now we add the labels of this level to the correct axis
        ticks_pos[level_idx](list(itervalues(level_ticks)))
        ticks_lab[level_idx](list(iterkeys(level_ticks)),
                             rotation=rotation[level_idx])
    return labels