def __init__(self, observed, expected=None):
    """Coerce and validate the observed counts, then initialise state.

    Parameters
    ----------
    observed
        a DictArray instance, or something that can be converted to one.
        Values must be integers.
    expected
        provide in the case where you know the prior proportions, otherwise
        calculated from marginal frequencies

    Raises
    ------
    ValueError
        if the counts sum to zero or contain a negative value
    NotImplementedError
        if the counts have more than 2 dimensions
    """
    counts = observed if isinstance(observed, DictArray) else DictArray(observed)

    # counts are stored as integers
    counts.array = _astype(counts.array, int)

    if counts.array.sum() == 0:
        raise ValueError("at least one value must be > 0")

    if counts.array.min() < 0:
        raise ValueError("negative values encountered")

    if counts.array.ndim > 2:
        raise NotImplementedError("not designed for >2D")

    # _observed is assigned before expected, preserving the original order
    self._observed = counts
    self.expected = expected
    self._residuals = None
    self._df = None
    self.shape = counts.shape
def test_to_dict_roundtrip(self):
    """roundtrip of DictArray.to_dict() should produce same order."""
    d1 = dict(a=dict(k=1, l=2, m=3), b=dict(k=4, l=5, m=6))
    darr1 = DictArray(d1)

    # exporting to a dict must give back the nested dict we started from
    d2 = darr1.to_dict()
    self.assertEqual(d1, d2)

    # a DictArray rebuilt from the exported dict must still compare equal to
    # the source dict (previously this object was built twice and the first
    # instance was never used)
    darr2 = DictArray(d2)
    self.assertEqual(d1, darr2)
def test_to_dict_1d(self):
    """to_dict(flatten=True) should give a one-level dict keyed by tuples"""
    pairwise = {
        "ABAYE2984": {
            "ABAYE2984": 0,
            "Atu3667": None,
            "Avin_42730": 0.6381173875591908,
            "BAA10469": None,
        },
        "Atu3667": {
            "ABAYE2984": None,
            "Atu3667": 0,
            "Avin_42730": 2.3682377869318993,
            "BAA10469": None,
        },
        "Avin_42730": {
            "ABAYE2984": 0.6381173875591908,
            "Atu3667": 2.3682377869318993,
            "Avin_42730": 0,
            "BAA10469": 1.8515731266342546,
        },
        "BAA10469": {
            "ABAYE2984": None,
            "Atu3667": None,
            "Avin_42730": 1.8515731266342546,
            "BAA10469": 0,
        },
    }
    darr = DictArray(pairwise, dtype="O")

    # expected flattened form: one (row, column) key per cell
    expect = {}
    for row_name in darr.template.names[0]:
        for col_name in darr.template.names[1]:
            expect[(row_name, col_name)] = darr[row_name, col_name]
    self.assertEqual(darr.to_dict(flatten=True), expect)

    # same check for an array produced by wrapping with a template
    template = DictArrayTemplate(["s1", "s2"], ["s1", "s2"])
    darr = template.wrap([[0.0, 0.25], [0.25, 0.0]])
    flattened = darr.to_dict(flatten=True)
    self.assertEqual(
        flattened,
        {
            ("s1", "s2"): 0.25,
            ("s2", "s1"): 0.25,
            ("s1", "s1"): 0.0,
            ("s2", "s2"): 0.0,
        },
    )
def test_inputs_from_dict_array(self):
    """inputs_from_dict_array returns a distance array and a PhyloNode list"""
    distances = {
        "1": {"1": 0, "2": 0.86, "3": 0.92},
        "2": {"1": 0.86, "2": 0, "3": 0.67},
        "3": {"1": 0.92, "2": 0.67, "3": 0},
    }
    matrix_array, PhyloNode_order = inputs_from_dict_array(DictArray(distances))

    # node order follows the input labels
    for position, label in ((0, "1"), (2, "3")):
        self.assertEqual(PhyloNode_order[position].name, label)

    # matrix cells carry the matching pairwise distances
    for (row, col), distance in (((0, 2), 0.92), ((1, 0), 0.86)):
        assert_allclose(matrix_array[row][col], distance)
def __init__(self, observed, expected=None):
    """Validate the observed (and optional expected) counts and store them.

    Parameters
    ----------
    observed
        a DictArray instance, or something that can be converted to one
    expected
        provide in the case where you know the prior proportions, otherwise
        calculated from marginal frequencies. None means "not provided".

    Raises
    ------
    ValueError
        if observed sums to zero, or observed / expected contain a
        negative value
    AssertionError
        raised by assert_allclose when the observed and expected totals
        differ
    """
    if not isinstance(observed, DictArray):
        observed = DictArray(observed)

    if observed.array.sum() == 0:
        raise ValueError("at least one value must be > 0")

    # Test against None rather than truthiness: truth-testing a numpy array
    # with more than one element raises ValueError.
    has_expected = expected is not None
    if has_expected:
        # give expected the same labelling as observed
        expected = observed.template.wrap(expected)

    if observed.array.min() < 0 or (has_expected and expected.array.min() < 0):
        raise ValueError("negative values encountered")

    if has_expected:
        # The message must go through err_msg; written as a trailing tuple
        # element it was silently discarded.
        assert_allclose(
            observed.array.sum(),
            expected.array.sum(),
            err_msg="unequal totals of observed and expected",
        )

    self._observed = observed
    self._expected = expected
    self._residuals = None
    self._df = None
    self.shape = observed.shape
def test_valid_setitem(self):
    """tabular_result accepts table and DictArray values and serialises"""
    tr = tabular_result("null")

    # both supported value types are assigned under separate keys
    tr["result"] = make_table(data={"A": [0, 1]})
    tr["result2"] = DictArray({"A": [0, 1]})

    self.assertIsInstance(tr.to_json(), str)
def test_direct_construction(self):
    """DictArray can be built directly from each supported input form"""
    wrapped = DictArrayTemplate("abc", "ABC").wrap(self.a)
    candidates = (
        [[245, 599]],
        dict(a=dict(b=4, c=5)),
        {("a", "b"): 4, ("a", "c"): 5},
        dict(a=0, b=35, c=45),
        wrapped,
    )
    # construction must not raise for any of the inputs
    for candidate in candidates:
        DictArray(candidate)
def upgma(pairwise_distances):
    """Cluster sequences with the UPGMA algorithm.

    pairwise_distances: a dictionary with pair tuples mapped to a distance
    returns a PhyloNode object of the UPGMA cluster
    """
    matrix_a, node_order = inputs_from_dict_array(DictArray(pairwise_distances))
    tree = UPGMA_cluster(matrix_a, node_order, BIG_NUM)

    # label the parentless node "root" and number any other unnamed node
    num_named = 0
    for node in tree.traverse():
        if not node.parent:
            node.name = "root"
            continue
        if node.name:
            continue
        node.name = "edge.%d" % num_named
        num_named += 1
    return tree
def __init__(
    self,
    header=None,
    rows=None,
    row_order=None,
    digits=4,
    space=4,
    title="",
    missing_data="",
    max_width=1e100,
    row_ids=None,
    legend="",
    column_templates=None,
    dtype=None,
    data_frame=None,
    format="simple",
):
    """
    Parameters
    ----------
    header
        column headings
    rows
        a 2D dict, list or tuple. If a dict, it must have column
        headings as top level keys, and common row labels as keys in each
        column.
    row_order
        the order in which rows will be pulled from the twoDdict
    digits
        floating point resolution
    space
        number of spaces between columns or a string
    title
        as implied
    missing_data
        character assigned if a row has no entry for a column
    max_width
        maximum column width for printing
    row_ids
        if True, the 0'th column is used as row identifiers and keys
        for slicing.
    legend
        table legend
    column_templates
        dict of column headings
        or a function that will handle the formatting.
    dtype
        optional numpy array typecode.
    data_frame
        pandas DataFrame, Table will be created from this
    format
        output format when using str(Table)

    Raises
    ------
    ValueError
        if data_frame is given without pandas installed, if header is
        empty, or if rows is None
    RuntimeError
        if the number of header elements does not match the row width
    """
    if data_frame is not None and not _pandas_available:
        raise ValueError("data_frame provided when pandas not installed")
    elif data_frame is not None:
        if rows or header:
            warnings.warn("provided rows/header will be over ridden by " "DataFrame")

        rows = data_frame.to_records(index=False).tolist()
        header = data_frame.columns.tolist()

    if isinstance(header, numpy.ndarray):
        header = header.tolist()

    if not header:
        raise ValueError("header must be provided to Table")
    elif rows is None:
        raise ValueError("rows cannot be None")

    if len(rows) == 0:
        # no data: use an empty array whose width matches the header
        rows = numpy.empty((0, len(header)))

    # Explicit checks instead of assert statements, which are stripped
    # when python runs with -O and would silently skip this validation.
    try:
        num_cols = len(header)
        if isinstance(rows, numpy.ndarray):
            row_width = rows.shape[1]
        elif isinstance(rows, dict):
            row_width = len(rows)
        else:
            row_width = len(rows[0])
    except (IndexError, TypeError) as err:
        raise RuntimeError("header and rows must be provided to Table") from err

    if num_cols <= 0 or num_cols != row_width:
        raise RuntimeError("header and rows must be provided to Table")

    header = [str(head) for head in header]

    if isinstance(rows, dict):
        rows = convert2DDict(rows, header=header, row_order=row_order)

    # if row_ids, we select that column as the row identifiers
    # (truth test, so an explicit row_ids=False behaves like the default)
    if row_ids:
        identifiers = [row[0] for row in rows]
    else:
        identifiers = len(rows)

    if not dtype:
        dtype = "O"

    DictArray.__init__(self, rows, identifiers, header, dtype=dtype)

    # forcing all column headings to be strings
    self._header = _Header([str(head) for head in header])
    self._missing_data = missing_data

    # default title / legend to be empty strings
    self.title = str(title) if title else ""
    self.legend = str(legend) if legend else ""

    # space is either a count of blanks or the separator string itself
    try:
        self.space = " " * space
    except TypeError:
        self.space = space

    self._digits = digits
    self._row_ids = row_ids
    self._max_width = max_width

    # some attributes are not preserved in any file format, so always based
    # on args
    self._column_templates = column_templates or {}

    self.format = format

    # define the repr() display policy: tables under 50 rows use all rows
    # as the head, longer ones use 5 rows for each of head and tail
    if self.shape[0] < 50:
        head = self.shape[0]
        tail = None
    else:
        head, tail = 5, 5

    self._repr_policy = dict(head=head, tail=tail, random=0)
def row_totals(self):
    """returns totalled row values

    Sums ``self.array`` along axis 1 and returns the result of calling
    ``wrap`` on that array of sums.
    """
    row_sums = self.array.sum(axis=1)
    # NOTE(review): ``wrap`` is called here on a DictArray instance. Other
    # callers invoke ``wrap`` on DictArrayTemplate objects (or on a
    # ``.template`` attribute) -- confirm that DictArray(1, n) is intended
    # rather than a DictArrayTemplate, and that the (1, n) dimensions agree
    # with the shape of row_sums.
    template = DictArray(1, row_sums.shape[0])
    return template.wrap(row_sums)