Esempio n. 1
0
    def test_slicing_combos(self):
        """mixing label slices, label scalars and integer lists when indexing"""
        darr = DictArrayTemplate(list(DNA), list(DNA)).wrap(
            [
                [0.7, 0.1, 0.2, 0.3],
                [0.1, 0.7, 0.1, 0.3],
                [0.3, 0.2, 0.6, 0.3],
                [0.4, 0.1, 0.1, 0.7],
            ]
        )
        # each case is (index handed to __getitem__, expected values);
        # slice("C", "G") is what the literal "C":"G" produces
        cases = [
            ((slice("C", "G"), slice("C", "G")), [[0.7, 0.1], [0.2, 0.6]]),
            (([1, 2], [1, 2]), [[0.7, 0.1], [0.2, 0.6]]),
            (([2, 3], "C"), [0.2, 0.1]),
            (("C", [2, 3]), [0.1, 0.3]),
            (([1, 2], slice("T", "A")), [[0.1, 0.7], [0.3, 0.2]]),
            ((slice("T", "A"), [1, 2]), [[0.1, 0.2], [0.7, 0.1]]),
        ]
        for index, expected in cases:
            selected = darr[index]
            assert_allclose(selected.array, numpy.array(expected))

        # keys that are themselves ints must also be handled
        int_keys = list(range(1, 5))
        darr = DictArrayTemplate(int_keys, int_keys).wrap(
            [
                [0.7, 0.1, 0.2, 0.3],
                [0.1, 0.7, 0.1, 0.3],
                [0.3, 0.2, 0.6, 0.3],
                [0.4, 0.1, 0.1, 0.7],
            ]
        )
        selected = darr[[1, 2], [1, 2]]
        assert_allclose(selected.array, numpy.array([[0.7, 0.1], [0.2, 0.6]]))
Esempio n. 2
0
    def test_convert2DDict(self):
        """output of convert2DDict can be fed straight to DictArrayTemplate"""
        nested = {"a": {"b": 4, "c": 5}}
        vals, row_keys, col_keys = convert2DDict(nested)
        self.assertEqual(set(row_keys), {"a"})
        darr = DictArrayTemplate(row_keys, col_keys).wrap(vals)
        self.assertEqual(darr.array.tolist(), [[4, 5]])
        # names are listed rows first, columns second
        self.assertEqual(darr.template.names, [["a"], ["b", "c"]])

        square = {
            "a": {"a": 0, "b": 1, "e": 0},
            "b": {"a": 1, "b": 0, "e": 4},
            "e": {"a": 0, "b": 4, "e": 0},
        }
        vals, row_keys, col_keys = convert2DDict(square)
        darr = DictArrayTemplate(row_keys, col_keys).wrap(vals)
        # converting back to a dict must give the input
        round_trip = darr.to_dict()
        self.assertEqual(round_trip, square)
        self.assertEqual(darr.template.names, [["a", "b", "e"], ["a", "b", "e"]])

        # make_symmetric=True: row and column keys are expected to match
        nested = {"a": {"b": 4, "c": 5}}
        vals, row_keys, col_keys = convert2DDict(nested, make_symmetric=True)
        self.assertEqual(row_keys, col_keys)
        self.assertEqual(vals, [[0, 4, 5], [4, 0, 0], [5, 0, 0]])
Esempio n. 3
0
    def to_dictarray(self):
        """construct fully enumerated dictarray

        Returns
        -------
        DictArray with dtype of int

        Notes
        -----
        Every combination of the per-dimension categories is looked up on
        self, including combinations that are not keys of self. The result
        can be indexed as if it was a numpy array using key values.
        """
        from itertools import product

        from cogent3.util.dict_array import DictArrayTemplate

        # the first key decides the dimensionality: a str, or anything
        # without a length, is treated as a 1D key
        first = next(iter(self))
        if isinstance(first, str):
            ndim = 1
        else:
            try:
                ndim = len(first)
            except TypeError:
                ndim = 1

        if ndim == 1:
            labels = sorted(self)
            counts = [self[label] for label in labels]
            return DictArrayTemplate(labels).wrap(counts, dtype=int)

        # one sorted list of distinct labels per key position
        categories = [sorted(set(labels)) for labels in zip(*self)]
        shape = tuple(len(cats) for cats in categories)
        darr = DictArrayTemplate(*categories).wrap(numpy.zeros(shape, dtype=int))
        # walk positions rather than labels so no list.index() lookups are needed
        for position in product(*(range(size) for size in shape)):
            comb = tuple(categories[dim][pos] for dim, pos in enumerate(position))
            darr.array[position] = self[comb]

        return darr
Esempio n. 4
0
 def test_to_dict_nested(self):
     """to_dict() on a DictArray holding DictArray elements gives nested
     dict's."""
     inner = DictArrayTemplate("abc", "ABC").wrap(numpy.identity(3, int))
     self.assertEqual(inner.array.tolist(), [[1, 0, 0], [0, 1, 0], [0, 0, 1]])
     # every cell of the outer array is the same inner DictArray
     outer = DictArrayTemplate("de", "DE").wrap([[inner, inner], [inner, inner]])
     as_dict = outer.to_dict()
     self.assertIsInstance(as_dict["d"], dict)
Esempio n. 5
0
 def test_convert_for_dictarray(self):
     """convert_for_dictarray accepts a DictArray as its input"""
     source = DictArrayTemplate("abc", "ABC").wrap(numpy.identity(3, int))
     vals, row_keys, col_keys = convert_for_dictarray(source)
     rebuilt = DictArrayTemplate(row_keys, col_keys).wrap(vals)
     self.assertEqual(rebuilt.array.tolist(), source.array.tolist())
     # the rebuilt instance must not share its array object with the source
     self.assertIsNot(rebuilt.array, source.array)
Esempio n. 6
0
    def test_convert_series(self):
        """output of convert_series can be fed straight to DictArrayTemplate"""

        def as_darr(*args):
            # run convert_series and wrap what it returns
            vals, row_keys, col_keys = convert_series(*args)
            return DictArrayTemplate(row_keys, col_keys).wrap(vals)

        # explicit row and column labels
        labelled = as_darr([[4], [5]], ["A", "B"], ["a"])
        self.assertEqual(labelled.array.tolist(), [[4], [5]])

        # 2D data without labels
        data = [[245, 599]]
        self.assertEqual(as_darr(data).array.tolist(), data)

        # 1D data without labels
        self.assertEqual(as_darr(data[0]).array.tolist(), data[0])
Esempio n. 7
0
 def test_convert_for_dictarray(self):
     """convert_for_dictarray handles each supported kind of input"""
     darr = DictArrayTemplate("abc", "ABC").wrap(self.a)
     inputs = (
         [[245, 599]],
         {"a": {"b": 4, "c": 5}},
         {("a", "b"): 4, ("a", "c"): 5},
         {"a": 0, "b": 35, "c": 45},
         darr,
     )
     for data in inputs:
         vals, row_keys, col_keys = convert_for_dictarray(data)
         # nothing is asserted; the test passes if wrapping does not raise
         DictArrayTemplate(row_keys, col_keys).wrap(vals)
Esempio n. 8
0
 def test_convert_1D_dict(self):
     """output of convert_1D_dict can be fed straight to DictArrayTemplate"""
     counts = {"a": 0, "b": 35, "c": 45}
     vals, keys = convert_1D_dict(counts)
     darr = DictArrayTemplate(keys).wrap(vals)
     self.assertEqual(darr.array.tolist(), [0, 35, 45])
Esempio n. 9
0
    def _get_motif_probs_by_node_tr(self, edges=None, bin=None, locus=None):
        """returns motif probs by node for time-reversible models

        Parameters
        ----------
        edges
            names of the nodes to report, in the order wanted. If None,
            every node of ``self.tree`` is reported in postorder.
        bin, locus
            accepted for signature compatibility, not used by this method

        Returns
        -------
        DictArray with node names as rows and ``self.motifs`` as columns.
        Every row holds the same motif probabilities.

        Raises
        ------
        NotImplementedError
            if more than one parameter rule mentions "mprob", or the model's
            ``mprob_model`` is "monomers"
        KeyError
            if ``edges`` contains a name that is not a node of the tree
        """
        mprob_rules = [
            r for r in self.get_param_rules() if "mprob" in r["par_name"]
        ]
        if len(mprob_rules) > 1 or self.model.mprob_model == "monomers":
            raise NotImplementedError

        mprobs = self.get_motif_probs()
        if len(mprobs) != len(self.motifs):
            # a Muse and Gaut model
            # the probability of a motif is the normalised product of the
            # probabilities of its elements
            expanded = numpy.zeros(len(self.motifs), dtype=float)
            for i, motif in enumerate(self.motifs):
                val = 1.0
                for b in motif:
                    val *= mprobs[b]
                expanded[i] = val
            mprobs = expanded / expanded.sum()
        else:
            mprobs = [mprobs[m] for m in self.motifs]

        tree_names = [e.name for e in self.tree.postorder()]
        if edges is None:
            edges = tree_names
        else:
            # previously the argument was overwritten and every node was
            # returned regardless of what the caller asked for
            edges = list(edges)
            known = set(tree_names)
            unknown = [name for name in edges if name not in known]
            if unknown:
                raise KeyError(f"unknown edge name(s): {unknown}")

        # the same probabilities apply to every requested node
        values = [mprobs for _ in edges]
        return DictArrayTemplate(edges, self.motifs).wrap(values)
Esempio n. 10
0
 def test_get_repr_html(self):
     """exercising method used by parent classes for nice Jupyter display"""
     data = [[3, 7], [2, 8], [5, 5]]
     darr = DictArrayTemplate(list("ABC"), list("ab")).wrap(data)
     got = darr._repr_html_()
     self.assertIsInstance(got, str)
     # assertTrue(len(got), 100) used 100 as the failure message and so only
     # checked for a non-empty string; compare the length explicitly
     self.assertGreater(len(got), 100)
Esempio n. 11
0
 def __init__(self,
              default=None,
              name=None,
              dimensions=None,
              dimension=None,
              size=None,
              **kw):
     """Set up size, optional internal dimension and the default value.

     Parameters
     ----------
     default
         initial value. If None, one is obtained from
         ``self._make_default_value()``.
     name
         required, must be truthy
     dimensions
         passed through to ``_InputDefn.__init__``
     dimension
         optional ``(dim_name, dim_cats)`` pair; ``dim_cats`` become
         ``self.bin_names`` and the labels of ``self.array_template``
     size
         number of elements. If None it is taken from ``len(default)``,
         or failing that from ``len(dimension[1])``.
     **kw
         passed through to ``_InputDefn.__init__``
     """
     assert name
     # explicit size wins, then the default's length, then the dimension's
     if size is None:
         if default is not None:
             size = len(default)
         elif dimension is not None:
             size = len(dimension[1])
     self.size = size

     if dimension is not None:
         self.internal_dimension = dimension
         dim_name, dim_cats = dimension
         self.bin_names = dim_cats
         self.array_template = DictArrayTemplate(dim_cats)
         self.internal_dimensions = (dim_name, )

     # NOTE(review): self.array_template is read below even when no
     # dimension was given, so it presumably has a class-level default
     if default is None:
         default = self._make_default_value()
     elif self.array_template is None:
         default = numpy.asarray(default)
     else:
         default = self.array_template.unwrap(default)

     _InputDefn.__init__(self,
                         name=name,
                         default=default,
                         dimensions=dimensions,
                         **kw)
     self.check_value_is_valid(default, True)
Esempio n. 12
0
 def test_key_levels(self):
     """keys are available on the DictArray and on a row selected from it"""
     darr = DictArrayTemplate("abc", "ABC").wrap(self.a)
     row_labels = ["a", "b", "c"]
     col_labels = ["A", "B", "C"]
     self.assertEqual(darr.keys(), row_labels)
     self.assertEqual(darr["a"].keys(), col_labels)
     # a selected row iterates over its values
     self.assertEqual(list(darr["a"]), [1, 0, 0])
     self.assertEqual(sum(darr["a"]), 1)
Esempio n. 13
0
    def get_rate_matrix_for_edge(self, name, calibrated=True, **kw):
        """returns the rate matrix (Q) for the named edge

        Parameters
        ----------
        name : str
            name of the edge
        calibrated : bool
            If True, the rate matrix is scaled such that
            ``sum(pi_i * Qii) == 1``. If False, the calibrated matrix is
            multiplied by the length parameter.
        **kw
            passed through to ``self.get_param_value``

        Returns
        -------
        DictArray with ``self._motifs`` labelling both rows and columns

        Raises
        ------
        RuntimeError
            if the lookup fails with ``KeyError("Q")`` and ``name`` is not "Q"
        KeyError
            any other failed lookup is re-raised unchanged

        Notes
        -----
        If ``calibrated=False``, ``expm(Q)`` will give the same result as
        ``self.get_psub_for_edge(name)``

        NOTE(review): only the length parameter is applied here; a per-bin
        rate is not multiplied in by this method.
        """
        # todo handle case of multiple loci
        try:
            array = self.get_param_value("Q", edge=name, **kw)
            # copy so the in-place scaling below cannot alter the stored value
            array = array.copy()
            if not calibrated:
                length = self.get_param_value("length", edge=name, **kw)
                array *= length
        except KeyError as err:
            # exception instances are not subscriptable in Python 3, the
            # missing key has to be read from .args
            missing = err.args[0] if err.args else None
            if missing == "Q" and name != "Q":
                raise RuntimeError("rate matrix not known by this model") from err
            raise
        return DictArrayTemplate(self._motifs, self._motifs).wrap(array)
Esempio n. 14
0
 def get_bin_probs(self, locus=None):
     """returns posterior probabilities of bins as a DictArray

     The "lh" parameter value of every bin in ``self.bin_names`` is handed
     to ``get_posterior_probs`` of the "bindex" parameter value. The first
     dimension of the result is labelled with the bin names, the second is
     described only by its length.

     Parameters
     ----------
     locus
         passed to every ``get_param_value`` call
     """
     hmm = self.get_param_value("bindex", locus=locus)
     lhs = []
     for bin_name in self.bin_names:
         lhs.append(self.get_param_value("lh", locus=locus, bin=bin_name))
     posteriors = hmm.get_posterior_probs(*lhs)
     template = DictArrayTemplate(self.bin_names, posteriors.shape[1])
     return template.wrap(posteriors)
Esempio n. 15
0
 def get_psub_for_edge(self, name, **kw):
     """returns the substitution probability matrix for the named edge

     The "dpsubs" parameter is tried first, falling back to "psubs" when
     that lookup raises KeyError. Rows and columns of the result are both
     labelled with ``self._motifs``.
     """
     template = DictArrayTemplate
     try:
         # For PartialyDiscretePsubsDefn
         psub = self.get_param_value("dpsubs", edge=name, **kw)
     except KeyError:
         psub = self.get_param_value("psubs", edge=name, **kw)
     motifs_template = template(self._motifs, self._motifs)
     return motifs_template.wrap(psub)
Esempio n. 16
0
    def test_str_labels(self):
        """labels given as lists of str or as numpy arrays of dtype U"""
        label_sets = (
            (["Ab", "Bb", "Cb"], ["Db", "Eb", "Fb"]),
            (
                numpy.array(["Ab", "Bb", "Cb"], dtype="U"),
                numpy.array(["Db", "Eb", "Fb"], dtype="U"),
            ),
        )
        # both kinds of labels must give the same keys and lookups
        for row_labels, col_labels in label_sets:
            darr = DictArrayTemplate(row_labels, col_labels).wrap(self.a)
            self.assertEqual(darr.keys(), ["Ab", "Bb", "Cb"])
            self.assertEqual(darr[0].keys(), ["Db", "Eb", "Fb"])
            self.assertEqual(darr["Ab", "Eb"], 0)
            self.assertEqual(darr["Bb", "Eb"], 1)
Esempio n. 17
0
 def test_to_dict(self):
     """to_dict on 1D and 2D arrays, with str and int row labels"""
     col_names = "a", "b", "c"

     # 1D: a single set of keys
     flat = [0, 35, 45]
     darr = DictArrayTemplate(col_names).wrap(flat)
     self.assertEqual(darr.to_dict(), dict(zip(col_names, flat)))

     # 2D: one row of data, only the row label changes between cases
     nested = [[0, 35, 45]]
     row = {"a": 0, "b": 35, "c": 45}

     # row label is a str
     darr = DictArrayTemplate(["0"], col_names).wrap(nested)
     darr.to_dict()
     self.assertEqual(darr.to_dict(), {"0": row})

     # row label is an int
     for label in (1, 0):
         darr = DictArrayTemplate([label], col_names).wrap(nested)
         self.assertEqual(darr.to_dict(), {label: row})
Esempio n. 18
0
 def test_interpret_index(self):
     """a list of labels and a list of positions resolve to the same index"""
     template = DictArrayTemplate(["ab", "dna", "rna"])
     expected = ([0, 2], )
     for index in (["ab", "rna"], [0, 2]):
         resolved = template.interpret_index(index)
         # only the first element of the returned value is checked
         self.assertEqual(resolved[0], expected)
Esempio n. 19
0
    def test_category_counts_from_non_int_arrays(self):
        """object arrays of ints are accepted, non-integer or negative values fail"""

        def wrapped(values, dtype):
            # build the numpy array and its labelled DictArray together
            arr = numpy.array(values, dtype=dtype)
            darr = DictArrayTemplate(["syn", "nsyn"], ["Ts", "Tv"]).wrap(arr)
            return arr, darr

        counts, darr = wrapped([[31, 36], [58, 138]], object)
        result = CategoryCounts(darr)
        assert_allclose(result.observed.array.tolist(), counts.tolist())

        # a fractional value is rejected whatever dtype carries it
        for dtype in (object, float):
            with self.assertRaises(TypeError):
                _, darr = wrapped([[31.3, 36], [58, 138]], dtype)
                _ = CategoryCounts(darr)

        # negative values disallowed
        with self.assertRaises(ValueError):
            _, darr = wrapped([[31, -36], [58, 138]], int)
            _ = CategoryCounts(darr)
Esempio n. 20
0
    def test_get_repr_html(self):
        """exercising method used by parent classes for nice Jupyter display"""
        data = [[3, 7], [2, 8], [5, 5]]
        darr = DictArrayTemplate(list("ABC"), list("ab")).wrap(data)
        got = darr._repr_html_()
        self.assertIsInstance(got, str)
        # assertTrue(len(got), 100) used 100 as the failure message and so
        # only checked for a non-empty string; compare the length explicitly
        self.assertGreater(len(got), 100)

        # case where 1D array
        a = [4, 6, 4, 2]
        darr = DictArrayTemplate(["A", "C", "G", "T"]).wrap(a)
        got = darr._repr_html_()
        self.assertNotIn('class="index"', got)

        # case of 3D array
        d3 = numpy.arange(8).reshape((2, 2, 2))
        darr = DictArrayTemplate(2, 2, 2).wrap(d3)
        got = darr._repr_html_()
        self.assertIn("3 dimensional", got)
Esempio n. 21
0
    def get_all_rate_matrices(self, calibrated=True):
        """returns all rate matrices (Q) as a dict, keyed by scope

        Parameters
        ----------
        calibrated : bool
            If True, matrices are returned as stored. If False, each matrix
            is multiplied by the length parameter of its edge and, when a
            "rate" definition exists, also by the rate of its bin.

        Returns
        -------
        dict of DictArray. Keys are tuples holding the elements of a scope
        that distinguish matrices. If a single rate matrix, the key is an
        empty tuple.
        """
        defn = self.defn_for["Q"]

        rate_het = self.defn_for.get("rate", False)
        bin_names = None
        bin_index = None
        if rate_het:
            # position of each bin name within the rate definition's index
            rate_bin_pos = rate_het.valid_dimensions.index("bin")
            bin_names = {
                key[rate_bin_pos]: i for i, key in enumerate(rate_het.index)
            }
            bin_index = defn.valid_dimensions.index("bin")

        used_dims = defn.used_dimensions()
        edge_index = defn.valid_dimensions.index("edge")

        # positions within a scope that make up the result key
        indices = {defn.valid_dimensions.index(dim) for dim in used_dims}
        if not calibrated:
            # scaled matrices differ per edge, and per bin if rates vary
            indices.add(edge_index)
            if rate_het:
                indices.add(bin_index)
        indices = sorted(indices)

        darr_template = DictArrayTemplate(self._motifs, self._motifs)
        result = {}
        for scope, index in defn.index.items():
            # work on a copy so scaling cannot alter the stored matrix
            q = defn.values[index].copy()
            # from scope we extract only the relevant dimensions
            key = tuple(numpy.take(scope, indices))
            if not calibrated:
                scale = self.get_param_value("length", edge=scope[edge_index])
                if rate_het:
                    scale *= rate_het.values[bin_names[scope[bin_index]]]
                q *= scale
            result[key] = darr_template.wrap(q)
            if calibrated and not indices:
                break  # single rate matrix

        return result
Esempio n. 22
0
 def test_to_normalized(self):
     """normalising by row and by column divides along the right dimension"""
     counts = [[3, 7], [2, 8], [5, 5]]
     darr = DictArrayTemplate(list("ABC"), list("ab")).wrap(counts)

     by_row = darr.to_normalized(by_row=True)
     assert_allclose(by_row.array, [[0.3, 0.7], [0.2, 0.8], [0.5, 0.5]])

     by_col = darr.to_normalized(by_column=True)
     expected = [[0.3, 7 / 20], [0.2, 8 / 20], [0.5, 5 / 20]]
     assert_allclose(by_col.array, expected)

     # asking for both at once raises AssertionError
     with self.assertRaises(AssertionError):
         darr.to_normalized(by_row=True, by_column=True)
Esempio n. 23
0
 def test_numpy_ops(self):
     """a DictArray can be used as an operand of numpy's dot"""
     psub = [
         [0.7, 0.1, 0.1, 0.1],
         [0.1, 0.7, 0.1, 0.1],
         [0.1, 0.1, 0.7, 0.1],
         [0.1, 0.1, 0.1, 0.7],
     ]
     darr = DictArrayTemplate(list(DNA), list(DNA)).wrap(psub)
     uniform = numpy.array([0.25, 0.25, 0.25, 0.25])
     # method form and function form of dot must agree
     assert_allclose(uniform.dot(darr), [0.25, 0.25, 0.25, 0.25])
     assert_allclose(numpy.dot(uniform, darr), [0.25, 0.25, 0.25, 0.25])
Esempio n. 24
0
 def test_get_logo_missing(self):
     """get_logo does not fail when a position has only zeros"""
     freqs = [
         [0.1, 0.3, 0.5, 0.1],
         [0.05, 0.8, 0.05, 0.1],
         [0, 0, 0, 0],
         [0.7, 0.1, 0.1, 0.1],
         [0.6, 0.15, 0.05, 0.2],
     ]
     darr = DictArrayTemplate(5, "ACGT").wrap(freqs)
     # nothing is asserted; the test passes if the call does not raise
     get_logo(darr)
Esempio n. 25
0
 def test_get_logo(self):
     """get_logo runs on a 5 x 4 DictArray"""
     freqs = [
         [0.1, 0.3, 0.5, 0.1],
         [0.25, 0.25, 0.25, 0.25],
         [0.05, 0.8, 0.05, 0.1],
         [0.7, 0.1, 0.1, 0.1],
         [0.6, 0.15, 0.05, 0.2],
     ]
     darr = DictArrayTemplate(5, "ACGT").wrap(freqs)
     # nothing is asserted; the test passes if the call does not raise
     get_logo(darr)
Esempio n. 26
0
 def test_getitem(self):
     """slicing with labels, alone and combined with positional slices"""
     psub = [
         [0.7, 0.1, 0.1, 0.1],
         [0.1, 0.7, 0.1, 0.1],
         [0.1, 0.1, 0.7, 0.1],
         [0.1, 0.1, 0.1, 0.7],
     ]
     darr = DictArrayTemplate(list(DNA), list(DNA)).wrap(psub)

     # all rows, columns selected by a label slice
     selected = darr[:, "A":"G"]
     assert_allclose(selected.to_array(), [[0.1], [0.1], [0.7], [0.1]])

     # rows from position 2 on, same label slice for the columns
     selected = darr[2:, "A":"G"]
     assert_allclose(selected.to_array(), [[0.7], [0.1]])
Esempio n. 27
0
 def test_direct_construction(self):
     """DictArray can be built directly from each supported kind of input"""
     darr = DictArrayTemplate("abc", "ABC").wrap(self.a)
     inputs = (
         [[245, 599]],
         {"a": {"b": 4, "c": 5}},
         {("a", "b"): 4, ("a", "c"): 5},
         {"a": 0, "b": 35, "c": 45},
         darr,
     )
     for data in inputs:
         # nothing is asserted; the test passes if construction does not raise
         DictArray(data)
Esempio n. 28
0
 def test_write(self):
     """written file holds a header line plus one record per element"""
     counts = [[3, 7], [2, 8], [5, 5]]
     darr = DictArrayTemplate(list("ABC"), list("ab")).wrap(counts)
     with TemporaryDirectory(dir=".") as dirname:
         outpath = os.path.join(dirname, "delme.tsv")
         darr.write(outpath)
         with open(outpath) as infile:
             rows = [line.strip().split() for line in infile]
         # first row is the header, the rest are (key, key, value) records
         header, records = rows[0], rows[1:]
         self.assertEqual(header, ["dim-1", "dim-2", "value"])
         got = {}
         for k1, k2, v in records:
             got[(k1, k2)] = int(v)
         self.assertEqual(got, darr.to_dict(flatten=True))
Esempio n. 29
0
    def reconstruct_ancestral_seqs(self, locus=None):
        """computes the conditional probabilities of each state for each
        non-tip node in the tree.

        Parameters
        ----------
        locus
            a named locus

        Returns
        -------
        {node_name: DictArray, ...}

        Notes
        -----
        Alignment columns are rows in the DictArray, motifs are its columns.
        For each node the "fixed_motif" parameter is set to every motif index
        in turn and the full length likelihoods are collected. It is set back
        to -1 afterwards, even if a calculation fails.
        """
        template = None
        ancestral = {}
        for node in self._tree.get_edge_vector():
            if node.istip():
                continue

            # keyword arguments common to every set_param_rule call for this node
            rule = dict(edge=node.name, locus=locus, is_constant=True)
            per_motif = []
            try:
                for state in range(len(self._motifs)):
                    self.set_param_rule("fixed_motif", value=state, **rule)
                    lhs = self.get_full_length_likelihoods(locus=locus)
                    per_motif.append(lhs)
                    if template is None:
                        # built once, from the first likelihoods seen
                        template = DictArrayTemplate(lhs.shape[0], self._motifs)
            finally:
                self.set_param_rule("fixed_motif", value=-1, **rule)

            # dict of site x motif arrays
            site_by_motif = numpy.transpose(numpy.asarray(per_motif))
            ancestral[node.name] = template.wrap(site_by_motif)
        return ancestral
Esempio n. 30
0
    def get_motif_probs_by_node(self, edges=None, bin=None, locus=None):
        """returns motif probabilities for nodes as a DictArray

        Time-reversible models are delegated to
        ``self._get_motif_probs_by_node_tr``.

        Parameters
        ----------
        edges
            names of the nodes to include, in the order wanted. If None,
            all names produced by ``self._nodeMotifProbs`` are used.
        bin, locus
            scope used to look up the "mprobs" parameter value
        """
        from cogent3.evolve.substitution_model import TimeReversible

        if isinstance(self.model, TimeReversible):
            return self._get_motif_probs_by_node_tr(edges=edges, bin=bin, locus=locus)

        scope = {"bin": bin, "locus": locus}
        word_probs = self._model.calc_word_probs(
            self.get_param_value("mprobs", **scope)
        )
        pairs = self._nodeMotifProbs(self._tree, word_probs, scope)
        if edges is None:
            edges = [name for name, _ in pairs]
        by_name = dict(pairs)
        rows = [by_name[name] for name in edges]
        return DictArrayTemplate(edges, self._mprob_motifs).wrap(rows)