Ejemplo n.º 1
0
 def test_key_levels(self):
     """Keys are available on the wrapped array and on a row selected from it."""
     wrapped = DictArrayTemplate("abc", "ABC").wrap(self.a)
     self.assertEqual(wrapped.keys(), ["a", "b", "c"])

     # the row selected by key "a" is checked for its own keys and values
     first_row = wrapped["a"]
     self.assertEqual(first_row.keys(), ["A", "B", "C"])
     self.assertEqual(list(first_row), [1, 0, 0])
     self.assertEqual(sum(first_row), 1)
Ejemplo n.º 2
0
    def test_slicing_combos(self):
        """Mixing key slices, index lists and single keys selects the same cells."""
        matrix = [
            [0.7, 0.1, 0.2, 0.3],
            [0.1, 0.7, 0.1, 0.3],
            [0.3, 0.2, 0.6, 0.3],
            [0.4, 0.1, 0.1, 0.7],
        ]
        # expected result shared by the first two selections and the int-key case
        expected_block = numpy.array([[0.7, 0.1], [0.2, 0.6]])

        darr = DictArrayTemplate(list(DNA), list(DNA)).wrap(matrix)

        # key slices on both axes
        assert_allclose(darr["C":"G", "C":"G"].array, expected_block)

        # lists of positions on both axes
        assert_allclose(darr[[1, 2], [1, 2]].array, expected_block)

        # a list on one axis combined with a single key on the other
        assert_allclose(darr[[2, 3], "C"].array, numpy.array([0.2, 0.1]))
        assert_allclose(darr["C", [2, 3]].array, numpy.array([0.1, 0.3]))

        # a list on one axis combined with a key slice on the other
        assert_allclose(
            darr[[1, 2], "T":"A"].array, numpy.array([[0.1, 0.7], [0.3, 0.2]])
        )
        assert_allclose(
            darr["T":"A", [1, 2]].array, numpy.array([[0.1, 0.2], [0.7, 0.1]])
        )

        # make sure we cope with keys that are int's
        int_keys = list(range(1, 5))
        int_darr = DictArrayTemplate(int_keys, int_keys).wrap(matrix)
        assert_allclose(int_darr[[1, 2], [1, 2]].array, expected_block)
Ejemplo n.º 3
0
    def test_convert2DDict(self):
        """convert2DDict output can be fed straight into DictArrayTemplate."""
        # one row, two columns
        vals, row_keys, col_keys = convert2DDict({"a": {"b": 4, "c": 5}})
        self.assertEqual(set(row_keys), {"a"})
        wrapped = DictArrayTemplate(row_keys, col_keys).wrap(vals)
        self.assertEqual(wrapped.array.tolist(), [[4, 5]])
        # row keys, then column
        self.assertEqual(wrapped.template.names, [["a"], ["b", "c"]])

        # square input: to_dict() must give back the original mapping
        square = {
            "a": {"a": 0, "b": 1, "e": 0},
            "b": {"a": 1, "b": 0, "e": 4},
            "e": {"a": 0, "b": 4, "e": 0},
        }
        vals, row_keys, col_keys = convert2DDict(square)
        wrapped = DictArrayTemplate(row_keys, col_keys).wrap(vals)
        self.assertEqual(wrapped.to_dict(), square)
        self.assertEqual(
            wrapped.template.names, [["a", "b", "e"], ["a", "b", "e"]]
        )

        # make_symmetric=True: row and column keys must match
        vals, row_keys, col_keys = convert2DDict(
            {"a": {"b": 4, "c": 5}}, make_symmetric=True
        )
        self.assertEqual(row_keys, col_keys)
        self.assertEqual(vals, [[0, 4, 5], [4, 0, 0], [5, 0, 0]])
Ejemplo n.º 4
0
 def test_get_repr_html(self):
     """exercising method used by parent classes for nice Jupyter display"""
     data = [[3, 7], [2, 8], [5, 5]]
     darr = DictArrayTemplate(list("ABC"), list("ab")).wrap(data)
     got = darr._repr_html_()
     self.assertIsInstance(got, str)
     # assertTrue(len(got), 100) treated 100 as the failure message and so
     # only checked that the string was non-empty; compare the length instead
     self.assertGreater(len(got), 100)
Ejemplo n.º 5
0
 def __init__(
     self,
     default=None,
     name=None,
     dimensions=None,
     dimension=None,
     size=None,
     **kw
 ):
     """Set up the definition's size, optional array template and default.

     Parameters
     ----------
     default
         initial value; when omitted it is produced by
         ``self._make_default_value()``
     name
         required (asserted truthy)
     dimensions
         forwarded unchanged to ``_InputDefn.__init__``
     dimension
         a ``(dimension name, categories)`` pair; when given, the
         categories become ``bin_names`` and key a ``DictArrayTemplate``
     size
         explicit size; when omitted it is taken from ``len(default)`` or,
         failing that, from the number of categories in ``dimension``
     **kw
         forwarded unchanged to ``_InputDefn.__init__``
     """
     assert name

     # an explicit size wins; otherwise infer it from default, then dimension
     if size is None:
         if default is not None:
             size = len(default)
         elif dimension is not None:
             size = len(dimension[1])
     self.size = size

     if dimension is not None:
         self.internal_dimension = dimension
         dim_name, dim_cats = dimension
         self.bin_names = dim_cats
         self.array_template = DictArrayTemplate(dim_cats)
         self.internal_dimensions = (dim_name,)

     # NOTE(review): self.array_template is read below even when `dimension`
     # is None -- presumably a class-level default exists; confirm.
     if default is None:
         default = self._make_default_value()
     elif self.array_template is not None:
         default = self.array_template.unwrap(default)
     else:
         default = numpy.asarray(default)

     _InputDefn.__init__(
         self, name=name, default=default, dimensions=dimensions, **kw
     )
     self.check_value_is_valid(default, True)
Ejemplo n.º 6
0
 def test_convert_1D_dict(self):
     """convert_1D_dict output can be fed straight into DictArrayTemplate."""
     vals, keys = convert_1D_dict({"a": 0, "b": 35, "c": 45})
     wrapped = DictArrayTemplate(keys).wrap(vals)
     self.assertEqual(wrapped.array.tolist(), [0, 35, 45])
Ejemplo n.º 7
0
    def to_dictarray(self):
        """construct fully enumerated dictarray

        Returns
        -------
        DictArray with dtype of int

        Notes
        -----
        Unobserved combinations have zeros. Result can be indexed as if it
        was a numpy array using key values
        """
        from itertools import product

        from cogent3.util.dict_array import DictArrayTemplate

        # the first key decides the dimensionality: a str (or any key without
        # a length) is treated as a single category
        key = next(iter(self))
        try:
            ndim = 1 if isinstance(key, str) else len(key)
        except TypeError:
            ndim = 1

        if ndim == 1:
            names = sorted(self)
            vals = [self[n] for n in names]
            return DictArrayTemplate(names).wrap(vals, dtype=int)

        # one sorted list of distinct labels per position in the key tuples
        categories = [sorted(set(labels)) for labels in zip(*self)]
        shape = tuple(len(c) for c in categories)
        darr = DictArrayTemplate(*categories).wrap(numpy.zeros(shape, dtype=int))

        # build label -> position maps once, rather than calling list.index()
        # for every label of every combination
        positions = [
            {label: i for i, label in enumerate(labels)} for labels in categories
        ]
        for comb in product(*categories):
            index = tuple(lookup[label] for lookup, label in zip(positions, comb))
            darr.array[index] = self[comb]

        return darr
Ejemplo n.º 8
0
 def test_interpret_index(self):
     """Key lists and position lists resolve to the same positions."""
     template = DictArrayTemplate(["ab", "dna", "rna"])
     expected = ([0, 2],)

     # selecting by key
     by_key = template.interpret_index(["ab", "rna"])
     self.assertEqual(by_key[0], expected)

     # selecting by position
     by_position = template.interpret_index([0, 2])
     self.assertEqual(by_position[0], expected)
Ejemplo n.º 9
0
 def test_convert_for_dictarray(self):
     """A DictArray passed to convert_for_dictarray can be rebuilt from the result."""
     source = DictArrayTemplate("abc", "ABC").wrap(numpy.identity(3, int))
     vals, row_keys, col_keys = convert_for_dictarray(source)
     rebuilt = DictArrayTemplate(row_keys, col_keys).wrap(vals)
     self.assertEqual(rebuilt.array.tolist(), source.array.tolist())
     # the wrap method creates a new array
     self.assertIsNot(rebuilt.array, source.array)
Ejemplo n.º 10
0
    def get_all_rate_matrices(self, calibrated=True):
        """returns all rate matrices (Q) as a dict, keyed by scope

        Parameters
        ----------
        calibrated : bool
            If True, a copy of each stored Q is returned unscaled. If False,
            each Q is multiplied by the length parameter of its edge and, for
            a rate heterogeneity model, additionally by the rate for its bin.

        Returns
        -------
        dict of DictArray, keyed by a tuple of the scope values for the
        dimensions that distinguish the matrices. If a single rate matrix,
        the key is an empty tuple
        """
        defn = self.defn_for["Q"]

        rate_het = self.defn_for.get("rate", False)
        if rate_het:
            # map each bin name to its position among the rate definition's
            # scopes, then switch bin_index to the bin position in Q's scope
            bin_index = rate_het.valid_dimensions.index("bin")
            bin_names = [k[bin_index] for k in rate_het.index]
            bin_names = {n: i for i, n in enumerate(bin_names)}
            bin_index = defn.valid_dimensions.index("bin")
        else:
            bin_names = None
            bin_index = None

        used_dims = defn.used_dimensions()
        edge_index = defn.valid_dimensions.index("edge")

        # positions within a scope tuple that make up the result key
        indices = {defn.valid_dimensions.index(k) for k in used_dims}
        if not calibrated:
            # scaling is per edge, so the edge must be part of the key
            indices.add(edge_index)

        if not calibrated and rate_het:
            # scaling is also per bin, so the bin must be part of the key
            indices.add(bin_index)

        indices = sorted(indices)
        result = {}
        darr_template = DictArrayTemplate(self._motifs, self._motifs)
        for scope, index in defn.index.items():
            q = defn.values[index]  # this gives the appropriate Q
            # from scope we extract only the relevant dimensions
            key = tuple(numpy.take(scope, indices))
            # copy so the in-place scaling below leaves the stored Q untouched
            q = q.copy()
            if not calibrated:
                length = self.get_param_value("length", edge=scope[edge_index])
                if rate_het:
                    bdex = bin_names[scope[bin_index]]
                    rate = rate_het.values[bdex]
                    length *= rate
                q *= length
            result[key] = darr_template.wrap(q)
            if not indices and calibrated:
                break  # single rate matrix

        return result
Ejemplo n.º 11
0
    def test_convert_series(self):
        """convert_series output can be fed straight into DictArrayTemplate."""
        # 2D data with explicit row and column keys
        vals, row_keys, col_keys = convert_series([[4], [5]], ["A", "B"], ["a"])
        wrapped = DictArrayTemplate(row_keys, col_keys).wrap(vals)
        self.assertEqual(wrapped.array.tolist(), [[4], [5]])

        # 2D data without keys
        nested = [[245, 599]]
        vals, row_keys, col_keys = convert_series(nested)
        wrapped = DictArrayTemplate(row_keys, col_keys).wrap(vals)
        self.assertEqual(wrapped.array.tolist(), nested)

        # 1D data without keys
        flat = nested[0]
        vals, row_keys, col_keys = convert_series(flat)
        wrapped = DictArrayTemplate(row_keys, col_keys).wrap(vals)
        self.assertEqual(wrapped.array.tolist(), flat)
Ejemplo n.º 12
0
 def test_convert_for_dictarray(self):
     """convert_for_dictarray accepts each supported input form without error."""
     existing = DictArrayTemplate("abc", "ABC").wrap(self.a)
     inputs = (
         [[245, 599]],  # nested series
         {"a": {"b": 4, "c": 5}},  # dict of dicts
         {("a", "b"): 4, ("a", "c"): 5},  # dict keyed by tuples
         {"a": 0, "b": 35, "c": 45},  # flat dict
         existing,  # an existing DictArray
     )
     # no assertions: the test passes if conversion and wrapping do not raise
     for candidate in inputs:
         vals, row_keys, col_keys = convert_for_dictarray(candidate)
         _ = DictArrayTemplate(row_keys, col_keys).wrap(vals)
Ejemplo n.º 13
0
 def test_write(self):
     """Written output has the expected header and the same values as to_dict."""
     darr = DictArrayTemplate(list("ABC"), list("ab")).wrap(
         [[3, 7], [2, 8], [5, 5]]
     )
     with TemporaryDirectory(dir=".") as dirname:
         outpath = os.path.join(dirname, "delme.tsv")
         darr.write(outpath)
         with open(outpath) as infile:
             rows = [line.strip().split() for line in infile]
         # first row is the header, the rest are (key, key, value) records
         header, records = rows[0], rows[1:]
         self.assertEqual(header, ["dim-1", "dim-2", "value"])
         got = {(k1, k2): int(v) for k1, k2, v in records}
         self.assertEqual(got, darr.to_dict(flatten=True))
Ejemplo n.º 14
0
    def reconstruct_ancestral_seqs(self, locus=None):
        """computes the conditional probabilities of each state for each
        non-tip node in the tree.

        Parameters
        ----------
        locus
            a named locus

        Returns
        -------
        {node_name: DictArray, ...}

        Notes
        -----
        Alignment columns are rows in the DictArray.
        """
        result = {}
        array_template = None
        for restricted_edge in self._tree.get_edge_vector():
            if restricted_edge.istip():
                continue

            edge_name = restricted_edge.name
            per_motif = []
            try:
                # fix the node to each motif index in turn and record the
                # resulting full length likelihoods
                for motif in range(len(self._motifs)):
                    self.set_param_rule(
                        "fixed_motif",
                        value=motif,
                        edge=edge_name,
                        locus=locus,
                        is_constant=True,
                    )
                    likelihoods = self.get_full_length_likelihoods(locus=locus)
                    per_motif.append(likelihoods)
                    # the template is built once, from the first result's length
                    if array_template is None:
                        array_template = DictArrayTemplate(
                            likelihoods.shape[0], self._motifs
                        )
            finally:
                # always reset the rule, even if a likelihood call failed
                # (presumably -1 means "no motif fixed" -- confirm)
                self.set_param_rule(
                    "fixed_motif",
                    value=-1,
                    edge=edge_name,
                    locus=locus,
                    is_constant=True,
                )

            # dict of site x motif arrays
            site_by_motif = numpy.transpose(numpy.asarray(per_motif))
            result[edge_name] = array_template.wrap(site_by_motif)
        return result
Ejemplo n.º 15
0
    def get_rate_matrix_for_edge(self, name, calibrated=True, **kw):
        """returns the rate matrix (Q) for the named edge

        Parameters
        ----------
        name : str
            name of the edge
        calibrated : bool
            If True, the rate matrix is scaled such that
            ``sum(pi_i * Qii) == 1``. If False, the calibrated matrix is
            multiplied by the length parameter.
        **kw
            forwarded to ``self.get_param_value``

        Raises
        ------
        RuntimeError
            if the model has no "Q" parameter
        KeyError
            any other failed parameter lookup is re-raised unchanged

        Notes
        -----
        If ``calibrated=False``, ``expm(Q)`` will give the same result as
        ``self.get_psub_for_edge(name)``

        NOTE(review): only the length parameter is applied here; no per-bin
        rate is applied for rate heterogeneity models -- confirm whether
        that is intended.
        """
        # todo handle case of multiple loci
        try:
            array = self.get_param_value("Q", edge=name, **kw)
            # copy so the in-place scaling below leaves the stored Q untouched
            array = array.copy()
            if not calibrated:
                length = self.get_param_value("length", edge=name, **kw)
                array *= length
        except KeyError as err:
            # exceptions are not subscriptable in Python 3, so the missing
            # key has to be read from err.args
            missing = err.args[0] if err.args else None
            if missing == "Q" and name != "Q":
                raise RuntimeError("rate matrix not known by this model") from err
            raise
        return DictArrayTemplate(self._motifs, self._motifs).wrap(array)
Ejemplo n.º 16
0
    def _get_motif_probs_by_node_tr(self, edges=None, bin=None, locus=None):
        """returns motif probs by node for time-reversible models

        Every node visited by ``self.tree.postorder()`` is given the same
        motif probabilities. The ``edges``, ``bin`` and ``locus`` arguments
        are accepted but not consulted by this method.

        Raises
        ------
        NotImplementedError
            if more than one mprob rule exists or the model's mprob_model is
            "monomers"
        """
        mprob_rules = [
            rule for rule in self.get_param_rules() if "mprob" in rule["par_name"]
        ]
        if len(mprob_rules) > 1 or self.model.mprob_model == "monomers":
            raise NotImplementedError

        mprobs = self.get_motif_probs()
        if len(mprobs) == len(self.motifs):
            mprobs = [mprobs[m] for m in self.motifs]
        else:
            # a Muse and Gaut model
            # each motif's value is the product of its elements' probabilities,
            # normalised so the values sum to 1
            expanded = numpy.zeros(len(self.motifs), dtype=float)
            for i, motif in enumerate(self.motifs):
                prob = 1.0
                for element in motif:
                    prob *= mprobs[element]
                expanded[i] = prob
            mprobs = expanded / expanded.sum()

        node_names = [node.name for node in self.tree.postorder()]
        # the same probabilities object is repeated for every node
        values = [mprobs] * len(node_names)

        return DictArrayTemplate(node_names, self.motifs).wrap(values)
Ejemplo n.º 17
0
    def test_deserialise_tabular_dictarray(self):
        """A DictArray survives a to_json / deserialise_object round trip."""
        from cogent3.util.dict_array import DictArrayTemplate

        rows = [
            [1, "abc", 11],
            [2, "bca", 22],
            [3, "cab", 33],
            [4, "abc", 44],
            [5, "bca", 55],
        ]
        darr = DictArrayTemplate(5, ["id", "foo", "bar"]).wrap(rows)
        serialised = darr.to_json()
        restored = deserialise_object(serialised)
        self.assertEqual(restored.to_dict(), darr.to_dict())
Ejemplo n.º 18
0
 def get_bin_probs(self, locus=None):
     """Return the posterior probabilities from the "bindex" parameter.

     The "lh" parameter value of every bin in ``self.bin_names`` is passed
     to ``get_posterior_probs``; the result is wrapped with the bin names
     on the first axis.
     """
     hmm = self.get_param_value("bindex", locus=locus)
     per_bin_lhs = [
         self.get_param_value("lh", locus=locus, bin=bin_name)
         for bin_name in self.bin_names
     ]
     posterior = hmm.get_posterior_probs(*per_bin_lhs)
     template = DictArrayTemplate(self.bin_names, posterior.shape[1])
     return template.wrap(posterior)
Ejemplo n.º 19
0
 def test_to_string(self):
     """to_string output for comma, default and space separators."""
     base = 3.123456789
     darr = DictArrayTemplate(2, 2).wrap(
         [[base, 2 * base], [3 * base, 4 * base]]
     )
     # (keyword arguments for to_string, expected text)
     cases = (
         (
             {"sep": ","},
             "dim-1,dim-2,value\n0,0,3.123456789\n0,1,6.246913578\n1,0,9.370370367\n1,1,12.493827156",
         ),
         (
             {},
             "dim-1\tdim-2\tvalue\n0\t0\t3.123456789\n0\t1\t6.246913578\n1\t0\t9.370370367\n1\t1\t12.493827156",
         ),
         (
             {"sep": " "},
             "dim-1 dim-2 value\n0 0 3.123456789\n0 1 6.246913578\n1 0 9.370370367\n1 1 12.493827156",
         ),
     )
     for kwargs, expected in cases:
         self.assertEqual(darr.to_string(**kwargs), expected)
Ejemplo n.º 20
0
 def get_psub_for_edge(self, name, **kw):
     """returns the substitution probability matrix for the named edge

     The "dpsubs" parameter is tried first; if that lookup raises KeyError
     the "psubs" parameter is used instead.
     """
     try:
         # For PartialyDiscretePsubsDefn
         psub = self.get_param_value("dpsubs", edge=name, **kw)
     except KeyError:
         psub = self.get_param_value("psubs", edge=name, **kw)
     template = DictArrayTemplate(self._motifs, self._motifs)
     return template.wrap(psub)
Ejemplo n.º 21
0
    def get_all_psubs(self):
        """returns all psubs as a dict keyed by used dimensions

        Each key is a tuple of the scope values for the dimensions the
        definition uses; each value is a DictArray wrapping the matrix.
        """
        # NOTE(review): the key tried first is "dsubs", whereas
        # get_psub_for_edge looks up "dpsubs" -- confirm which is intended.
        try:
            defn = self.defn_for["dsubs"]
        except KeyError:
            defn = self.defn_for["psubs"]

        used_dims = defn.used_dimensions()
        valid_dims = defn.valid_dimensions
        # positions within a scope tuple that make up the result key
        key_positions = [
            valid_dims.index(dim) for dim in used_dims if dim in valid_dims
        ]

        template = DictArrayTemplate(self._motifs, self._motifs)
        psubs_by_scope = {}
        for scope, value_index in defn.index.items():
            matrix = defn.values[value_index]
            scope_key = tuple(numpy.take(scope, key_positions))
            psubs_by_scope[scope_key] = template.wrap(matrix)
        return psubs_by_scope
Ejemplo n.º 22
0
    def test_category_counts_from_non_int_arrays(self):
        """accepts an object array of ints; rejects non-integer and negative values"""
        template = DictArrayTemplate(["syn", "nsyn"], ["Ts", "Tv"])

        # object dtype holding only ints is accepted
        counts = numpy.array([[31, 36], [58, 138]], dtype=object)
        got = CategoryCounts(template.wrap(counts))
        assert_allclose(got.observed.array.tolist(), counts.tolist())

        # a non-integer value is rejected for both object and float dtypes
        for dtype in (object, float):
            with self.assertRaises(TypeError):
                fractional = numpy.array([[31.3, 36], [58, 138]], dtype=dtype)
                _ = CategoryCounts(template.wrap(fractional))

        # negative values disallowed
        with self.assertRaises(ValueError):
            negative = numpy.array([[31, -36], [58, 138]], dtype=int)
            _ = CategoryCounts(template.wrap(negative))
Ejemplo n.º 23
0
    def __init__(self, data, motifs, row_indices=None, dtype=None):
        """
        data
            series of numbers, can be numpy array, CategoryCounter, dict instances
        motifs
            one label per data column (2D data) or per data element (1D data)
        row_indices
            row_indices correspond to original indexes, defaults to length of
            motif
        dtype
            the data must be safely castable to this type

        Raises
        ------
        ValueError
            if data is empty or has no truthy value, if the number of data
            elements differs from the number of motifs, or if the data cannot
            be safely cast to dtype
        """
        # todo validate that motifs are strings and row_indices are ints or
        # strings
        # todo change row_indices argument name to row_keys
        if isinstance(data, numpy.ndarray):
            some_data = data.any()
        else:
            some_data = any(data)

        if not some_data or len(data) == 0:
            raise ValueError("Must provide data")

        # data is 2D when its first element has a length
        try:
            len(data[0])
        except TypeError:
            ndim = 1
        else:
            ndim = 2
        num_elements = len(data) if ndim == 1 else len(data[0])
        if num_elements != len(motifs):
            # report num_elements: len(data[0]) raises TypeError for 1D data,
            # which would mask this ValueError
            raise ValueError(
                f"number of data elements {num_elements} != {len(motifs)}"
            )
        motifs = tuple(motifs)

        # create template
        if row_indices is None and ndim == 2:
            row_indices = len(data)

        template = DictArrayTemplate(row_indices, motifs)
        darr = template.wrap(data)
        # the cast result is discarded; it only checks the data is compatible
        # with dtype
        try:
            darr.array.astype(dtype, casting="safe")
        except TypeError as err:
            raise ValueError(err) from err
        # adopt the wrapped array's attributes as this instance's own
        self.__dict__.update(darr.__dict__)
        self.motifs = motifs
        self.motif_length = len(motifs[0])
Ejemplo n.º 24
0
 def test_get_logo_missing(self):
     """get_logo runs on a profile that contains an all-zero position."""
     profile = [
         [0.1, 0.3, 0.5, 0.1],
         [0.05, 0.8, 0.05, 0.1],
         [0, 0, 0, 0],  # position with no values
         [0.7, 0.1, 0.1, 0.1],
         [0.6, 0.15, 0.05, 0.2],
     ]
     wrapped = DictArrayTemplate(5, "ACGT").wrap(profile)
     # no assertions: the test passes if get_logo does not raise
     _ = get_logo(wrapped)
Ejemplo n.º 25
0
 def test_get_logo(self):
     """get_logo runs on a complete profile; the result is not inspected."""
     profile = [
         [0.1, 0.3, 0.5, 0.1],
         [0.25, 0.25, 0.25, 0.25],
         [0.05, 0.8, 0.05, 0.1],
         [0.7, 0.1, 0.1, 0.1],
         [0.6, 0.15, 0.05, 0.2],
     ]
     wrapped = DictArrayTemplate(5, "ACGT").wrap(profile)
     # no assertions: the test passes if get_logo does not raise
     _ = get_logo(wrapped)
Ejemplo n.º 26
0
 def test_numpy_ops(self):
     """A DictArray can be the right-hand operand of numpy dot products."""
     matrix = [
         [0.7, 0.1, 0.1, 0.1],
         [0.1, 0.7, 0.1, 0.1],
         [0.1, 0.1, 0.7, 0.1],
         [0.1, 0.1, 0.1, 0.7],
     ]
     darr = DictArrayTemplate(list(DNA), list(DNA)).wrap(matrix)
     uniform = [0.25, 0.25, 0.25, 0.25]
     mprobs = numpy.array(uniform)
     # both the method and the function form are exercised
     assert_allclose(mprobs.dot(darr), uniform)
     assert_allclose(numpy.dot(mprobs, darr), uniform)
Ejemplo n.º 27
0
 def test_direct_construction(self):
     """DictArray can be constructed directly from each supported input form."""
     existing = DictArrayTemplate("abc", "ABC").wrap(self.a)
     inputs = (
         [[245, 599]],  # nested series
         {"a": {"b": 4, "c": 5}},  # dict of dicts
         {("a", "b"): 4, ("a", "c"): 5},  # dict keyed by tuples
         {"a": 0, "b": 35, "c": 45},  # flat dict
         existing,  # an existing DictArray
     )
     # no assertions: the test passes if construction does not raise
     for candidate in inputs:
         _ = DictArray(candidate)
Ejemplo n.º 28
0
 def test_getitem(self):
     """Positional row slices combine with key slices on the columns."""
     matrix = [
         [0.7, 0.1, 0.1, 0.1],
         [0.1, 0.7, 0.1, 0.1],
         [0.1, 0.1, 0.7, 0.1],
         [0.1, 0.1, 0.1, 0.7],
     ]
     darr = DictArrayTemplate(list(DNA), list(DNA)).wrap(matrix)

     # all rows, columns sliced by key
     all_rows = darr[:, "A":"G"]
     assert_allclose(all_rows.to_array(), [[0.1], [0.1], [0.7], [0.1]])

     # rows from position 2 onwards, same column slice
     tail_rows = darr[2:, "A":"G"]
     assert_allclose(tail_rows.to_array(), [[0.7], [0.1]])
Ejemplo n.º 29
0
    def test_get_repr_html(self):
        """exercising method used by parent classes for nice Jupyter display"""
        data = [[3, 7], [2, 8], [5, 5]]
        darr = DictArrayTemplate(list("ABC"), list("ab")).wrap(data)
        got = darr._repr_html_()
        self.assertIsInstance(got, str)
        # assertTrue(len(got), 100) treated 100 as the failure message and so
        # only checked that the string was non-empty; compare the length instead
        self.assertGreater(len(got), 100)

        # case where 1D array
        a = [4, 6, 4, 2]
        darr = DictArrayTemplate(["A", "C", "G", "T"]).wrap(a)
        got = darr._repr_html_()
        # assertNotIn reports both operands on failure
        self.assertNotIn('class="index"', got)

        # case of 3D array
        d3 = numpy.arange(8).reshape((2, 2, 2))
        darr = DictArrayTemplate(2, 2, 2).wrap(d3)
        got = darr._repr_html_()
        self.assertIn("3 dimensional", got)
Ejemplo n.º 30
0
 def test_to_dict_nested(self):
     """DictArray.to_dict() converts DictArray instances held as elements
     into dicts as well."""
     inner = DictArrayTemplate("abc", "ABC").wrap(numpy.identity(3, int))
     self.assertEqual(inner.array.tolist(), [[1, 0, 0], [0, 1, 0], [0, 0, 1]])

     # an outer array whose elements are all the inner DictArray
     outer = DictArrayTemplate("de", "DE").wrap([[inner, inner], [inner, inner]])
     self.assertIsInstance(outer.to_dict()["d"], dict)