def test_convert_1D_dict(self):
    """convert_1D_dict output can be wrapped by a DictArrayTemplate"""
    source = {"a": 0, "b": 35, "c": 45}
    # convert_1D_dict returns the values first, then the keys
    values, names = convert_1D_dict(source)
    template = DictArrayTemplate(names)
    wrapped = template.wrap(values)
    expected = [0, 35, 45]
    self.assertEqual(wrapped.array.tolist(), expected)
def test_to_dict_nested(self):
    """to_dict() on a DictArray holding DictArray elements yields nested dicts"""
    identity = numpy.identity(3, int)
    inner = DictArrayTemplate("abc", "ABC").wrap(identity)
    self.assertEqual(
        inner.array.tolist(),
        [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
    )

    # every element of the outer 2 x 2 DictArray is itself a DictArray
    outer_template = DictArrayTemplate("de", "DE")
    outer = outer_template.wrap([[inner, inner], [inner, inner]])
    as_dict = outer.to_dict()
    self.assertTrue(isinstance(as_dict["d"], dict))
def get_all_rate_matrices(self, calibrated=True):
    """returns all rate matrices (Q) as a dict, keyed by scope

    Parameters
    ----------
    calibrated : bool
        If True, each matrix is returned as held by the "Q" definition.
        If False, each matrix is multiplied by the "length" parameter of
        its edge and, when a "rate" definition is present (a rate
        heterogeneity model), also by the rate for its bin.

    Returns
    -------
    dict
        Maps a tuple of scope values to a copy of the matrix wrapped by a
        DictArrayTemplate whose rows and columns are the motifs.

    Notes
    -----
    If a single rate matrix, the key is an empty tuple.
    """
    defn = self.defn_for["Q"]
    rate_het = self.defn_for.get("rate", False)
    if rate_het:
        # map each bin name to its position in the rate definition ...
        bin_index = rate_het.valid_dimensions.index("bin")
        bin_names = [k[bin_index] for k in rate_het.index]
        bin_names = {n: i for i, n in enumerate(bin_names)}
        # ... then switch to the position of "bin" within a Q scope
        bin_index = defn.valid_dimensions.index("bin")
    else:
        bin_names = None
        bin_index = None

    used_dims = defn.used_dimensions()
    edge_index = defn.valid_dimensions.index("edge")

    indices = {defn.valid_dimensions.index(k) for k in used_dims}
    if not calibrated:
        # scaling by length makes every edge (and bin) distinct, so these
        # dimensions must be part of the key
        indices.add(edge_index)
        if rate_het:
            indices.add(bin_index)

    indices = sorted(indices)
    result = {}
    darr_template = DictArrayTemplate(self._motifs, self._motifs)
    for scope, index in defn.index.items():
        # work on a copy so the in-place scaling below never touches the
        # values held by the definition
        q = defn.values[index].copy()
        # from scope we extract only the relevant dimensions
        key = tuple(numpy.take(scope, indices))
        if not calibrated:
            length = self.get_param_value("length", edge=scope[edge_index])
            if rate_het:
                bdex = bin_names[scope[bin_index]]
                rate = rate_het.values[bdex]
                length *= rate
            q *= length
        result[key] = darr_template.wrap(q)
        if not indices and calibrated:
            break  # single rate matrix

    return result
def reconstruct_ancestral_seqs(self, locus=None):
    """computes the conditional probabilities of each state for each
    node in the tree.

    Parameters
    ----------
    locus
        a named locus

    Returns
    -------
    {node_name: DictArray, ...}

    Notes
    -----
    Alignment columns are rows in the DictArray. Tip edges are skipped.
    """
    template = None
    ancestral = {}
    for edge in self._tree.get_edge_vector():
        if edge.istip():
            continue

        # keyword arguments shared by every "fixed_motif" rule on this edge
        rule_scope = dict(edge=edge.name, locus=locus, is_constant=True)
        try:
            per_motif = []
            for motif_index in range(len(self._motifs)):
                # pin this edge to one motif, then collect the likelihoods
                self.set_param_rule("fixed_motif", value=motif_index, **rule_scope)
                site_likelihoods = self.get_full_length_likelihoods(locus=locus)
                per_motif.append(site_likelihoods)
                if template is None:
                    # built once and reused for all remaining edges
                    template = DictArrayTemplate(
                        site_likelihoods.shape[0], self._motifs
                    )
        finally:
            # always reset the edge to -1, even if the computation failed
            self.set_param_rule("fixed_motif", value=-1, **rule_scope)

        # dict of site x motif arrays
        stacked = numpy.asarray(per_motif)
        ancestral[edge.name] = template.wrap(numpy.transpose(stacked))

    return ancestral
def test_deserialise_tabular_dictarray(self):
    """a DictArray survives a to_json / deserialise_object round trip"""
    from cogent3.util.dict_array import DictArrayTemplate

    rows = [
        [1, "abc", 11],
        [2, "bca", 22],
        [3, "cab", 33],
        [4, "abc", 44],
        [5, "bca", 55],
    ]
    header = ["id", "foo", "bar"]
    original = DictArrayTemplate(5, header).wrap(rows)

    serialised = original.to_json()
    restored = deserialise_object(serialised)
    self.assertEqual(restored.to_dict(), original.to_dict())
def get_all_psubs(self):
    """returns all psubs as a dict keyed by used dimensions

    The "dsubs" definition is used when present, otherwise "psubs". Each
    value is wrapped by a DictArrayTemplate whose rows and columns are the
    motifs.
    """
    try:
        psub_defn = self.defn_for["dsubs"]
    except KeyError:
        psub_defn = self.defn_for["psubs"]

    valid_dims = psub_defn.valid_dimensions
    # positions, within a scope, of the dimensions that are actually used
    positions = [
        valid_dims.index(dim)
        for dim in psub_defn.used_dimensions()
        if dim in valid_dims
    ]

    template = DictArrayTemplate(self._motifs, self._motifs)
    psubs = {}
    for scope, value_index in psub_defn.index.items():
        matrix = psub_defn.values[value_index]
        scope_key = tuple(numpy.take(scope, positions))
        psubs[scope_key] = template.wrap(matrix)
    return psubs
def __init__(self, data, motifs, row_indices=None, dtype=None):
    """Wrap per-motif data in a DictArray and adopt its attributes.

    Parameters
    ----------
    data
        series of numbers, can be numpy array, CategoryCounter, dict
        instances
    motifs
        labels for the data elements; its length must equal the number of
        elements (1D data) or the length of the first row (2D data)
    row_indices
        row_indices correspond to original indexes; for 2D data, defaults
        to the number of rows
    dtype
        the data must be safely castable to this type

    Raises
    ------
    ValueError
        If data is empty or has no truthy element, if the number of data
        elements does not match the number of motifs, or if the data
        cannot be safely cast to dtype.
    """
    # todo validate that motifs are strings and row_indices are ints or
    # strings
    # todo change row_indices argument name to row_keys
    if isinstance(data, numpy.ndarray):
        some_data = data.any()
    else:
        some_data = any(data)

    if not some_data or len(data) == 0:
        raise ValueError("Must provide data")

    # data is treated as 2D when its first element is itself sized
    try:
        len(data[0])
    except TypeError:
        ndim = 1
    else:
        ndim = 2

    num_elements = len(data) if ndim == 1 else len(data[0])
    if num_elements != len(motifs):
        # report num_elements: len(data[0]) raises TypeError for 1D data
        raise ValueError(
            f"number of data elements {num_elements} != {len(motifs)}"
        )

    motifs = tuple(motifs)

    # create template
    if row_indices is None and ndim == 2:
        row_indices = len(data)

    template = DictArrayTemplate(row_indices, motifs)
    darr = template.wrap(data)
    try:
        # validation only, the cast result is deliberately discarded
        darr.array.astype(dtype, casting="safe")
    except TypeError as err:
        raise ValueError(err) from err

    self.__dict__.update(darr.__dict__)
    self.motifs = motifs
    self.motif_length = len(motifs[0])
def get_all_rate_matrices(self, calibrated=True):
    """returns all rate matrices (Q) as a dict, keyed by scope

    Parameters
    ----------
    calibrated : bool
        If True, the rate matrix is scaled such that
        ``sum(pi_i * Qii) == 1``. If False, the calibrated matrix is
        multiplied by the length parameter (and the rate parameter for a
        bin if it is a rate heterogeneity model).

    Returns
    -------
    {scope: DictArray, ...}

    Notes
    -----
    If a single rate matrix (e.g. it's a time-homogeneous model), the key
    is an empty tuple.
    """
    q_defn = self.defn_for["Q"]
    rate_defn = self.defn_for.get("rate", False)

    bin_lookup = None
    q_bin_pos = None
    if rate_defn:
        # position of each bin name within the rate definition's values
        rate_bin_pos = rate_defn.valid_dimensions.index("bin")
        bin_lookup = {
            name: position
            for position, name in enumerate(
                [scope[rate_bin_pos] for scope in rate_defn.index]
            )
        }
        # where the bin name sits inside a Q scope
        q_bin_pos = q_defn.valid_dimensions.index("bin")

    used = q_defn.used_dimensions()
    edge_pos = q_defn.valid_dimensions.index("edge")

    key_positions = {q_defn.valid_dimensions.index(dim) for dim in used}
    if not calibrated:
        # length-scaled matrices differ per edge, and per bin when present
        key_positions.add(edge_pos)
        if rate_defn:
            key_positions.add(q_bin_pos)
    key_positions = sorted(key_positions)

    template = DictArrayTemplate(self._motifs, self._motifs)
    matrices = {}
    for scope, value_index in q_defn.index.items():
        # copy so the in-place scaling leaves the definition's values alone
        matrix = q_defn.values[value_index].copy()
        # from scope we extract only the relevant dimensions
        scope_key = tuple(numpy.take(scope, key_positions))
        if not calibrated:
            scale = self.get_param_value("length", edge=scope[edge_pos])
            if rate_defn:
                scale *= rate_defn.values[bin_lookup[scope[q_bin_pos]]]
            matrix *= scale

        matrices[scope_key] = template.wrap(matrix)
        if calibrated and not key_positions:
            break  # single rate matrix

    return matrices