def reconstructAncestralSeqs(self, locus=None):
    """Return a dict of wrapped arrays of state likelihoods, keyed by the
    name of each non-tip edge in the tree.

    For every edge from ``self._tree.getEdgeVector()`` that is not a tip,
    the 'fixed_motif' parameter rule is set to each motif index in turn and
    the result of ``getFullLengthLikelihoods`` is collected.  The rule is
    reset to -1 for that edge afterwards, whether or not an error occurred.

    Arguments:
        - locus: a named locus
    """
    ancestral = {}
    template = None
    for node in self._tree.getEdgeVector():
        if node.istip():
            continue
        node_name = node.Name
        # keyword arguments shared by every setParamRule call for this edge
        rule = dict(edge=node_name, locus=locus, is_constant=True)
        per_motif = []
        try:
            for motif_index in range(len(self._motifs)):
                self.setParamRule('fixed_motif', value=motif_index, **rule)
                lhs = self.getFullLengthLikelihoods(locus=locus)
                per_motif.append(lhs)
                if template is None:
                    # built once, from the first likelihood array seen
                    template = DictArrayTemplate(lhs.shape[0], self._motifs)
        finally:
            # always release the constraint on this edge
            self.setParamRule('fixed_motif', value=-1, **rule)
        # dict of site x motif arrays
        stacked = numpy.asarray(per_motif)
        ancestral[node_name] = template.wrap(numpy.transpose(stacked))
    return ancestral
def reconstructAncestralSeqs(self, locus=None):
    """Collect, for each internal node, the likelihoods obtained when that
    node is constrained to each motif in turn.

    Tip edges are skipped.  The 'fixed_motif' rule on an edge is always
    restored to -1 once that edge has been processed, even on error.

    Arguments:
        - locus: a named locus

    Returns a dict mapping edge name to the wrapped, transposed array of
    collected likelihoods.
    """

    def constrain(edge_name, state):
        # single place that issues the 'fixed_motif' rule for an edge
        self.setParamRule(
            "fixed_motif", value=state, edge=edge_name, locus=locus, is_constant=True
        )

    result = {}
    template = None
    for edge in self._tree.getEdgeVector():
        if edge.istip():
            continue
        columns = []
        try:
            for state in range(len(self._motifs)):
                constrain(edge.Name, state)
                site_lhs = self.getFullLengthLikelihoods(locus=locus)
                columns.append(site_lhs)
                if template is None:
                    # the template is shared by all edges; create it lazily
                    template = DictArrayTemplate(site_lhs.shape[0], self._motifs)
        finally:
            constrain(edge.Name, -1)
        # dict of site x motif arrays
        as_array = numpy.asarray(columns)
        result[edge.Name] = template.wrap(numpy.transpose(as_array))
    return result
def __init__(self, default=None, name=None, dimensions=None, dimension=None, size=None, **kw):
    """Set up the partition definition.

    The partition size is taken from `size` when given, otherwise from
    ``len(default)``, otherwise from ``len(dimension[1])``.

    When `dimension` is given it is unpacked as (dimension name,
    categories); the categories become ``bin_names`` and are used to build
    ``array_template``.

    The default value is ``_makeDefaultValue()`` when `default` is None;
    otherwise `default` is unwrapped through ``array_template`` when that is
    not None, or converted with ``numpy.asarray``.  The result is passed to
    ``_InputDefn.__init__`` and then validated as a constant.
    """
    assert name

    # explicit size > length of default > number of dimension categories
    if size is None:
        if default is not None:
            size = len(default)
        elif dimension is not None:
            size = len(dimension[1])
    self.size = size

    if dimension is not None:
        self.internal_dimension = dimension
        (dim_name, dim_cats) = dimension
        self.bin_names = dim_cats
        self.array_template = DictArrayTemplate(dim_cats)
        self.internal_dimensions = (dim_name,)

    if default is None:
        default = self._makeDefaultValue()
    else:
        # NOTE(review): array_template is read here even when `dimension`
        # is None, so it is presumably also defined on the class or a base
        # class -- confirm.
        template = self.array_template
        if template is not None:
            default = template.unwrap(default)
        else:
            default = numpy.asarray(default)

    _InputDefn.__init__(self, name=name, default=default, dimensions=dimensions, **kw)
    self.checkValueIsValid(default, True)
def getMotifProbs(self, edge=None, bin=None, locus=None):
    """Return the 'mprobs' parameter value wrapped by a DictArrayTemplate.

    If the first parameter name containing 'mprobs' uses the 'position'
    dimension, the template is built from the position values and the
    motifs; otherwise it is built from the motifs alone.
    """
    probs = self.getParamValue('mprobs', edge=edge, bin=bin, locus=locus)

    candidates = [par for par in self.getParamNames() if 'mprobs' in par]
    mprob_name = candidates[0]

    if 'position' not in self.getUsedDimensions(mprob_name):
        return DictArrayTemplate(self._mprob_motifs).wrap(probs)

    positions = self._valuesForDimension('position')
    motifs = self._mprob_motifs
    template = DictArrayTemplate(positions, motifs)
    return template.wrap(probs)
def getMotifProbsByNode(self, edges=None, bin=None, locus=None):
    """Return per-node motif probabilities wrapped by a DictArrayTemplate
    whose first axis is the edge names.

    Arguments:
        - edges: names of the nodes to report; when None, every name
          produced by ``_nodeMotifProbs`` is used, in the order produced
        - bin, locus: passed on when looking up 'mprobs' and to
          ``_nodeMotifProbs``

    The second axis is ``_mprob_motifs`` when the first row has that many
    entries, otherwise ``_motifs``.
    """
    scope = dict(bin=bin, locus=locus)
    base_probs = self.getParamValue('mprobs', **scope)
    word_probs = self._model.calcWordProbs(base_probs)
    pairs = self._nodeMotifProbs(self._tree, word_probs, scope)

    if edges is None:
        edges = [node_name for (node_name, _) in pairs]
    by_name = dict(pairs)
    values = [by_name[node_name] for node_name in edges]

    # choose the motif axis that matches the length of the rows
    if len(values[0]) == len(self._mprob_motifs):
        motif_axis = self._mprob_motifs
    else:
        motif_axis = self._motifs
    return DictArrayTemplate(edges, motif_axis).wrap(values)
def __init__(self, default=None, name=None, dimensions=None, dimension=None, size=None, **kw):
    """Initialise the definition, its size and its default value.

    Size resolution order: the `size` argument, then ``len(default)``, then
    ``len(dimension[1])``.  A given `dimension` is a (name, categories)
    pair; the categories are stored as ``bin_names`` and used to create
    ``array_template``.

    A missing `default` is replaced by ``_makeDefaultValue()``; a supplied
    one is unwrapped via ``array_template`` if that is not None, else turned
    into a numpy array.  The value is then handed to
    ``_InputDefn.__init__`` and checked with ``checkValueIsValid`` as a
    constant.
    """
    assert name

    if size is None and default is not None:
        size = len(default)
    elif size is None and dimension is not None:
        size = len(dimension[1])
    self.size = size

    has_dimension = dimension is not None
    if has_dimension:
        self.internal_dimension = dimension
        (dim_name, dim_cats) = dimension
        self.bin_names = dim_cats
        self.array_template = DictArrayTemplate(dim_cats)
        self.internal_dimensions = (dim_name,)

    if default is None:
        default = self._makeDefaultValue()
    elif self.array_template is None:
        # NOTE(review): relies on array_template existing even when no
        # `dimension` was passed -- presumably a class-level attribute;
        # confirm.
        default = numpy.asarray(default)
    else:
        default = self.array_template.unwrap(default)

    _InputDefn.__init__(self, name=name, default=default, dimensions=dimensions, **kw)
    self.checkValueIsValid(default, True)
def getPsubForEdge(self, name, **kw):
    """Return the substitution probability matrix for the named edge.

    The 'dpsubs' parameter is looked up first; if that raises KeyError the
    'psubs' parameter is used instead.  The array is wrapped by a
    DictArrayTemplate with the motifs on both axes.
    """
    try:
        # For PartialyDiscretePsubsDefn
        psub = self.getParamValue('dpsubs', edge=name, **kw)
    except KeyError:
        psub = self.getParamValue('psubs', edge=name, **kw)
    template = DictArrayTemplate(self._motifs, self._motifs)
    return template.wrap(psub)
def getBinProbs(self, locus=None):
    """Return the posterior probabilities given by the 'bindex' parameter
    value for the per-bin 'lh' values, wrapped by a DictArrayTemplate.

    The template is built from the bin names and the size of the second
    axis of the returned array.
    """
    hmm = self.getParamValue('bindex', locus=locus)
    bin_lhs = []
    for bin_name in self.bin_names:
        bin_lhs.append(self.getParamValue('lh', locus=locus, bin=bin_name))
    posterior = hmm.getPosteriorProbs(*bin_lhs)
    template = DictArrayTemplate(self.bin_names, posterior.shape[1])
    return template.wrap(posterior)
def getRateMatrixForEdge(self, name, **kw):
    """Return the rate matrix (Q) for the named edge.

    Note: expm(Q) will give the same result as getPsubForEdge(name)

    Arguments:
        - name: the edge name
        - kw: extra keyword arguments forwarded to ``getParamValue``

    Raises:
        RuntimeError: the lookup failed with ``KeyError('Q')`` and `name`
            is not itself 'Q'.
        KeyError: any other failed lookup is re-raised unchanged.
    """
    try:
        array = self.getParamValue('Q', edge=name, **kw)
    except KeyError as err:
        # Use err.args rather than err[0]: exception objects are not
        # subscriptable on Python 3, so the old form raised TypeError
        # instead of the intended RuntimeError.
        missing = err.args[0] if err.args else None
        # When the edge is itself called 'Q' the missing key is ambiguous,
        # so the original KeyError is left to propagate.
        if missing == 'Q' and name != 'Q':
            raise RuntimeError('rate matrix not known by this model')
        raise
    return DictArrayTemplate(self._motifs, self._motifs).wrap(array)
def getBinPriorProbs(self, locus=None):
    """Return the 'bprobs' parameter value for `locus`, wrapped by a
    DictArrayTemplate built from the bin names."""
    priors = self.getParamValue('bprobs', locus=locus)
    template = DictArrayTemplate(self.bin_names)
    return template.wrap(priors)
def getMotifProbs(self, edge=None, bin=None, locus=None):
    """Return the 'mprobs' parameter value for the given edge, bin and
    locus, wrapped by a DictArrayTemplate built from ``_mprob_motifs``."""
    lookup = dict(edge=edge, bin=bin, locus=locus)
    probs = self.getParamValue('mprobs', **lookup)
    template = DictArrayTemplate(self._mprob_motifs)
    return template.wrap(probs)
class PartitionDefn(_InputDefn):
    """A partition such as mprobs can be const or optimised.  Optimised is
    a bit tricky since it isn't just a scalar."""

    numeric = False  # well, not scalar anyway
    const_by_default = False
    independent_by_default = False

    def __init__(self, default=None, name=None, dimensions=None,
                 dimension=None, size=None, **kw):
        """Set up the partition's size, optional internal dimension and
        default value.

        Size comes from `size`, else ``len(default)``, else
        ``len(dimension[1])``.  `dimension`, when given, is a
        (name, categories) pair.  The default value is validated as a
        constant after the base class initialiser has run.
        """
        assert name

        if size is None:
            if default is not None:
                size = len(default)
            elif dimension is not None:
                size = len(dimension[1])
        self.size = size

        if dimension is not None:
            self.internal_dimension = dimension
            (dim_name, dim_cats) = dimension
            self.bin_names = dim_cats
            self.array_template = DictArrayTemplate(dim_cats)
            self.internal_dimensions = (dim_name,)

        if default is None:
            default = self._makeDefaultValue()
        else:
            # NOTE(review): array_template is read even when `dimension`
            # is None, so it is presumably defined on a base class --
            # confirm.
            template = self.array_template
            if template is not None:
                default = template.unwrap(default)
            else:
                default = numpy.asarray(default)

        _InputDefn.__init__(self, name=name, default=default,
                            dimensions=dimensions, **kw)
        self.checkValueIsValid(default, True)

    def _makeDefaultValue(self):
        """Return an array of ``size`` equal proportions."""
        share = 1.0 / self.size
        return numpy.array([share] * self.size)

    def makeDefaultSetting(self):
        """Return a Var built around a copy of the default value."""
        #return ConstVal(self.default)
        bounds = (None, self.default.copy(), None)
        return Var(bounds)

    def checkSettingIsValid(self, setting):
        """Validate the default value carried by `setting`."""
        return self.checkValueIsValid(
            setting.getDefaultValue(), setting.is_constant)

    def checkValueIsValid(self, value, is_constant):
        """Raise ValueError unless `value` is a valid partition.

        `value` must have shape ``(size,)``, every element must lie in
        [0, 1] (strictly inside when not constant) and the elements must
        sum to 1.0 within 1e-5.
        """
        expected_shape = (self.size,)
        if value.shape != expected_shape:
            raise ValueError("Wrong array shape %s for %s, expected (%s,)"
                             % (value.shape, self.name, self.size))
        for part in value:
            if part < 0:
                raise ValueError("Negative probability in %s" % self.name)
            if part > 1:
                raise ValueError("Probability > 1 in %s" % self.name)
            if is_constant:
                continue
            # 0 or 1 leads to log(0) or log(inf) in optimiser code
            if part == 0:
                raise ValueError(
                    "Zeros allowed in %s only when constant" % self.name)
            if part == 1:
                raise ValueError(
                    "Ones allowed in %s only when constant" % self.name)
        total = sum(value)
        if abs(total - 1.0) > .00001:
            raise ValueError("Elements of %s must sum to 1.0, not %s"
                             % (self.name, total))

    def _makePartitionCell(self, name, scope, value):
        """Return ``(ratio parameters, partition cell)`` for `value`.

        `value` is unpacked into ratios, each of which becomes a LogOptPar
        bounded by 1e-6 and 1e+6; the partition is an EvaluatedCell that
        turns those ratios back into proportions.
        """
        # This was originally put in its own function so as to provide a
        # closure containing the value of sum(value), which is no longer
        # required since it is now always 1.0.
        N = len(value)  # not used further; retained from the original
        assert abs(sum(value) - 1.0) < .00001
        ratio_name = name + '_ratio'
        ratios = [LogOptPar(ratio_name, scope, (1e-6, start, 1e+6))
                  for start in _unpack_proportions(value)]

        def r2p(*ratios):
            return numpy.asarray(_proportions(1.0, ratios))

        partition = EvaluatedCell(name, r2p, tuple(ratios))
        return (ratios, partition)

    def makeCells(self, input_soup={}, variable=None):
        """Build the cells for every unique setting.

        A setting that is constant, or that is not `variable` when a
        `variable` is given, becomes a ConstCell; any other setting gets
        ratio parameters plus an evaluated partition cell.

        Returns ``(all_cells, uniq_cells)``: `all_cells` additionally
        contains the ratio parameters, `uniq_cells` holds one partition
        cell per unique setting.
        """
        all_cells = []
        uniq_cells = []
        for setting in self.uniq:
            if setting is None:
                raise ValueError("input %s not set" % self.name)
            assert hasattr(setting, 'getDefaultValue'), setting
            value = setting.getDefaultValue()
            assert hasattr(value, 'shape'), value
            assert value.shape == (self.size,)
            # every assignment key that points at this very setting object
            scope = [key for key in self.assignments
                     if self.assignments[key] is setting]
            assert value is not None
            optimisable = not (
                setting.is_constant
                or (variable is not None and variable is not setting))
            if optimisable:
                (ratios, partition) = self._makePartitionCell(
                    self.name, scope, value)
                all_cells.extend(ratios)
            else:
                partition = ConstCell(self.name, value)
            all_cells.append(partition)
            uniq_cells.append(partition)
        return (all_cells, uniq_cells)
class PartitionDefn(_InputDefn):
    """A partition such as mprobs can be const or optimised.  Optimised is
    a bit tricky since it isn't just a scalar."""

    numeric = False  # well, not scalar anyway
    const_by_default = False
    independent_by_default = False

    def __init__(self, default=None, name=None, dimensions=None,
                 dimension=None, size=None, **kw):
        """Initialise the definition.

        The size is the `size` argument if given, otherwise the length of
        `default`, otherwise the number of categories in `dimension`
        (a (name, categories) pair).  The resulting default value is
        passed to the base initialiser and then checked as a constant.
        """
        assert name

        if size is None and default is not None:
            size = len(default)
        elif size is None and dimension is not None:
            size = len(dimension[1])
        self.size = size

        if dimension is not None:
            self.internal_dimension = dimension
            (dim_name, dim_cats) = dimension
            self.bin_names = dim_cats
            self.array_template = DictArrayTemplate(dim_cats)
            self.internal_dimensions = (dim_name,)

        if default is None:
            default = self._makeDefaultValue()
        elif self.array_template is None:
            # NOTE(review): array_template must exist even without a
            # `dimension` -- presumably a base-class attribute; confirm.
            default = numpy.asarray(default)
        else:
            default = self.array_template.unwrap(default)

        _InputDefn.__init__(self, name=name, default=default,
                            dimensions=dimensions, **kw)
        self.checkValueIsValid(default, True)

    def _makeDefaultValue(self):
        """Return a uniform partition: ``size`` entries of 1/size."""
        uniform = [1.0 / self.size] * self.size
        return numpy.array(uniform)

    def makeDefaultSetting(self):
        """Return a Var whose middle element is a copy of the default."""
        #return ConstVal(self.default)
        start = self.default.copy()
        return Var((None, start, None))

    def checkSettingIsValid(self, setting):
        """Check the default value of `setting`, honouring its constness."""
        candidate = setting.getDefaultValue()
        return self.checkValueIsValid(candidate, setting.is_constant)

    def checkValueIsValid(self, value, is_constant):
        """Validate `value` as a partition, raising ValueError on failure.

        Checks, in order: the shape is ``(size,)``; no element is negative
        or greater than 1; when not constant, no element equals 0 or 1;
        the elements sum to 1.0 within 1e-5.
        """
        if value.shape != (self.size,):
            details = (value.shape, self.name, self.size)
            raise ValueError(
                "Wrong array shape %s for %s, expected (%s,)" % details)

        may_hit_bounds = is_constant
        for part in value:
            if part < 0:
                raise ValueError("Negative probability in %s" % self.name)
            if part > 1:
                raise ValueError("Probability > 1 in %s" % self.name)
            if not may_hit_bounds:
                # 0 or 1 leads to log(0) or log(inf) in optimiser code
                if part == 0:
                    raise ValueError(
                        "Zeros allowed in %s only when constant" % self.name)
                if part == 1:
                    raise ValueError(
                        "Ones allowed in %s only when constant" % self.name)

        total = sum(value)
        if abs(total - 1.0) > .00001:
            raise ValueError(
                "Elements of %s must sum to 1.0, not %s" % (self.name, total))

    def _makePartitionCell(self, name, scope, value):
        """Create the optimisable representation of a partition.

        Returns a 2-tuple: the list of LogOptPar ratio parameters (each
        bounded by 1e-6 and 1e+6) and an EvaluatedCell that converts the
        ratios back to proportions.
        """
        # This was originally put in its own function so as to provide a
        # closure containing the value of sum(value), which is no longer
        # required since it is now always 1.0.
        N = len(value)  # not used further; retained from the original
        assert abs(sum(value) - 1.0) < .00001
        ratios = []
        for start in _unpack_proportions(value):
            ratios.append(LogOptPar(name + '_ratio', scope, (1e-6, start, 1e+6)))

        def r2p(*ratios):
            return numpy.asarray(_proportions(1.0, ratios))

        partition = EvaluatedCell(name, r2p, tuple(ratios))
        return (ratios, partition)

    def makeCells(self, input_soup={}, variable=None):
        """Create cells for each unique setting of this definition.

        Constant settings, and settings other than `variable` when one is
        given, yield a ConstCell.  The remaining settings yield ratio
        parameters and an evaluated partition cell.

        Returns ``(all_cells, uniq_cells)``.
        """
        uniq_cells = []
        all_cells = []
        for setting in self.uniq:
            if setting is None:
                raise ValueError("input %s not set" % self.name)
            assert hasattr(setting, 'getDefaultValue'), setting
            value = setting.getDefaultValue()
            assert hasattr(value, 'shape'), value
            assert value.shape == (self.size,)
            # keys whose assignment is this exact setting object
            scope = [
                key for key in self.assignments
                if self.assignments[key] is setting
            ]
            assert value is not None
            held_fixed = setting.is_constant or (
                variable is not None and variable is not setting)
            if held_fixed:
                partition = ConstCell(self.name, value)
            else:
                (ratios, partition) = self._makePartitionCell(
                    self.name, scope, value)
                all_cells.extend(ratios)
            all_cells.append(partition)
            uniq_cells.append(partition)
        return (all_cells, uniq_cells)