def makeForwardTreeDefn(subst_model, tree, bin_names,
                        with_indel_params=True, kn=True):
    """Pairwise Fwd

    Assemble the calculation definitions for a forward calculation on
    ``tree``.

    Arguments:
        - subst_model: object providing
          ``makeFundamentalParamControllerDefns(bin_names)``; the result is
          indexed with 'word_probs' and 'Qd'.
        - tree: passed unchanged to ``_recursive_defns``.
        - bin_names: sequence of bin names.  With more than one name the
          'bin_switch' and 'bprobs' definitions are created and the binned
          edge definition class is used.
        - with_indel_params, kn: forwarded to ``makeIndelModelDefn``.

    Returns an ``AnnotateFloatDefn`` built from ``FwdDefn(top)`` and ``top``,
    where ``top`` is the first item returned by ``_recursive_defns``.
    """
    indel = makeIndelModelDefn(with_indel_params, kn)
    subst = subst_model.makeFundamentalParamControllerDefns(bin_names)
    leaf = NonParamDefn('leaf', dimensions=('edge',))

    n_bins = len(bin_names)
    if n_bins > 1:
        switch = ProbabilityParamDefn('bin_switch', dimensions=['locus'])
        # Initial bin probabilities: the same value for every bin.
        uniform_probs = [1.0 / n_bins] * n_bins
        bprobs = PartitionDefn(
            uniform_probs,
            name="bprobs",
            dimensions=['locus'],
            dimension=('bin', bin_names))
        edge_args = [switch, bprobs]
        edge_defn_constructor = EdgeSumAndAlignDefnWithBins
    else:
        edge_args = []
        edge_defn_constructor = EdgeSumAndAlignDefn

    # One BinData definition per bin, appended after any bin-switching
    # arguments.
    mprobs = subst['word_probs']
    per_bin_data = CalcDefn(BinData)(mprobs, indel, subst['Qd']).acrossDimension(
        'bin', bin_names)
    edge_args.extend(per_bin_data)

    # Only the top definition is used below; the second item is ignored.
    # (A disabled alternative in the original code was SumDefn(*scores).)
    top, _scores = _recursive_defns(
        tree, subst, leaf, edge_defn_constructor, edge_args)
    return AnnotateFloatDefn(FwdDefn(top), top)
def makeForwardTreeDefn(subst_model, tree, bin_names,
                        with_indel_params=True, kn=True):
    """Pairwise Fwd

    Assemble the calculation definitions for a forward calculation on
    ``tree``.

    Arguments:
        - subst_model: object providing
          ``makeFundamentalParamControllerDefns(bin_names)``; the result is
          indexed with 'word_probs' and 'Qd'.
        - tree: passed unchanged to ``_recursive_defns``.
        - bin_names: sequence of bin names.  With more than one name the
          'bin_switch' and 'bprobs' definitions are created and the binned
          edge definition class is used.
        - with_indel_params, kn: forwarded to ``makeIndelModelDefn``.

    Returns an ``AnnotateFloatDefn`` built from ``FwdDefn(top)`` and ``top``,
    where ``top`` is the first item returned by ``_recursive_defns``.
    """
    indel = makeIndelModelDefn(with_indel_params, kn)
    subst = subst_model.makeFundamentalParamControllerDefns(bin_names)
    leaf = NonParamDefn('leaf', dimensions=('edge',))

    n_bins = len(bin_names)
    if n_bins > 1:
        switch = ProbabilityParamDefn('bin_switch', dimensions=['locus'])
        # Initial bin probabilities: the same value for every bin.
        uniform_probs = [1.0 / n_bins] * n_bins
        bprobs = PartitionDefn(
            uniform_probs,
            name="bprobs",
            dimensions=['locus'],
            dimension=('bin', bin_names))
        edge_args = [switch, bprobs]
        edge_defn_constructor = EdgeSumAndAlignDefnWithBins
    else:
        edge_args = []
        edge_defn_constructor = EdgeSumAndAlignDefn

    # One BinData definition per bin, appended after any bin-switching
    # arguments.
    mprobs = subst['word_probs']
    per_bin_data = CalcDefn(BinData)(mprobs, indel, subst['Qd']).acrossDimension(
        'bin', bin_names)
    edge_args.extend(per_bin_data)

    # Only the top definition is used below; the second item is ignored.
    # (A disabled alternative in the original code was SumDefn(*scores).)
    top, _scores = _recursive_defns(
        tree, subst, leaf, edge_defn_constructor, edge_args)
    return AnnotateFloatDefn(FwdDefn(top), top)
def makeTotalLogLikelihoodDefn(tree, leaves, psubs, mprobs, bprobs, bin_names,
                               locus_names, sites_independent):
    """Build the definition that yields the total log likelihood.

    Arguments:
        - tree: root edge handed to ``LikelihoodTreeDefn`` and
          ``makePartialLikelihoodDefns``.
        - leaves: definition passed to ``LikelihoodTreeDefn``.
        - psubs: definition selected per edge by
          ``makePartialLikelihoodDefns``.
        - mprobs: definition from which the 'root' edge entry is selected.
        - bprobs: bin probabilities definition; only used when there is
          more than one bin.
        - bin_names, locus_names: names along the 'bin' and 'locus'
          dimensions.
        - sites_independent: with several bins, selects
          ``BinnedSiteDistribution`` (true) or ``PatchSiteDistribution``
          together with a 'bin_switch' parameter (false).

    Returns the final definition, wrapped in ``ParallelSumDefn`` unless the
    patch (HMM) site distribution is in use.
    """
    fixed_motifs = NonParamDefn('fixed_motif', ['edge'])

    lht = LikelihoodTreeDefn(leaves, tree=tree)

    # Split up the alignment columns between the available CPUs.
    parallel_context = NonParamDefn('parallel_context')
    lht = LikelihoodTreeAlignmentSplitterDefn(parallel_context, lht)

    plh = makePartialLikelihoodDefns(tree, lht, psubs, fixed_motifs)

    # After the root partial likelihoods have been calculated it remains to
    # sum over the motifs, local sites, other sites (ie: cpus), bins and loci.
    # The motifs are always done first, but after that it gets complicated.
    # If a bin HMM is being used then the sites from the different CPUs must
    # be interleaved first, otherwise summing over the CPUs is done last to
    # minimise inter-CPU communication.

    root_mprobs = mprobs.selectFromDimension('edge', 'root')
    lh = CalcDefn(numpy.inner, name='lh')(plh, root_mprobs)

    if len(bin_names) <= 1:
        # Single bin: pick it out and sum the logs across sites.
        lh = lh.selectFromDimension('bin', bin_names[0])
        tll = CalcDefn(log_sum_across_sites, name='logsum')(lht, lh)
    else:
        if sites_independent:
            site_pattern = CalcDefn(
                BinnedSiteDistribution, name='bdist')(bprobs)
        else:
            # The HMM does the gathering over CPUs, so no parallel sum
            # is applied at the end.
            parallel_context = None
            switch = ProbabilityParamDefn('bin_switch', dimensions=['locus'])
            site_pattern = CalcDefn(
                PatchSiteDistribution, name='bdist')(switch, bprobs)
        blh = CallDefn(site_pattern, lht, name='bindex')
        per_bin_lh = lh.acrossDimension('bin', bin_names)
        tll = CallDefn(blh, *per_bin_lh, **{'name': 'tll'})

    if len(locus_names) <= 1 and parallel_context is not None:
        tll = tll.selectFromDimension('locus', locus_names[0])
    else:
        # The sum is also used when parallel_context is None, only because
        # SelectFromDimension currently has no .makeParamController() method.
        tll = SumDefn(*tll.acrossDimension('locus', locus_names))

    if parallel_context is None:
        return tll
    return ParallelSumDefn(parallel_context, tll)
def makeIndelModelDefn(with_indel_params=True, kn=True):
    """Return the definition that supplies the indel model.

    Arguments:
        - with_indel_params: if true, the model is calculated from the
          'indel_rate' and 'indel_length' parameter definitions; otherwise
          a non-parameter 'indel_model' definition is returned.
        - kn: if true use ``indel_model.KnudsenMiyamotoIndelModel``,
          otherwise ``indel_model.SimpleIndelModel``.
    """
    model_class = (indel_model.KnudsenMiyamotoIndelModel if kn
                   else indel_model.SimpleIndelModel)

    if not with_indel_params:
        # Not an optimisable parameter but a constant.  Another example is
        # the alignment in a LikFunc.
        return NonParamDefn('indel_model')

    extend = IndelParameterDefn('indel_length')  # P(extend indel)
    rate = IndelParameterDefn('indel_rate')      # indels per substitution
    return CalcDefn(model_class, name='indels')(rate, extend)
def makeTotalLogLikelihoodDefn(tree, leaves, psubs, mprobs, bprobs, bin_names,
                               locus_names, sites_independent):
    """Build the definition that yields the total log likelihood.

    Arguments:
        - tree: root edge handed to ``LikelihoodTreeDefn`` and
          ``makePartialLikelihoodDefns``.
        - leaves: definition passed to ``LikelihoodTreeDefn``.
        - psubs: definition selected per edge by
          ``makePartialLikelihoodDefns``.
        - mprobs: definition from which the 'root' edge entry is selected.
        - bprobs: bin probabilities definition; only used when there is
          more than one bin.
        - bin_names, locus_names: names along the 'bin' and 'locus'
          dimensions.
        - sites_independent: with several bins, selects
          ``BinnedSiteDistribution`` (true) or ``PatchSiteDistribution``
          together with a 'bin_switch' parameter (false).

    Returns the final definition, wrapped in ``ParallelSumDefn`` unless the
    patch (HMM) site distribution is in use.
    """
    fixed_motifs = NonParamDefn('fixed_motif', ['edge'])

    lht = LikelihoodTreeDefn(leaves, tree=tree)

    # Split up the alignment columns between the available CPUs.
    parallel_context = NonParamDefn('parallel_context')
    lht = LikelihoodTreeAlignmentSplitterDefn(parallel_context, lht)

    plh = makePartialLikelihoodDefns(tree, lht, psubs, fixed_motifs)

    # After the root partial likelihoods have been calculated it remains to
    # sum over the motifs, local sites, other sites (ie: cpus), bins and loci.
    # The motifs are always done first, but after that it gets complicated.
    # If a bin HMM is being used then the sites from the different CPUs must
    # be interleaved first, otherwise summing over the CPUs is done last to
    # minimise inter-CPU communication.

    root_mprobs = mprobs.selectFromDimension('edge', 'root')
    lh = CalcDefn(numpy.inner, name='lh')(plh, root_mprobs)

    if len(bin_names) <= 1:
        # Single bin: pick it out and sum the logs across sites.
        lh = lh.selectFromDimension('bin', bin_names[0])
        tll = CalcDefn(log_sum_across_sites, name='logsum')(lht, lh)
    else:
        if sites_independent:
            site_pattern = CalcDefn(
                BinnedSiteDistribution, name='bdist')(bprobs)
        else:
            # The HMM does the gathering over CPUs, so no parallel sum
            # is applied at the end.
            parallel_context = None
            switch = ProbabilityParamDefn('bin_switch', dimensions=['locus'])
            site_pattern = CalcDefn(
                PatchSiteDistribution, name='bdist')(switch, bprobs)
        blh = CallDefn(site_pattern, lht, name='bindex')
        per_bin_lh = lh.acrossDimension('bin', bin_names)
        tll = CallDefn(blh, *per_bin_lh, **{'name': 'tll'})

    if len(locus_names) <= 1 and parallel_context is not None:
        tll = tll.selectFromDimension('locus', locus_names[0])
    else:
        # The sum is also used when parallel_context is None, only because
        # SelectFromDimension currently has no .makeParamController() method.
        tll = SumDefn(*tll.acrossDimension('locus', locus_names))

    if parallel_context is None:
        return tll
    return ParallelSumDefn(parallel_context, tll)
def makePartialLikelihoodDefns(edge, lht, psubs, fixed_motifs):
    """Recursively build the partial likelihood definition for ``edge``.

    Arguments:
        - edge: tree node providing ``Name``, ``istip()`` and ``Children``.
        - lht: likelihood tree definition.
        - psubs: definition from which each child's entry along the 'edge'
          dimension is selected.
        - fixed_motifs: if truthy, this edge's entry is selected from it
          and the fixed-motif product definition is used.

    Returns a ``LeafPartialLikelihoodDefn`` for a tip, otherwise a product
    definition over the children's partial likelihoods.
    """
    kw = {'edge_name': edge.Name}

    if edge.istip():
        return LeafPartialLikelihoodDefn(lht, **kw)

    lht_edge = LhtEdgeLookupDefn(lht, **kw)

    # Each child contributes the inner product of its own partial
    # likelihoods with the psub selected for that child edge.
    children = []
    for child in edge.Children:
        subtree_plh = makePartialLikelihoodDefns(
            child, lht, psubs, fixed_motifs)
        child_psub = psubs.selectFromDimension('edge', child.Name)
        children.append(CalcDefn(numpy.inner)(subtree_plh, child_psub))

    if not fixed_motifs:
        # NOTE(review): this branch passes lht rather than lht_edge --
        # confirm that is intended.
        return PartialLikelihoodProductDefn(lht, *children, **kw)

    fixed_motif = fixed_motifs.selectFromDimension('edge', edge.Name)
    return PartialLikelihoodProductDefnFixedMotif(
        fixed_motif, lht_edge, *children, **kw)