Example #1
0
def makeForwardTreeDefn(subst_model, tree, bin_names,
        with_indel_params=True, kn=True):
    """Pairwise Fwd

    Assemble the definitions for a FwdDefn calculation over ``tree``.

    Arguments:
        - subst_model: must provide
          makeFundamentalParamControllerDefns(bin_names); the result is
          indexed with 'word_probs' and 'Qd'.
        - tree: handed to _recursive_defns unchanged.
        - bin_names: with more than one name, a 'bin_switch' probability
          and a uniform 'bprobs' partition are placed ahead of the per-bin
          BinData definitions in the edge arguments, and
          EdgeSumAndAlignDefnWithBins is passed to _recursive_defns as the
          edge constructor; otherwise EdgeSumAndAlignDefn is used.
        - with_indel_params, kn: handed to makeIndelModelDefn unchanged.

    Returns AnnotateFloatDefn(FwdDefn(top), top), where top is the first
    item returned by _recursive_defns.
    """
    indel = makeIndelModelDefn(with_indel_params, kn)
    subst = subst_model.makeFundamentalParamControllerDefns(bin_names)
    leaf = NonParamDefn('leaf', dimensions=('edge',))

    bin_count = len(bin_names)
    if bin_count > 1:
        switch = ProbabilityParamDefn('bin_switch', dimensions=['locus'])
        # Every bin starts out with the same probability.
        uniform = [1.0 / bin_count] * bin_count
        bprobs = PartitionDefn(
            uniform, name="bprobs",
            dimensions=['locus'], dimension=('bin', bin_names))
        edge_args = [switch, bprobs]
        edge_defn_constructor = EdgeSumAndAlignDefnWithBins
    else:
        edge_args = []
        edge_defn_constructor = EdgeSumAndAlignDefn

    word_probs = subst['word_probs']
    all_bins = CalcDefn(BinData)(word_probs, indel, subst['Qd'])
    # One BinData definition per bin, appended after any switch/bprobs.
    edge_args.extend(all_bins.acrossDimension('bin', bin_names))

    # The second item returned alongside ``top`` is not used here.
    (top, _scores) = _recursive_defns(
        tree, subst, leaf, edge_defn_constructor, edge_args)
    return AnnotateFloatDefn(FwdDefn(top), top)
Example #2
0
def makeForwardTreeDefn(subst_model,
                        tree,
                        bin_names,
                        with_indel_params=True,
                        kn=True):
    """Pairwise Fwd

    Build and return AnnotateFloatDefn(FwdDefn(top), top), where ``top`` is
    the first item returned by _recursive_defns for ``tree``.

    ``subst_model.makeFundamentalParamControllerDefns(bin_names)`` supplies
    the 'word_probs' and 'Qd' definitions; ``with_indel_params`` and ``kn``
    are forwarded to makeIndelModelDefn.  With a single bin the edge
    arguments are just the per-bin BinData definitions and
    EdgeSumAndAlignDefn is the edge constructor.  With several bins a
    'bin_switch' probability and a uniform 'bprobs' partition come first
    and EdgeSumAndAlignDefnWithBins is used instead.
    """
    indel = makeIndelModelDefn(with_indel_params, kn)
    subst = subst_model.makeFundamentalParamControllerDefns(bin_names)
    leaf = NonParamDefn('leaf', dimensions=('edge', ))

    if len(bin_names) <= 1:
        edge_args = []
        edge_defn_constructor = EdgeSumAndAlignDefn
    else:
        switch = ProbabilityParamDefn('bin_switch', dimensions=['locus'])
        # Equal starting probability for each bin.
        equal_share = [1.0 / len(bin_names) for _name in bin_names]
        bprobs = PartitionDefn(equal_share,
                               name="bprobs",
                               dimensions=['locus'],
                               dimension=('bin', bin_names))
        edge_args = [switch, bprobs]
        edge_defn_constructor = EdgeSumAndAlignDefnWithBins

    mprobs = subst['word_probs']
    bin_data_defn = CalcDefn(BinData)(mprobs, indel, subst['Qd'])
    for one_bin in bin_data_defn.acrossDimension('bin', bin_names):
        edge_args.append(one_bin)

    # Only ``top`` is needed; the accompanying second item is ignored.
    (top, _unused_scores) = _recursive_defns(tree, subst, leaf,
                                             edge_defn_constructor,
                                             edge_args)
    fwd = FwdDefn(top)
    return AnnotateFloatDefn(fwd, top)
def makeTotalLogLikelihoodDefn(tree, leaves, psubs, mprobs, bprobs, bin_names,
        locus_names, sites_independent):
    """Build and return the ``tll`` definition for ``tree``.

    Arguments:
        - tree, leaves: used to create the LikelihoodTreeDefn; ``tree`` is
          also the starting edge for makePartialLikelihoodDefns.
        - psubs: handed to makePartialLikelihoodDefns.
        - mprobs: its 'root' edge selection is combined with the partial
          likelihoods through numpy.inner.
        - bprobs: input to the site distribution when there are several
          bins.
        - bin_names, locus_names: names along the 'bin' and 'locus'
          dimensions.
        - sites_independent: with several bins, chooses
          BinnedSiteDistribution (true) or PatchSiteDistribution (false).

    The result is wrapped in a ParallelSumDefn unless the
    PatchSiteDistribution branch was taken.
    """
    fixed_motifs = NonParamDefn('fixed_motif', ['edge'])

    whole_tree = LikelihoodTreeDefn(leaves, tree=tree)

    # Split up the alignment columns between the available CPUs.
    parallel_context = NonParamDefn('parallel_context')
    lht = LikelihoodTreeAlignmentSplitterDefn(parallel_context, whole_tree)

    plh = makePartialLikelihoodDefns(tree, lht, psubs, fixed_motifs)

    # After the root partial likelihoods have been calculated it remains to
    # sum over the motifs, local sites, other sites (ie: cpus), bins and loci.
    # The motifs are always done first, but after that it gets complicated.
    # If a bin HMM is being used then the sites from the different CPUs must
    # be interleaved first, otherwise summing over the CPUs is done last to
    # minimise inter-CPU communication.

    root_mprobs = mprobs.selectFromDimension('edge', 'root')
    lh = CalcDefn(numpy.inner, name='lh')(plh, root_mprobs)

    if len(bin_names) > 1:
        if sites_independent:
            make_site_pattern = CalcDefn(BinnedSiteDistribution, name='bdist')
            site_pattern = make_site_pattern(bprobs)
        else:
            parallel_context = None   # hmm does the gathering over CPUs
            switch = ProbabilityParamDefn('bin_switch', dimensions=['locus'])
            make_site_pattern = CalcDefn(PatchSiteDistribution, name='bdist')
            site_pattern = make_site_pattern(switch, bprobs)
        blh = CallDefn(site_pattern, lht, name='bindex')
        per_bin_lh = lh.acrossDimension('bin', bin_names)
        tll = CallDefn(blh, *per_bin_lh, **{'name': 'tll'})
    else:
        only_bin_lh = lh.selectFromDimension('bin', bin_names[0])
        tll = CalcDefn(log_sum_across_sites, name='logsum')(lht, only_bin_lh)

    if len(locus_names) > 1 or parallel_context is None:
        # "or parallel_context is None" only because SelectFromDimension
        # currently has no .makeParamController() method.
        per_locus = tll.acrossDimension('locus', locus_names)
        tll = SumDefn(*per_locus)
    else:
        tll = tll.selectFromDimension('locus', locus_names[0])

    if parallel_context is None:
        return tll
    return ParallelSumDefn(parallel_context, tll)
Example #4
0
def makeIndelModelDefn(with_indel_params=True, kn=True):
    """Return the definition that supplies the indel model.

    ``kn`` selects the model class: indel_model.KnudsenMiyamotoIndelModel
    when true, indel_model.SimpleIndelModel otherwise.

    With ``with_indel_params`` true the result is a CalcDefn named 'indels'
    over that class, taking the 'indel_rate' and 'indel_length' parameter
    definitions in that order.  Otherwise the result is the constant
    NonParamDefn('indel_model') and the selected class is not used.
    """
    model_class = (indel_model.KnudsenMiyamotoIndelModel if kn
                   else indel_model.SimpleIndelModel)

    if not with_indel_params:
        # Not an optimisable parameter but a constant; another example is
        # the alignment in a LikFunc.
        return NonParamDefn('indel_model')

    extend_prob = IndelParameterDefn('indel_length')  # P(extend indel)
    rate = IndelParameterDefn('indel_rate')  # indels per substitution
    return CalcDefn(model_class, name='indels')(rate, extend_prob)
Example #5
0
def makeTotalLogLikelihoodDefn(tree, leaves, psubs, mprobs, bprobs, bin_names,
                               locus_names, sites_independent):
    """Build and return the ``tll`` definition for ``tree``.

    A LikelihoodTreeDefn is made from ``leaves`` and ``tree`` and split
    with LikelihoodTreeAlignmentSplitterDefn.  makePartialLikelihoodDefns
    supplies the partial likelihoods from ``psubs``, which are combined
    with the 'root' selection of ``mprobs`` via numpy.inner.

    With one bin the result goes through log_sum_across_sites.  With
    several, ``sites_independent`` chooses BinnedSiteDistribution (true)
    or PatchSiteDistribution (false), both fed ``bprobs``.  Loci are then
    summed or the single locus selected.  The result is wrapped in a
    ParallelSumDefn unless the PatchSiteDistribution branch was taken.
    """

    fixed_motifs = NonParamDefn('fixed_motif', ['edge'])

    lht = LikelihoodTreeDefn(leaves, tree=tree)

    # Split up the alignment columns between the available CPUs.
    parallel_context = NonParamDefn('parallel_context')
    lht = LikelihoodTreeAlignmentSplitterDefn(parallel_context, lht)

    plh = makePartialLikelihoodDefns(tree, lht, psubs, fixed_motifs)

    # After the root partial likelihoods have been calculated it remains to
    # sum over the motifs, local sites, other sites (ie: cpus), bins and loci.
    # The motifs are always done first, but after that it gets complicated.
    # If a bin HMM is being used then the sites from the different CPUs must
    # be interleaved first, otherwise summing over the CPUs is done last to
    # minimise inter-CPU communication.

    root_mprobs = mprobs.selectFromDimension('edge', 'root')
    lh = CalcDefn(numpy.inner, name='lh')(plh, root_mprobs)

    if len(bin_names) <= 1:
        # Single bin: no site distribution is needed.
        lh = lh.selectFromDimension('bin', bin_names[0])
        tll = CalcDefn(log_sum_across_sites, name='logsum')(lht, lh)
    else:
        if not sites_independent:
            parallel_context = None  # hmm does the gathering over CPUs
            switch = ProbabilityParamDefn('bin_switch', dimensions=['locus'])
            site_pattern = CalcDefn(PatchSiteDistribution,
                                    name='bdist')(switch, bprobs)
        else:
            site_pattern = CalcDefn(BinnedSiteDistribution,
                                    name='bdist')(bprobs)
        blh = CallDefn(site_pattern, lht, name='bindex')
        bin_likelihoods = lh.acrossDimension('bin', bin_names)
        tll = CallDefn(blh, *bin_likelihoods, **dict(name='tll'))

    single_locus = not len(locus_names) > 1
    if single_locus and parallel_context is not None:
        tll = tll.selectFromDimension('locus', locus_names[0])
    else:
        # The "parallel_context is None" case lands here only because
        # SelectFromDimension currently has no .makeParamController() method.
        tll = SumDefn(*tll.acrossDimension('locus', locus_names))

    if parallel_context is not None:
        tll = ParallelSumDefn(parallel_context, tll)

    return tll
Example #6
0
def makePartialLikelihoodDefns(edge, lht, psubs, fixed_motifs):
    """Recursively build the partial likelihood definition for ``edge``.

    Arguments:
        - edge: tree node providing .Name, .istip() and .Children.
        - lht: likelihood tree definition shared by every node.
        - psubs: definition whose 'edge' dimension is selected by each
          child's name.
        - fixed_motifs: if truthy, its selection for this edge's name is
          used and PartialLikelihoodProductDefnFixedMotif builds the
          result.

    A tip yields a LeafPartialLikelihoodDefn.  Any other node combines the
    definitions of its children, each passed through numpy.inner with that
    child's psub.
    """
    kw = {'edge_name': edge.Name}

    if edge.istip():
        return LeafPartialLikelihoodDefn(lht, **kw)

    lht_edge = LhtEdgeLookupDefn(lht, **kw)

    def child_term(child):
        # Subtree first, then its psub, then the inner-product definition.
        subtree_plh = makePartialLikelihoodDefns(child, lht, psubs,
                                                 fixed_motifs)
        psub = psubs.selectFromDimension('edge', child.Name)
        return CalcDefn(numpy.inner)(subtree_plh, psub)

    children = [child_term(child) for child in edge.Children]

    if not fixed_motifs:
        # NOTE(review): this passes ``lht`` where the fixed-motif branch
        # passes ``lht_edge`` -- confirm that is intended.
        return PartialLikelihoodProductDefn(lht, *children, **kw)

    fixed_motif = fixed_motifs.selectFromDimension('edge', edge.Name)
    return PartialLikelihoodProductDefnFixedMotif(
        fixed_motif, lht_edge, *children, **kw)