Beispiel #1
0
 def test_cond_pos_differ(self):
     """The 'monomer' and 'conditional' motif prob models must give
     distinct lnL when the dinucleotide probs are not multiplicative."""
     dinuc_probs = {
         'AA': 0.088506666666666664,
         'AC': 0.044746666666666664,
         'GT': 0.056693333333333332,
         'AG': 0.070199999999999999,
         'CC': 0.048653333333333333,
         'TT': 0.10678666666666667,
         'CG': 0.0093600000000000003,
         'GG': 0.049853333333333333,
         'GC': 0.040253333333333335,
         'AT': 0.078880000000000006,
         'GA': 0.058639999999999998,
         'TG': 0.081626666666666667,
         'TA': 0.068573333333333333,
         'CA': 0.06661333333333333,
         'TC': 0.060866666666666666,
         'CT': 0.069746666666666665,
     }

     def configured_lf(mprob_model):
         # dinucleotide model -> likelihood function with the length rule
         # and the fixture alignment applied
         submod = Nucleotide(motif_length=2, motif_probs=dinuc_probs,
                             mprob_model=mprob_model)
         lik_func = submod.makeLikelihoodFunction(self.tree)
         lik_func.setParamRule('length', is_independent=False, init=0.4)
         lik_func.setAlignment(self.aln)
         return lik_func

     monomer_lf = configured_lf('monomer')
     conditional_lf = configured_lf('conditional')
     self.assertNotAlmostEqual(monomer_lf.getLogLikelihood(),
                               conditional_lf.getLogLikelihood())
Beispiel #2
0
 def test_lf_display(self):
     """Converting a likelihood function to str must not raise, for each
     motif prob model listed in self.ordered_by_complexity."""
     for _, mprob_model in self.ordered_by_complexity:
         dinuc_model = Nucleotide(motif_length=2, mprob_model=mprob_model)
         dinuc_model.adaptMotifProbs(self.cond_root_probs, auto=True)
         # only the absence of an exception matters; the text is discarded
         str(dinuc_model.makeLikelihoodFunction(self.tree))
Beispiel #3
0
 def test_newQ_is_nuc_process(self):
     """A 'monomer' dinucleotide model and the plain nucleotide model
     must produce the same lnL on the same alignment."""
     # single nucleotide process
     mono_model = Nucleotide(motif_probs=self.asymm_nuc_probs)
     mono_lf = mono_model.makeLikelihoodFunction(self.tree)
     mono_lf.setParamRule('length', is_independent=False, init=0.2)
     mono_lf.setAlignment(self.aln)

     # dinucleotide process; its branch length is exactly
     # motif_length * the nucleotide branch length (2 * 0.2 = 0.4)
     dinuc_model = Nucleotide(motif_length=2, mprob_model='monomer',
                              motif_probs=self.asymm_root_probs)
     dinuc_lf = dinuc_model.makeLikelihoodFunction(self.tree)
     dinuc_lf.setParamRule('length', is_independent=False, init=0.4)
     dinuc_lf.setAlignment(self.aln)

     mono_lnL = mono_lf.getLogLikelihood()
     dinuc_lnL = dinuc_lf.getLogLikelihood()
     self.assertFloatEqual(mono_lnL, dinuc_lnL)
Beispiel #4
0
 def compare_models(motif_probs, motif_length):
     """Assert that the 'monomers' and 'conditional' models give the
     same lnL for the supplied motif probs.

     When the 1st and 2nd position motifs are independent of each other,
     the conditional model is the same as the positional one.
     """
     def configured_lf(mprob_model):
         # relies on `self` from the enclosing test method
         submod = Nucleotide(motif_length=motif_length,
                             motif_probs=motif_probs,
                             mprob_model=mprob_model)
         lik_func = submod.makeLikelihoodFunction(self.tree)
         lik_func.setParamRule('length', is_independent=False, init=0.4)
         lik_func.setAlignment(self.aln)
         return lik_func

     positional_lf = configured_lf('monomers')
     conditional_lf = configured_lf('conditional')
     conditional_lnL = conditional_lf.getLogLikelihood()
     positional_lnL = positional_lf.getLogLikelihood()
     self.assertFloatEqual(conditional_lnL, positional_lnL)
Beispiel #5
0
 def test_sim_alignment(self):
     """Simulating an alignment must not raise for any (motif probs,
     model) pair in self.ordered_by_complexity."""
     for motif_probs, mprob_model in self.ordered_by_complexity:
         dinuc_model = Nucleotide(motif_length=2,
                                  motif_probs=motif_probs,
                                  mprob_model=mprob_model)
         lik_func = dinuc_model.makeLikelihoodFunction(self.tree)
         lik_func.setParamRule('length', is_independent=False, init=0.4)
         lik_func.setAlignment(self.aln)
         # the simulated alignment itself is not inspected
         lik_func.simulateAlignment()
Beispiel #6
0
 def test_get_statistics(self):
     """getStatistics must accept every combination of the
     with_motif_probs and with_titles flags under each model."""
     for motif_probs, mprob_model in self.ordered_by_complexity:
         dinuc_model = Nucleotide(motif_length=2,
                                  motif_probs=motif_probs,
                                  mprob_model=mprob_model)
         lik_func = dinuc_model.makeLikelihoodFunction(self.tree)
         # visits (True, True), (True, False), (False, True), (False, False)
         for show_mprobs in (True, False):
             for show_titles in (True, False):
                 lik_func.getStatistics(with_motif_probs=show_mprobs,
                                        with_titles=show_titles)
Beispiel #7
0
 def test_reconstruct_ancestor(self):
     """Ancestral sequence reconstruction must not raise for any
     (motif probs, model) pair in self.ordered_by_complexity."""
     for motif_probs, mprob_model in self.ordered_by_complexity:
         dinuc_model = Nucleotide(motif_length=2, mprob_model=mprob_model)
         dinuc_model.adaptMotifProbs(motif_probs, auto=True)
         lik_func = dinuc_model.makeLikelihoodFunction(self.tree)
         lik_func.setParamRule('length', is_independent=False, init=0.4)
         lik_func.setAlignment(self.aln)
         # the reconstructed sequences are not inspected
         lik_func.reconstructAncestralSeqs()
Beispiel #8
0
 def test_position_specific_mprobs(self):
     """correctly compute likelihood when positions have distinct
     probabilities

     The fixture alignment is split into its even-indexed and odd-indexed
     columns. The sum of the lnL of the two halves under the 'monomer'
     model is the expected value: the full alignment under 'monomer' must
     NOT match it, whereas the full alignment under 'monomers' must.
     """
     aln_len = len(self.aln)
     posn1 = []
     posn2 = []
     for name, seq in self.aln.todict().items():
         # columns 0, 2, 4, ... and 1, 3, 5, ... respectively
         p1 = [seq[i] for i in range(0, aln_len, 2)]
         p2 = [seq[i] for i in range(1, aln_len, 2)]
         posn1.append([name, ''.join(p1)])
         posn2.append([name, ''.join(p2)])

     # the position specific alignments
     posn1 = LoadSeqs(data=posn1)
     posn2 = LoadSeqs(data=posn2)

     # a newQ dinucleotide model
     sm = Nucleotide(motif_length=2, mprob_model='monomer', do_scaling=False)
     lf = sm.makeLikelihoodFunction(self.tree)
     lf.setAlignment(posn1)
     posn1_lnL = lf.getLogLikelihood()
     lf.setAlignment(posn2)
     posn2_lnL = lf.getLogLikelihood()
     expect_lnL = posn1_lnL + posn2_lnL

     # the joint model
     lf.setAlignment(self.aln)
     aln_lnL = lf.getLogLikelihood()

     # setting the full alignment, which has different motif probs, should
     # produce a different lnL
     # (assertNotAlmostEqual replaces the deprecated failIfAlmostEqual
     # alias, which newer unittest versions no longer provide)
     self.assertNotAlmostEqual(expect_lnL, aln_lnL)

     # set the arguments for taking position specific mprobs
     sm = Nucleotide(motif_length=2, mprob_model='monomers',
                     do_scaling=False)
     lf = sm.makeLikelihoodFunction(self.tree)
     lf.setAlignment(self.aln)
     posn12_lnL = lf.getLogLikelihood()
     self.assertFloatEqual(expect_lnL, posn12_lnL)
Beispiel #9
0
    def test_getting_node_mprobs(self):
        """getMotifProbsByNode must match hand-computed motif probability
        vectors for every named node of a small tree."""
        tree = LoadTree(treestring='(a:.2,b:.2,(c:.1,d:.1):.1)')
        aln = LoadSeqs(data=dict.fromkeys(['a', 'b', 'c', 'd'], 'TGTG'))

        motifs = ['T', 'C', 'A', 'G']

        def directed(src, dest, alias):
            # one-directional exchange between two motifs, given by index
            change = MotifChange(motifs[src], motifs[dest],
                                 forward_only=True)
            return change.aliased(alias)

        aX = directed(0, 3, 'aX')
        bX = directed(3, 0, 'bX')
        edX = directed(1, 2, 'edX')
        cX = directed(2, 1, 'cX')
        sm = Nucleotide(predicates=[aX, bX, edX, cX], equal_motif_probs=True)

        lf = sm.makeLikelihoodFunction(tree)
        # (parameter, edge, value), applied in this order
        rules = [
            ('aX', 'a', 8.0),
            ('bX', 'b', 8.0),
            ('edX', 'edge.0', 2.0),
            ('cX', 'c', 0.5),
            ('edX', 'd', 4.0),
        ]
        for par_name, edge_name, rate in rules:
            lf.setParamRule(par_name, edge=edge_name, value=rate)
        lf.setAlignment(aln)

        # we construct the hand calc variants
        root_probs = ones(4, float) * .25
        expect_a = dot(root_probs, make_p(.2, (0, 3), 8))
        expect_b = dot(root_probs, make_p(.2, (3, 0), 8))
        expect_e = dot(root_probs, make_p(.1, (1, 2), 2))
        # c and d start from the probs computed for edge.0
        expect_c = dot(expect_e, make_p(.1, (2, 1), 0.5))
        expect_d = dot(expect_e, make_p(.1, (1, 2), 4))

        prob_vectors = lf.getMotifProbsByNode()
        checks = [
            ('a', expect_a),
            ('b', expect_b),
            ('c', expect_c),
            ('d', expect_d),
            ('edge.0', expect_e),
        ]
        for node_name, expected in checks:
            self.assertFloatEqual(prob_vectors[node_name].array, expected)
Beispiel #10
0
 def test_results_different(self):
     """lnL must agree or differ across models according to their
     position in self.ordered_by_complexity.

     For the motif probs at index i, the lnL from every model is compared
     with those already computed for the same motif probs: it must differ
     (to 2 places) from the results of the first i models and equal the
     results of the models from index i onwards.
     """
     for (i, (mprobs, dummy)) in enumerate(self.ordered_by_complexity):
         results = []
         for (dummy, model) in self.ordered_by_complexity:
             di = Nucleotide(motif_length=2, motif_probs=mprobs,
                             mprob_model=model)
             lf = di.makeLikelihoodFunction(self.tree)
             lf.setParamRule('length', is_independent=False, init=0.4)
             lf.setAlignment(self.aln)
             lh = lf.getLogLikelihood()
             # assertNotAlmostEqual replaces the deprecated
             # failIfAlmostEqual alias, which newer unittest versions
             # no longer provide
             for other in results[:i]:
                 self.assertNotAlmostEqual(other, lh, places=2)
             for other in results[i:]:
                 self.assertFloatEqual(other, lh)
             results.append(lh)
Beispiel #11
0
 def test_getting_node_mprobs(self):
     """Motif probability vectors reported per tree node must equal the
     values computed by hand from the per-edge matrices."""
     tree = LoadTree(treestring='(a:.2,b:.2,(c:.1,d:.1):.1)')
     tip_seqs = {}
     for tip in ('a', 'b', 'c', 'd'):
         tip_seqs[tip] = 'TGTG'
     aln = LoadSeqs(data=tip_seqs)

     motifs = ['T', 'C', 'A', 'G']
     t_motif, c_motif, a_motif, g_motif = motifs
     aX = MotifChange(t_motif, g_motif, forward_only=True).aliased('aX')
     bX = MotifChange(g_motif, t_motif, forward_only=True).aliased('bX')
     edX = MotifChange(c_motif, a_motif, forward_only=True).aliased('edX')
     cX = MotifChange(a_motif, c_motif, forward_only=True).aliased('cX')
     sm = Nucleotide(predicates=[aX, bX, edX, cX], equal_motif_probs=True)

     lf = sm.makeLikelihoodFunction(tree)
     # (parameter, edge, value), applied in this order
     for par_name, edge_name, rate in (('aX', 'a', 8.0),
                                       ('bX', 'b', 8.0),
                                       ('edX', 'edge.0', 2.0),
                                       ('cX', 'c', 0.5),
                                       ('edX', 'd', 4.0)):
         lf.setParamRule(par_name, edge=edge_name, value=rate)
     lf.setAlignment(aln)

     # we construct the hand calc variants
     root_probs = ones(4, float) * .25
     expected = {}
     expected['a'] = dot(root_probs, make_p(.2, (0, 3), 8))
     expected['b'] = dot(root_probs, make_p(.2, (3, 0), 8))
     expected['edge.0'] = dot(root_probs, make_p(.1, (1, 2), 2))
     # c and d start from the probs computed for edge.0
     expected['c'] = dot(expected['edge.0'], make_p(.1, (2, 1), 0.5))
     expected['d'] = dot(expected['edge.0'], make_p(.1, (1, 2), 4))

     prob_vectors = lf.getMotifProbsByNode()
     for node_name in ('a', 'b', 'c', 'd', 'edge.0'):
         self.assertFloatEqual(prob_vectors[node_name].array,
                               expected[node_name])
Beispiel #12
0
 def test_results_different(self):
     """For the motif probs at index i, each model's lnL must differ (to
     2 places) from the results of the first i models and equal the
     results of the models from index i onwards."""
     for idx, (motif_probs, _) in enumerate(self.ordered_by_complexity):
         seen_lnLs = []
         for _, mprob_model in self.ordered_by_complexity:
             dinuc_model = Nucleotide(motif_length=2,
                                      motif_probs=motif_probs,
                                      mprob_model=mprob_model)
             lik_func = dinuc_model.makeLikelihoodFunction(self.tree)
             lik_func.setParamRule('length', is_independent=False,
                                   init=0.4)
             lik_func.setAlignment(self.aln)
             current = lik_func.getLogLikelihood()
             must_differ = seen_lnLs[:idx]
             must_match = seen_lnLs[idx:]
             for earlier in must_differ:
                 self.assertNotAlmostEqual(earlier, current, places=2)
             for earlier in must_match:
                 self.assertFloatEqual(earlier, current)
             seen_lnLs.append(current)