Beispiel #1
0
class OldPhyloNodeTests(TestCase):
    """Tests of the PhyloNode class -- these are all now methods of RangeNode.

    Each test starts from the tree built in setUp. Conditions are checked
    with TestCase assertion methods rather than bare ``assert`` statements,
    so they are not stripped when Python runs with -O.
    """

    def setUp(self):
        """Build the standard tree shared by all tests."""
        self.t1 = DndParser('((a,(b,c)),(d,e))', RangeNode)
        #self.t1 indices: ((0,(1,2)5)6,(3,4)7)8

    def test_makeIdIndex(self):
        """RangeNode makeIdIndex should assign ids to every node"""
        self.t1.makeIdIndex()
        id_index = self.t1.IdIndex
        all_nodes = list(self.t1.traverse(self_before=True))
        #check we got an entry for each node
        self.assertEqual(len(id_index), len(all_nodes))
        #check every node has an Id and that the Id is in the index
        for node in all_nodes:
            self.assertTrue(hasattr(node, 'Id'))
            self.assertTrue(node.Id in id_index)

    def test_assignQ_single_passed(self):
        """RangeNode assignQ should propagate single Q param down tree"""
        #should work if Q explicitly passed
        tree = self.t1
        Q = ['a']
        tree.assignQ(Q)
        #identity, not equality: every node must share the same Q object
        for node in tree.traverse(self_before=True):
            self.assertTrue(node.Q is Q)

    def test_assignQ_single_set(self):
        """RangeNode assignQ should propagate single Q if set"""
        tree = self.t1
        Q = ['a']
        #precondition: the fresh tree has no Q yet
        self.assertFalse(hasattr(tree, 'Q'))
        tree.Q = Q
        tree.assignQ()
        for node in tree.traverse(self_before=True):
            self.assertTrue(node.Q is Q)

    def test_assignQ_single_overwrite(self):
        """RangeNode assignQ should overwrite root Q if new Q passed"""
        tree = self.t1
        Q = ['a']
        Q2 = ['b']
        tree.Q = Q
        tree.assignQ(Q2)
        for node in tree.traverse(self_before=True):
            self.assertTrue(node.Q is Q2)
            self.assertTrue(node.Q is not Q)

    def test_assignQ_multiple(self):
        """RangeNode assignQ should propagate multiple Qs"""
        tree = self.t1
        Q1 = ['a']
        Q2 = ['b']
        Q3 = ['c']
        tree.makeIdIndex()
        #nodes 7 and 5 get their own Q before Q3 is assigned at the top
        tree.IdIndex[7].Q = Q1
        tree.IdIndex[5].Q = Q2
        tree.assignQ(Q3)
        observed = [node.Q for node in tree.traverse(self_after=True)]
        self.assertTrue(tree.Q is Q3)
        self.assertEqual(observed, [Q3, Q2, Q2, Q2, Q3, Q1, Q1, Q1, Q3])

    def test_assignQ_multiple_overwrite(self):
        """RangeNode assignQ should allow overwrite"""
        tree = self.t1
        Q1 = ['a']
        Q2 = ['b']
        Q3 = ['c']
        tree.makeIdIndex()
        tree.IdIndex[7].Q = Q1
        tree.IdIndex[5].Q = Q2
        #with overwrite=True the Qs set above must all be replaced by Q3
        tree.assignQ(Q3, overwrite=True)
        for node in tree.traverse(self_after=True):
            self.assertTrue(node.Q is Q3)

    def test_assignQ_special(self):
        """RangeNode assignQ should work with special Qs"""
        tree = self.t1
        Q1 = 'a'
        Q2 = 'b'
        Q3 = 'c'
        tree.makeIdIndex()
        #maps node id -> Q to use at that node
        special = {7: Q1, 1: Q2}
        #won't work if no Q at root
        self.assertRaises(ValueError, tree.assignQ, special_qs=special)
        tree.assignQ(Q3, special_qs=special)
        observed = [node.Q for node in tree.traverse(self_after=True)]
        self.assertEqual(
            observed, ['c', 'b', 'c', 'c', 'c', 'a', 'a', 'a', 'c'])

    def test_assignP(self):
        """RangeNode assignP should work when Qs set."""
        tree = self.t1
        for node in tree.traverse(self_before=True):
            node.Length = random() * 0.5  #range 0 to 0.5
        tree.Q = Rates.random(DnaPairs)
        tree.assignQ()
        tree.assignP()
        tree.assignIds()
        for node in tree.traverse(self_after=True):
            #nodes without a parent are skipped
            if node.Parent is not None:
                #mean of (1 - diagonal of P) should match the branch length
                self.assertFloatEqual(
                    average(1-diag(node.P._data), axis=0), node.Length)

    def test_assignLength(self):
        """RangeNode assignLength should set branch length"""
        tree = self.t1
        tree.assignLength(0.3)
        for node in tree.traverse(self_before=True):
            self.assertEqual(node.Length, 0.3)

    def test_evolve(self):
        """RangeNode evolve should work on a starting vector"""
        tree = self.t1
        tree.Q = Rates.random(DnaPairs)
        tree.assignQ()
        tree.assignLength(0.1)
        tree.assignP()
        start = array([
            1, 0, 2, 1, 0, 0, 2, 1, 2, 0, 1, 2, 1, 0, 2, 0, 0, 3, 0, 2, 1, 0,
            3, 1, 0, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3,
            3, 3, 3
        ])
        tree.evolve(start)
        #each visited node must hold a same-length sequence that differs
        #from the starting vector
        for node in tree.traverse():
            self.assertEqual(len(node.Sequence), len(start))
            self.assertNotEqual(node.Sequence, start)
        #WARNING: Doesn't test base freqs etc. at this point, but those aren't
        #really evolve()'s responsibility (tested as self.P.mutate(seq) once
        #P is set, which we've already demonstrated works.)

    def test_assignPs(self):
        """RangeNode assignPs should assign multiple scaled P matrices"""
        tree = self.t1
        for node in tree.traverse(self_before=True):
            node.Length = random() * 0.5  #range 0 to 0.5
        tree.Q = Rates.random(DnaPairs)
        tree.assignQ()
        scales = [1, 0.5, 0.25]
        tree.assignPs(scales)
        tree.assignIds()
        for node in tree.traverse(self_after=True):
            if node.Parent is not None:
                self.assertEqual(len(node.Ps), 3)
                #the k-th matrix should reflect the k-th scale factor
                #applied to the branch length
                for P, scale in zip(node.Ps, scales):
                    self.assertFloatEqual(
                        average(1-diag(P._data), axis=0), scale*node.Length)

    def test_evolveSeqs(self):
        """RangeNode evolveSeqs should evolve multiple sequences"""
        tree = self.t1
        for node in tree.traverse(self_before=True):
            node.Length = 0.5
        tree.Q = Rates.random(DnaPairs)
        tree.assignQ()
        tree.assignPs([1, 1, 0.1])
        tree.assignIds()
        orig_seqs = [array(randint(0, 4, 200)) for _ in range(3)]
        tree.evolveSeqs(orig_seqs)
        for node in tree.traverse():  #only look at leaves
            if node.Parent is not None:
                self.assertEqual(len(node.Sequences), 3)
                for orig, new in zip(orig_seqs, node.Sequences):
                    self.assertEqual(len(orig), len(new))
                    self.assertNotEqual(orig, new)
                #presumably Sequences[k] is evolved with Ps[k]: index 1 uses
                #scale 1 and index 2 uses scale 0.1, so index 1 should show
                #more changed positions -- TODO confirm
                changed_full = sum(orig_seqs[1] != node.Sequences[1])
                changed_slow = sum(orig_seqs[2] != node.Sequences[2])
                self.assertTrue(changed_full > changed_slow)
Beispiel #2
0
class analysisTests(TestCase):
    """Tests of top-level functions.

    Covers tree_threeway_counts, tree_twoway_counts, counts_to_probs,
    probs_to_rates, rates_to_array, tree_threeway_rates, tree_twoway_rates
    and multivariate_normal_prob. The code sticks to syntax accepted by
    both Python 2 and Python 3, and checks conditions with TestCase
    assertion methods so they are not stripped under -O.
    """

    def setUp(self):
        """Build the standard tree shared by the tree-based tests."""
        self.t1 = DndParser('((a,(b,c)),(d,e))', RangeNode)
        #self.t1 indices: ((0,(1,2)5)6,(3,4)7)8

    def _set_leaf_sequences(self):
        """Index self.t1 and give nodes 0-4 fixed three-symbol sequences."""
        self.t1.makeIdIndex()
        by_id = self.t1.IdIndex
        leaf_seqs = [[0,0,0], [0,1,0], [1,0,1], [1,1,0], [1,1,1]]
        for node_id, seq in enumerate(leaf_seqs):
            by_id[node_id].Sequence = array(seq)

    def test_threeway_counts(self):
        """threeway_counts should produce correct count matrix"""
        self._set_leaf_sequences()
        depths = self.t1.leafLcaDepths()
        result = tree_threeway_counts(self.t1, depths, ABPairs)
        #check we got the right number of comparisons
        self.assertEqual(len(result), 20)
        #check we got the right keys
        expected_keys = [
            (1,2,0),(2,1,0),(0,1,3),(1,0,3),(0,1,4),(1,0,4),(0,2,3),
            (2,0,3),(0,2,4),(2,0,4),(1,2,3),(2,1,3),(1,2,4),(2,1,4),(3,4,1),
            (4,3,1),(3,4,2),(4,3,2)]
        for key in expected_keys:
            self.assertTrue(key in result)
        #spot-check a few results
        self.assertEqual(result[(1,2,0)]._data, array([[2,1],[0,0]]))
        self.assertEqual(result[(2,1,0)]._data, array([[1,2],[0,0]]))
        self.assertEqual(result[(2,1,3)]._data, array([[0,1],[1,1]]))

    def test_twoway_counts(self):
        """twoway_counts should produce correct count matrix"""
        self._set_leaf_sequences()
        #NOTE(review): the return value is unused here; the call is kept in
        #case tree_twoway_counts relies on a side effect of it -- confirm
        self.t1.leafLcaDepths()
        pair_keys = [
            (0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)]
        #check that it works with averaging
        result = tree_twoway_counts(self.t1, ABPairs)
        #check we got the right number of comparisons: average by default
        self.assertEqual(len(result), 10)
        #check we got the right keys
        for key in pair_keys:
            self.assertTrue(key in result)
        #spot-check a few results
        self.assertEqual(result[(0,1)]._data, array([[2,.5],[.5,0]]))
        self.assertEqual(result[(2,3)]._data, array([[0,1],[1,1]]))
        #check that it works when we don't average
        result = tree_twoway_counts(self.t1, ABPairs, average=False)
        self.assertEqual(len(result), 20)
        #check we got the right keys
        for key in pair_keys:
            self.assertTrue(key in result)
            #reverse should be in result too
            self.assertTrue((key[1], key[0]) in result)
        #spot-check values
        self.assertEqual(result[(0,1)]._data, array([[2,1],[0,0]]))
        self.assertEqual(result[(1,0)]._data, array([[2,0],[1,0]]))

    def test_counts_to_probs(self):
        """counts_to_probs should skip cases with zero rows"""
        counts = {
            (0,1): Counts(array([[0,1],[1,0]]), ABPairs),
            (1,2): Counts(array([[0,0],[1,0]]), ABPairs),           #bad row
            (0,3): Counts(array([[0,0],[0,0]]), ABPairs),           #bad row
            (0,4): Counts(array([[0.0,0.0],[0.0,0.0]]), ABPairs),   #bad row
            (0,5): Counts(array([[0.1,0.3],[0.0,0.0]]), ABPairs),   #bad row
            (3,4): Counts(array([[0.1,0.3],[0.4,0.1]]), ABPairs),
            (2,1): Counts(array([[0,5],[1,0]]), ABPairs),
            }
        result = counts_to_probs(counts)
        #only the three entries without an all-zero row should survive
        self.assertEqual(len(result), 3)
        self.assertFloatEqual(result[(0,1)]._data, array([[0,1],[1,0]]))
        self.assertFloatEqual(
            result[(3,4)]._data, array([[0.25,0.75],[0.8,0.2]]))
        self.assertFloatEqual(result[(2,1)]._data, array([[0,1],[1,0]]))

    def test_probs_to_rates(self):
        """probs_to_rates converts probs to rates, omitting problem cases"""
        probs = dict((i, Probs.random(DnaPairs)) for i in range(100))
        rates = probs_to_rates(probs)
        #check we got at most the same number of items as in probs
        self.assertTrue(len(rates) <= len(probs))
        #check that we didn't get anything bad
        for rate in rates.values():
            self.assertFalse(rate.isSignificantlyComplex())
        #check that we didn't miss anything good
        for key, prob in probs.items():
            if key in rates:
                continue
            try:
                r = prob.toRates()
                #evaluated unconditionally, before the short-circuiting
                #check below
                is_valid = r.isValid()
                self.assertTrue(r.isSignificantlyComplex() or not is_valid)
            except (ZeroDivisionError, OverflowError, ValueError):
                #a conversion that raises counts as a problem case, so
                #its omission is accepted
                pass

    def test_rates_to_array(self):
        """rates_to_array should pack rates into array correctly"""
        m1 = array([[-1,1,1,1],[2,-2,2,2],[3,3,-3,3],[1,2,3,-4]])
        m2 = m1 * 2
        m3 = m1 * 0.5
        m4 = zeros((4,4))
        m5 = array([0,0])
        #NOTE(review): r5 (built from the 2-element m5) is never used
        #below -- confirm whether a malformed-input check was intended
        r1, r2, r3, r4, r5 = [
            Rates(m, DnaPairs) for m in (m1, m2, m3, m4, m5)]

        data = {(0,1,0):r1, (1,2,0):r2, (2,0,0):r3, (2,1,1):r4}
        #cells expected to hold nonzero values; (2,1,1) maps to the
        #all-zero m4, so it is checked along with the untouched cells
        nonzero = [(0,1,0),(1,2,0),(2,0,0)]
        cells = [(i, j, k) for i in range(3) for j in range(3)
                 for k in range(3)]

        #note that array can be, but need not be, floating point
        to_fill = zeros((3,3,3,16), 'float64')
        rates_to_array(data, to_fill)
        #check that the things we deliberately set are OK
        self.assertEqual(to_fill[0][1][0], ravel(m1))
        self.assertNotEqual(to_fill[0][1][0], ravel(m2))
        self.assertEqual(to_fill[1,2,0], ravel(m2))
        self.assertEqual(to_fill[2][0][0], ravel(m3))
        self.assertEqual(to_fill[2][1][1], ravel(m4))
        #check that everything else is zero
        for cell in cells:
            if cell not in nonzero:
                self.assertEqual(to_fill[cell], zeros(16))

        #check that it works omitting the diagonal
        to_fill = zeros((3,3,3,12), 'float64')
        rates_to_array(data, to_fill, without_diagonal=True)
        #check that the things we deliberately set are OK
        m1_nodiag = array([[1,1,1],[2,2,2],[3,3,3],[1,2,3]])
        self.assertEqual(to_fill[0][1][0], ravel(m1_nodiag))
        self.assertNotEqual(to_fill[0][1][0], ravel(m1_nodiag*2))
        self.assertEqual(to_fill[1,2,0], ravel(m1_nodiag*2))
        self.assertEqual(to_fill[2][0][0], ravel(m1_nodiag*0.5))
        self.assertEqual(to_fill[2][1][1], zeros(12))
        #check that everything else is zero
        for cell in cells:
            if cell not in nonzero:
                self.assertEqual(to_fill[cell], zeros(12))

    @occasionally_fails
    def test_tree_threeway_rates(self):
        """tree_threeway_rates should give plausible results on rand trees"""
        t = self.t1
        t.assignLength(0.05)
        t.Q = Rates.random(DnaPairs).normalize()
        t.assignQ()
        t.assignP()
        t.evolve(randint(0,4,100))
        t.makeIdIndex()
        depths = t.leafLcaDepths()
        cells = [(i, j, k) for i in range(5) for j in range(5)
                 for k in range(5)]

        result = tree_threeway_rates(t, depths)
        self.assertEqual(result.shape, (5,5,5,16))
        #check that row sums are 0
        for cell in cells:
            self.assertFloatEqual(sum(result[cell]), 0)
        self.assertTrue(any(result))

        #check that it works without_diag
        #default: no normalization, so row sums shouldn't be 1 after
        #omitting diagonal (one call serves both the shape check and the
        #sum check)
        result = tree_threeway_rates(t, depths, without_diag=True)
        self.assertEqual(result.shape, (5,5,5,12))
        for cell in cells:
            total = sum(result[cell])
            self.assertTrue(total == 0 or abs(total - 1) > 0.01)

        #...but if we tell it to normalize, row sums should be nearly 1
        #after omitting diagonal
        result = tree_threeway_rates(
            t, depths, without_diag=True, normalize=True)
        self.assertEqual(result.shape, (5,5,5,12))
        for cell in cells:
            total = sum(result[cell])
            #cells that stayed all-zero are exempt
            if total != 0:
                self.assertFloatEqual(total, 1)

    @occasionally_fails
    def test_tree_twoway_rates(self):
        """tree_twoway_rates should give plausible results on rand trees"""
        t = self.t1
        t.assignLength(0.05)
        t.Q = Rates.random(DnaPairs).normalize()
        t.assignQ()
        t.assignP()
        t.evolve(randint(0,4,100))
        t.makeIdIndex()
        cells = [(i, j) for i in range(5) for j in range(5)]

        result = tree_twoway_rates(t)
        self.assertEqual(result.shape, (5,5,16))
        #check that row sums are 0
        for cell in cells:
            self.assertFloatEqual(sum(result[cell]), 0)
        #need to make sure we didn't just get an empty array
        self.assertGreaterThan((abs(result)).sum(), 0)

        #check that it works without_diag
        #default: no normalization; record the row sums so they can be
        #compared with the normalized ones below (note that they can be
        #zero)
        result = tree_twoway_rates(t, without_diag=True)
        self.assertEqual(result.shape, (5,5,12))
        sums_before = [sum(result[cell]) for cell in cells]

        #...but if we tell it to normalize, row sums should be nearly 1
        #after omitting diagonal
        result = tree_twoway_rates(t, without_diag=True, normalize=True)
        self.assertEqual(result.shape, (5,5,12))
        sums_after = []
        for cell in cells:
            total = sum(result[cell])
            sums_after.append(total)
            if total != 0:
                self.assertFloatEqual(total, 1)

        #normalization must have changed the sums: the equality check is
        #expected to fail, and passing is the error
        try:
            self.assertFloatEqual(sums_before, sums_after)
        except AssertionError:
            pass
        else:
            self.fail("Expected different arrays before/after norm")

    def test_multivariate_normal_prob(self):
        """Multivariate normal prob should match R results"""
        cov = array([[3,1,2],[1,5,4],[2,4,6]])
        a = array([0,0,0])
        b = array([1,1,1])
        c = array([0.1, 0.2, 0.3])
        small_cov = cov/10.0

        mvp = multivariate_normal_prob
        self.assertFloatEqual(mvp(a, cov), 0.01122420)
        self.assertFloatEqual(mvp(a, cov, b), 0.009018894)
        self.assertFloatEqual(mvp(a, small_cov, b), 0.03982319)
        self.assertFloatEqual(mvp(c, small_cov, b), 0.06091317)
Beispiel #3
0
class OldPhyloNodeTests(TestCase):
    """Tests of the PhyloNode class -- these are all now methods of RangeNode.

    All tests operate on the tree created in setUp. Test conditions go
    through TestCase assertion methods instead of bare ``assert``
    statements, which would be removed when Python runs with -O.
    """

    def setUp(self):
        """Create the standard tree used by every test."""
        self.t1 = DndParser('((a,(b,c)),(d,e))', RangeNode)
        #self.t1 indices: ((0,(1,2)5)6,(3,4)7)8

    def test_makeIdIndex(self):
        """RangeNode makeIdIndex should assign ids to every node"""
        self.t1.makeIdIndex()
        index = self.t1.IdIndex
        nodes = list(self.t1.traverse(self_before=True))
        #check we got an entry for each node
        self.assertEqual(len(index), len(nodes))
        #check each node has an Id and the Id is a key of the index
        for curr in nodes:
            self.assertTrue(hasattr(curr, 'Id'))
            self.assertTrue(curr.Id in index)

    def test_assignQ_single_passed(self):
        """RangeNode assignQ should propagate single Q param down tree"""
        #should work if Q explicitly passed
        t = self.t1
        Q = ['a']
        t.assignQ(Q)
        #every node must reference the very same Q object
        for curr in t.traverse(self_before=True):
            self.assertTrue(curr.Q is Q)

    def test_assignQ_single_set(self):
        """RangeNode assignQ should propagate single Q if set"""
        t = self.t1
        Q = ['a']
        #precondition: no Q on the freshly parsed tree
        self.assertFalse(hasattr(t, 'Q'))
        t.Q = Q
        t.assignQ()
        for curr in t.traverse(self_before=True):
            self.assertTrue(curr.Q is Q)

    def test_assignQ_single_overwrite(self):
        """RangeNode assignQ should overwrite root Q if new Q passed"""
        t = self.t1
        Q = ['a']
        Q2 = ['b']
        t.Q = Q
        t.assignQ(Q2)
        for curr in t.traverse(self_before=True):
            self.assertTrue(curr.Q is Q2)
            self.assertTrue(curr.Q is not Q)

    def test_assignQ_multiple(self):
        """RangeNode assignQ should propagate multiple Qs"""
        t = self.t1
        Q1 = ['a']
        Q2 = ['b']
        Q3 = ['c']
        t.makeIdIndex()
        #give nodes 7 and 5 their own Q before assigning Q3 from the top
        t.IdIndex[7].Q = Q1
        t.IdIndex[5].Q = Q2
        t.assignQ(Q3)
        result = [curr.Q for curr in t.traverse(self_after=True)]
        self.assertTrue(t.Q is Q3)
        self.assertEqual(result, [Q3, Q2, Q2, Q2, Q3, Q1, Q1, Q1, Q3])

    def test_assignQ_multiple_overwrite(self):
        """RangeNode assignQ should allow overwrite"""
        t = self.t1
        Q1 = ['a']
        Q2 = ['b']
        Q3 = ['c']
        t.makeIdIndex()
        t.IdIndex[7].Q = Q1
        t.IdIndex[5].Q = Q2
        #overwrite=True: the per-node Qs set above must be replaced
        t.assignQ(Q3, overwrite=True)
        for curr in t.traverse(self_after=True):
            self.assertTrue(curr.Q is Q3)

    def test_assignQ_special(self):
        """RangeNode assignQ should work with special Qs"""
        t = self.t1
        Q1 = 'a'
        Q2 = 'b'
        Q3 = 'c'
        t.makeIdIndex()
        #node id -> Q to use at that node
        special = {7: Q1, 1: Q2}
        #won't work if no Q at root
        self.assertRaises(ValueError, t.assignQ, special_qs=special)
        t.assignQ(Q3, special_qs=special)
        result = [curr.Q for curr in t.traverse(self_after=True)]
        self.assertEqual(
            result, ['c', 'b', 'c', 'c', 'c', 'a', 'a', 'a', 'c'])

    def test_assignP(self):
        """RangeNode assignP should work when Qs set."""
        t = self.t1
        for curr in t.traverse(self_before=True):
            curr.Length = random() * 0.5  #range 0 to 0.5
        t.Q = Rates.random(DnaPairs)
        t.assignQ()
        t.assignP()
        t.assignIds()
        for curr in t.traverse(self_after=True):
            #skip nodes that have no parent
            if curr.Parent is None:
                continue
            #mean of (1 - diagonal of P) should match the branch length
            self.assertFloatEqual(
                average(1-diag(curr.P._data), axis=0), curr.Length)

    def test_assignLength(self):
        """RangeNode assignLength should set branch length"""
        t = self.t1
        t.assignLength(0.3)
        for curr in t.traverse(self_before=True):
            self.assertEqual(curr.Length, 0.3)

    def test_evolve(self):
        """RangeNode evolve should work on a starting vector"""
        t = self.t1
        t.Q = Rates.random(DnaPairs)
        t.assignQ()
        t.assignLength(0.1)
        t.assignP()
        start = array([
            1,0,2,1,0,0,2,1,2,0,1,2,1,0,2,0,0,3,0,2,1,0,3,1,
            0,2,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,3])
        t.evolve(start)
        #visited nodes must carry a sequence of the same length that is
        #not equal to the starting vector
        for curr in t.traverse():
            self.assertEqual(len(curr.Sequence), len(start))
            self.assertNotEqual(curr.Sequence, start)
        #WARNING: Doesn't test base freqs etc. at this point, but those aren't
        #really evolve()'s responsibility (tested as self.P.mutate(seq) once
        #P is set, which we've already demonstrated works.)

    def test_assignPs(self):
        """RangeNode assignPs should assign multiple scaled P matrices"""
        t = self.t1
        for curr in t.traverse(self_before=True):
            curr.Length = random() * 0.5  #range 0 to 0.5
        t.Q = Rates.random(DnaPairs)
        t.assignQ()
        t.assignPs([1, 0.5, 0.25])
        t.assignIds()
        for curr in t.traverse(self_after=True):
            if curr.Parent is None:
                continue
            self.assertEqual(len(curr.Ps), 3)
            #each matrix is compared against the branch length times the
            #scale factor passed at the same position
            self.assertFloatEqual(
                average(1-diag(curr.Ps[0]._data), axis=0), curr.Length)
            self.assertFloatEqual(
                average(1-diag(curr.Ps[1]._data), axis=0), 0.5*curr.Length)
            self.assertFloatEqual(
                average(1-diag(curr.Ps[2]._data), axis=0), 0.25*curr.Length)

    def test_evolveSeqs(self):
        """RangeNode evolveSeqs should evolve multiple sequences"""
        t = self.t1
        for curr in t.traverse(self_before=True):
            curr.Length = 0.5
        t.Q = Rates.random(DnaPairs)
        t.assignQ()
        t.assignPs([1, 1, 0.1])
        t.assignIds()
        orig_seqs = [
            array(s) for s in
            [randint(0,4,200), randint(0,4,200), randint(0,4,200)]]
        t.evolveSeqs(orig_seqs)
        for curr in t.traverse():   #only look at leaves
            if curr.Parent is None:
                continue
            self.assertEqual(len(curr.Sequences), 3)
            for orig, new in zip(orig_seqs, curr.Sequences):
                self.assertEqual(len(orig), len(new))
                self.assertNotEqual(orig, new)
            #presumably Sequences[k] is evolved with Ps[k]; index 1 has
            #scale 1 and index 2 has scale 0.1, so index 1 is expected to
            #differ from its original at more positions -- TODO confirm
            self.assertTrue(
                sum(orig_seqs[1] != curr.Sequences[1]) >
                sum(orig_seqs[2] != curr.Sequences[2]))