class OldPhyloNodeTests(TestCase): """Tests of the PhyloNode class -- these are all now methods of RangeNode.""" def setUp(self): """Make a couple of standard trees""" self.t1 = DndParser('((a,(b,c)),(d,e))', RangeNode) #selt.t1 indices: ((0,(1,2)5)6,(3,4)7)8 def test_makeIdIndex(self): """RangeNode makeIdIndex should assign ids to every node""" self.t1.makeIdIndex() result = self.t1.IdIndex nodes = list(self.t1.traverse(self_before=True)) #check we got an entry for each node self.assertEqual(len(result), len(nodes)) #check the ids are in the result for i in nodes: assert hasattr(i, 'Id') assert i.Id in result def test_assignQ_single_passed(self): """RangeNode assignQ should propagate single Q param down tree""" #should work if Q explicitly passed t = self.t1 Q = ['a'] t.assignQ(Q) for node in t.traverse(self_before=True): assert node.Q is Q def test_assignQ_single_set(self): """RangeNode assignQ should propagate single Q if set""" t = self.t1 Q = ['a'] assert not hasattr(t, 'Q') t.Q = Q t.assignQ() for node in t.traverse(self_before=True): assert node.Q is Q def test_assignQ_single_overwrite(self): """RangeNode assignQ should overwrite root Q if new Q passed""" t = self.t1 Q = ['a'] Q2 = ['b'] t.Q = Q t.assignQ(Q2) for node in t.traverse(self_before=True): assert node.Q is Q2 assert not node.Q is Q def test_assignQ_multiple(self): """RangeNode assignQ should propagate multiple Qs""" t = self.t1 Q1 = ['a'] Q2 = ['b'] Q3 = ['c'] t.makeIdIndex() t.IdIndex[7].Q = Q1 t.IdIndex[5].Q = Q2 t.assignQ(Q3) result = [i.Q for i in t.traverse(self_after=True)] assert t.Q is Q3 self.assertEqual(result, [Q3, Q2, Q2, Q2, Q3, Q1, Q1, Q1, Q3]) def test_assignQ_multiple_overwrite(self): """RangeNode assignQ should allow overwrite""" t = self.t1 Q1 = ['a'] Q2 = ['b'] Q3 = ['c'] t.makeIdIndex() t.IdIndex[7].Q = Q1 t.IdIndex[5].Q = Q2 t.assignQ(Q3, overwrite=True) for i in t.traverse(self_after=True): assert i.Q is Q3 def test_assignQ_special(self): """RangeNode assignQ should work with special Qs""" t = self.t1 Q1 = 'a' Q2 = 'b' Q3 = 'c' t.makeIdIndex() special = {7: Q1, 1: Q2} #won't work if no Q at root self.assertRaises(ValueError, t.assignQ, special_qs=special) t.assignQ(Q3, special_qs=special) result = [i.Q for i in t.traverse(self_after=True)] self.assertEqual(result, ['c', 'b', 'c', 'c', 'c', 'a', 'a', 'a', 'c']) def test_assignP(self): """RangeNode assignP should work when Qs set.""" t = self.t1 for i in t.traverse(self_before=True): i.Length = random() * 0.5 #range 0 to 0.5 t.Q = Rates.random(DnaPairs) t.assignQ() t.assignP() t.assignIds() for node in t.traverse(self_after=True): if node.Parent is not None: self.assertFloatEqual(average(1-diag(node.P._data), axis=0), \ node.Length) def test_assignLength(self): """RangeNode assignLength should set branch length""" t = self.t1 t.assignLength(0.3) for i in t.traverse(self_before=True): self.assertEqual(i.Length, 0.3) def test_evolve(self): """RangeNode evolve should work on a starting vector""" t = self.t1 t.Q = Rates.random(DnaPairs) t.assignQ() t.assignLength(0.1) t.assignP() start = array([ 1, 0, 2, 1, 0, 0, 2, 1, 2, 0, 1, 2, 1, 0, 2, 0, 0, 3, 0, 2, 1, 0, 3, 1, 0, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3 ]) t.evolve(start) for i in t.traverse(): self.assertEqual(len(i.Sequence), len(start)) self.assertNotEqual(i.Sequence, start) #WARNING: Doesn't test base freqs etc. at this point, but those aren't #really evolve()'s responsibity (tested as self.P.mutate(seq) once #P is set, which we've already demonstrated works.) def test_assignPs(self): """RangeNode assignPs should assign multiple scaled P matrices""" t = self.t1 for i in t.traverse(self_before=True): i.Length = random() * 0.5 #range 0 to 0.5 t.Q = Rates.random(DnaPairs) t.assignQ() t.assignPs([1, 0.5, 0.25]) t.assignIds() for node in t.traverse(self_after=True): if node.Parent is not None: self.assertEqual(len(node.Ps), 3) self.assertFloatEqual(average(1-diag(node.Ps[0]._data), axis=0), \ node.Length) self.assertFloatEqual(average(1-diag(node.Ps[1]._data), axis=0), \ 0.5*node.Length) self.assertFloatEqual(average(1-diag(node.Ps[2]._data), axis=0), \ 0.25*node.Length) def test_evolveSeqs(self): """PhlyoNode evolveSeqs should evolve multiple sequences""" t = self.t1 for i in t.traverse(self_before=True): i.Length = 0.5 t.Q = Rates.random(DnaPairs) t.assignQ() t.assignPs([1, 1, 0.1]) t.assignIds() orig_seqs = [array(i) for i in [randint(0,4,200), randint(0,4,200), \ randint(0,4,200)]] t.evolveSeqs(orig_seqs) for node in t.traverse(): #only look at leaves if node.Parent is not None: self.assertEqual(len(node.Sequences), 3) for orig, new in zip(orig_seqs, node.Sequences): self.assertEqual(len(orig), len(new)) self.assertNotEqual(orig, new) assert sum(orig_seqs[1]!=node.Sequences[1]) > \ sum(orig_seqs[2]!=node.Sequences[2])
class analysisTests(TestCase): """Tests of top-level functions.""" def setUp(self): """Make a couple of standard trees""" self.t1 = DndParser('((a,(b,c)),(d,e))', RangeNode) #selt.t1 indices: ((0,(1,2)5)6,(3,4)7)8 def test_threeway_counts(self): """threeway_counts should produce correct count matrix""" self.t1.makeIdIndex() ind = self.t1.IdIndex ind[0].Sequence = array([0,0,0]) ind[1].Sequence = array([0,1,0]) ind[2].Sequence = array([1,0,1]) ind[3].Sequence = array([1,1,0]) ind[4].Sequence = array([1,1,1]) depths = self.t1.leafLcaDepths() result = tree_threeway_counts(self.t1, depths, ABPairs) #check we got the right number of comparisons self.assertEqual(len(result), 20) #check we got the right keys for k in [(1,2,0),(2,1,0),(0,1,3),(1,0,3),(0,1,4),(1,0,4),(0,2,3),\ (2,0,3),(0,2,4),(2,0,4),(1,2,3),(2,1,3),(1,2,4),(2,1,4),(3,4,1),\ (4,3,1),(3,4,2),(4,3,2)]: assert k in result #spot-check a few results self.assertEqual(result[(1,2,0)]._data, array([[2,1],[0,0]])) self.assertEqual(result[(2,1,0)]._data, array([[1,2],[0,0]])) self.assertEqual(result[(2,1,3)]._data, array([[0,1],[1,1]])) def test_twoway_counts(self): """twoway_counts should produce correct count matrix""" self.t1.makeIdIndex() ind = self.t1.IdIndex ind[0].Sequence = array([0,0,0]) ind[1].Sequence = array([0,1,0]) ind[2].Sequence = array([1,0,1]) ind[3].Sequence = array([1,1,0]) ind[4].Sequence = array([1,1,1]) depths = self.t1.leafLcaDepths() #check that it works with averaging result = tree_twoway_counts(self.t1, ABPairs) #check we got the right number of comparisons: average by default self.assertEqual(len(result), 10) #check we got the right keys for k in [(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)]: assert k in result #spot-check a few results self.assertEqual(result[(0,1)]._data, array([[2,.5],[.5,0]])) self.assertEqual(result[(2,3)]._data, array([[0,1],[1,1]])) #check that it works when we don't average result = tree_twoway_counts(self.t1, ABPairs, average=False) self.assertEqual(len(result), 20) #check we got the right keys for k in [(0,1),(0,2),(0,3),(0,4),(1,2),(1,3),(1,4),(2,3),(2,4),(3,4)]: assert k in result #reverse should be in result too assert (k[1],k[0]) in result #spot-check values self.assertEqual(result[(0,1)]._data, array([[2,1],[0,0]])) self.assertEqual(result[(1,0)]._data, array([[2,0],[1,0]])) def test_counts_to_probs(self): """counts_to_probs should skip cases with zero rows""" counts = { (0,1): Counts(array([[0,1],[1,0]]), ABPairs), (1,2): Counts(array([[0,0],[1,0]]), ABPairs), #bad row (0,3): Counts(array([[0,0],[0,0]]), ABPairs), #bad row (0,4): Counts(array([[0.0,0.0],[0.0,0.0]]), ABPairs), #bad row (0,5): Counts(array([[0.1,0.3],[0.0,0.0]]), ABPairs), #bad row (3,4): Counts(array([[0.1,0.3],[0.4,0.1]]), ABPairs), (2,1): Counts(array([[0,5],[1,0]]), ABPairs), } result = counts_to_probs(counts) self.assertEqual(len(result), 3) self.assertFloatEqual(result[(0,1)]._data, array([[0,1],[1,0]])) self.assertFloatEqual(result[(3,4)]._data, \ array([[0.25,0.75],[0.8,0.2]])) self.assertFloatEqual(result[(2,1)]._data, array([[0,1],[1,0]])) def test_probs_to_rates(self): """probs_to_rates converts probs to rates, omitting problem cases""" probs = dict([(i, Probs.random(DnaPairs)) for i in range(100)]) rates = probs_to_rates(probs) #check we got at most the same number of items as in probs assert len(rates) <= len(probs) #check that we didn't get anything bad vals = rates.values() for v in vals: assert not v.isSignificantlyComplex() #check that we didn't miss anything good for key, val in probs.items(): if key not in rates: try: r = val.toRates() print r.isValid() assert r.isSignificantlyComplex() or (not r.isValid()) except (ZeroDivisionError, OverflowError, ValueError): pass def test_rates_to_array(self): """rates_to_array should pack rates into array correctly""" m1 = array([[-1,1,1,1],[2,-2,2,2],[3,3,-3,3],[1,2,3,-4]]) m2 = m1 * 2 m3 = m1 * 0.5 m4 = zeros((4,4)) m5 = array([0,0]) r1, r2, r3, r4, r5 = [Rates(i, DnaPairs) for i in m1,m2,m3,m4,m5] data = {(0,1,0):r1, (1,2,0):r2, (2,0,0):r3, (2,1,1):r4} #note that array can be, but need not be, floating point to_fill = zeros((3,3,3,16), 'float64') result = rates_to_array(data, to_fill) #check that the thnigs we deliberately set are OK self.assertEqual(to_fill[0][1][0], ravel(m1)) self.assertNotEqual(to_fill[0][1][0], ravel(m2)) self.assertEqual(to_fill[1,2,0], ravel(m2)) self.assertEqual(to_fill[2][0][0], ravel(m3)) self.assertEqual(to_fill[2][1][1], ravel(m4)) #check that everything else is zero nonzero = [(0,1,0),(1,2,0),(2,0,0)] for x in [(i, j, k) for i in range(3) for j in range(3) \ for k in range(3)]: if x not in nonzero: self.assertEqual(to_fill[x], zeros(16)) #check that it works omitting the diagonal to_fill = zeros((3,3,3,12), 'float64') result = rates_to_array(data, to_fill, without_diagonal=True) #check that the thnigs we deliberately set are OK m1_nodiag = array([[1,1,1],[2,2,2],[3,3,3],[1,2,3]]) self.assertEqual(to_fill[0][1][0], ravel(m1_nodiag)) self.assertNotEqual(to_fill[0][1][0], ravel(m1_nodiag*2)) self.assertEqual(to_fill[1,2,0], ravel(m1_nodiag*2)) self.assertEqual(to_fill[2][0][0], ravel(m1_nodiag*0.5)) self.assertEqual(to_fill[2][1][1], zeros(12)) #check that everything else is zero nonzero = [(0,1,0),(1,2,0),(2,0,0)] for x in [(i, j, k) for i in range(3) for j in range(3) \ for k in range(3)]: if x not in nonzero: self.assertEqual(to_fill[x], zeros(12)) @occasionally_fails def test_tree_threeway_rates(self): """tree_threeway_rates should give plausible results on rand trees""" t = self.t1 t.assignLength(0.05) t.Q = Rates.random(DnaPairs).normalize() t.assignQ() t.assignP() t.evolve(randint(0,4,100)) t.makeIdIndex() depths = t.leafLcaDepths() result = tree_threeway_rates(t, depths) self.assertEqual(result.shape, (5,5,5,16)) #check that row sums are 0 for x in [(i,j,k) for i in range(5) for j in range(5) \ for k in range(5)]: self.assertFloatEqual(sum(result[x]), 0) assert any(result) #check that it works without_diag result = tree_threeway_rates(t, depths, without_diag=True) self.assertEqual(result.shape, (5,5,5,12)) #check that it works with/without normalize #default: no normalization, so row sums shouldn't be 1 after #omitting diagonal result = tree_threeway_rates(t, depths, without_diag=True) self.assertEqual(result.shape, (5,5,5,12)) for x in [(i,j,k) for i in range(5) for j in range(5) \ for k in range(5)]: assert sum(result[x]) == 0 or abs(sum(result[x]) - 1) > 0.01 #...but if we tell it to normalize, row sums should be nearly 1 #after omitting diagonal result = tree_threeway_rates(t, depths, without_diag=True, \ normalize=True) self.assertEqual(result.shape, (5,5,5,12)) for x in [(i,j,k) for i in range(5) for j in range(5) \ for k in range(5)]: s = sum(result[x]) if s != 0: self.assertFloatEqual(s, 1) @occasionally_fails def test_tree_twoway_rates(self): """tree_twoway_rates should give plausible results on rand trees""" t = self.t1 t.assignLength(0.05) t.Q = Rates.random(DnaPairs).normalize() t.assignQ() t.assignP() t.evolve(randint(0,4,100)) t.makeIdIndex() result = tree_twoway_rates(t) self.assertEqual(result.shape, (5,5,16)) #check that row sums are 0 for x in [(i,j) for i in range(5) for j in range(5)]: self.assertFloatEqual(sum(result[x]), 0) #need to make sure we didn't just get an empty array self.assertGreaterThan((abs(result)).sum(), 0) #check that it works without_diag result = tree_twoway_rates(t, without_diag=True) self.assertEqual(result.shape, (5,5,12)) #check that it works with/without normalize #default: no normalization, so row sums shouldn't be 1 after omitting #diagonal result = tree_twoway_rates(t, without_diag=True) self.assertEqual(result.shape, (5,5,12)) #check that the row sums are not 1 before normalization (note that they #can be zero, though) sums_before = [] for x in [(i,j) for i in range(5) for j in range(5)]: curr_sum = sum(result[x]) sums_before.append(curr_sum) #...but if we tell it to normalize, row sums should be nearly 1 #after omitting diagonal result = tree_twoway_rates(t, without_diag=True, \ normalize=True) self.assertEqual(result.shape, (5,5,12)) sums_after = [] for x in [(i,j) for i in range(5) for j in range(5)]: curr_sum = sum(result[x]) sums_after.append(curr_sum) if curr_sum != 0: self.assertFloatEqual(curr_sum, 1) try: self.assertFloatEqual(sums_before, sums_after) except AssertionError: pass else: raise AssertionError, "Expected different arrays before/after norm" def test_multivariate_normal_prob(self): """Multivariate normal prob should match R results""" cov = array([[3,1,2],[1,5,4],[2,4,6]]) a = array([0,0,0]) b = array([1,1,1]) c = array([0.1, 0.2, 0.3]) small_cov = cov/10.0 mvp = multivariate_normal_prob self.assertFloatEqual(mvp(a, cov), 0.01122420) self.assertFloatEqual(mvp(a, cov, b), 0.009018894) self.assertFloatEqual(mvp(a, small_cov, b), 0.03982319) self.assertFloatEqual(mvp(c, small_cov, b), 0.06091317)
class OldPhyloNodeTests(TestCase): """Tests of the PhyloNode class -- these are all now methods of RangeNode.""" def setUp(self): """Make a couple of standard trees""" self.t1 = DndParser('((a,(b,c)),(d,e))', RangeNode) #selt.t1 indices: ((0,(1,2)5)6,(3,4)7)8 def test_makeIdIndex(self): """RangeNode makeIdIndex should assign ids to every node""" self.t1.makeIdIndex() result = self.t1.IdIndex nodes = list(self.t1.traverse(self_before=True)) #check we got an entry for each node self.assertEqual(len(result), len(nodes)) #check the ids are in the result for i in nodes: assert hasattr(i, 'Id') assert i.Id in result def test_assignQ_single_passed(self): """RangeNode assignQ should propagate single Q param down tree""" #should work if Q explicitly passed t = self.t1 Q = ['a'] t.assignQ(Q) for node in t.traverse(self_before=True): assert node.Q is Q def test_assignQ_single_set(self): """RangeNode assignQ should propagate single Q if set""" t = self.t1 Q = ['a'] assert not hasattr(t, 'Q') t.Q = Q t.assignQ() for node in t.traverse(self_before=True): assert node.Q is Q def test_assignQ_single_overwrite(self): """RangeNode assignQ should overwrite root Q if new Q passed""" t = self.t1 Q = ['a'] Q2 = ['b'] t.Q = Q t.assignQ(Q2) for node in t.traverse(self_before=True): assert node.Q is Q2 assert not node.Q is Q def test_assignQ_multiple(self): """RangeNode assignQ should propagate multiple Qs""" t = self.t1 Q1 = ['a'] Q2 = ['b'] Q3 = ['c'] t.makeIdIndex() t.IdIndex[7].Q = Q1 t.IdIndex[5].Q = Q2 t.assignQ(Q3) result = [i.Q for i in t.traverse(self_after=True)] assert t.Q is Q3 self.assertEqual(result, [Q3,Q2,Q2,Q2,Q3,Q1,Q1,Q1,Q3]) def test_assignQ_multiple_overwrite(self): """RangeNode assignQ should allow overwrite""" t = self.t1 Q1 = ['a'] Q2 = ['b'] Q3 = ['c'] t.makeIdIndex() t.IdIndex[7].Q = Q1 t.IdIndex[5].Q = Q2 t.assignQ(Q3, overwrite=True) for i in t.traverse(self_after=True): assert i.Q is Q3 def test_assignQ_special(self): """RangeNode assignQ should work with special Qs""" t = self.t1 Q1 = 'a' Q2 = 'b' Q3 = 'c' t.makeIdIndex() special = {7:Q1, 1:Q2} #won't work if no Q at root self.assertRaises(ValueError, t.assignQ, special_qs=special) t.assignQ(Q3, special_qs=special) result = [i.Q for i in t.traverse(self_after=True)] self.assertEqual(result, ['c','b','c','c','c','a','a','a','c']) def test_assignP(self): """RangeNode assignP should work when Qs set.""" t = self.t1 for i in t.traverse(self_before=True): i.Length = random() * 0.5 #range 0 to 0.5 t.Q = Rates.random(DnaPairs) t.assignQ() t.assignP() t.assignIds() for node in t.traverse(self_after=True): if node.Parent is not None: self.assertFloatEqual(average(1-diag(node.P._data), axis=0), \ node.Length) def test_assignLength(self): """RangeNode assignLength should set branch length""" t = self.t1 t.assignLength(0.3) for i in t.traverse(self_before=True): self.assertEqual(i.Length, 0.3) def test_evolve(self): """RangeNode evolve should work on a starting vector""" t = self.t1 t.Q = Rates.random(DnaPairs) t.assignQ() t.assignLength(0.1) t.assignP() start = array([1,0,2,1,0,0,2,1,2,0,1,2,1,0,2,0,0,3,0,2,1,0,3,1,0,2,0,0,0,0,0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,3]) t.evolve(start) for i in t.traverse(): self.assertEqual(len(i.Sequence), len(start)) self.assertNotEqual(i.Sequence, start) #WARNING: Doesn't test base freqs etc. at this point, but those aren't #really evolve()'s responsibity (tested as self.P.mutate(seq) once #P is set, which we've already demonstrated works.) def test_assignPs(self): """RangeNode assignPs should assign multiple scaled P matrices""" t = self.t1 for i in t.traverse(self_before=True): i.Length = random() * 0.5 #range 0 to 0.5 t.Q = Rates.random(DnaPairs) t.assignQ() t.assignPs([1, 0.5, 0.25]) t.assignIds() for node in t.traverse(self_after=True): if node.Parent is not None: self.assertEqual(len(node.Ps), 3) self.assertFloatEqual(average(1-diag(node.Ps[0]._data), axis=0), \ node.Length) self.assertFloatEqual(average(1-diag(node.Ps[1]._data), axis=0), \ 0.5*node.Length) self.assertFloatEqual(average(1-diag(node.Ps[2]._data), axis=0), \ 0.25*node.Length) def test_evolveSeqs(self): """PhlyoNode evolveSeqs should evolve multiple sequences""" t = self.t1 for i in t.traverse(self_before=True): i.Length = 0.5 t.Q = Rates.random(DnaPairs) t.assignQ() t.assignPs([1, 1, 0.1]) t.assignIds() orig_seqs = [array(i) for i in [randint(0,4,200), randint(0,4,200), \ randint(0,4,200)]] t.evolveSeqs(orig_seqs) for node in t.traverse(): #only look at leaves if node.Parent is not None: self.assertEqual(len(node.Sequences), 3) for orig, new in zip(orig_seqs, node.Sequences): self.assertEqual(len(orig), len(new)) self.assertNotEqual(orig, new) assert sum(orig_seqs[1]!=node.Sequences[1]) > \ sum(orig_seqs[2]!=node.Sequences[2])