Example #1
0
    def test_mutate(self):
        """Probs mutate should return correct vector from input vector"""
        pair_alphabet = Alphabet('abc')**2
        model = Probs([0.5, 0.25, 0.25, 0.1, 0.8, 0.1, 0.3, 0.6, 0.1],
                      pair_alphabet)
        # fp math in accumulate makes the exact boundaries unpredictable,
        # so draws meant to sit next to a boundary are nudged by eps.
        eps = 1e-6
        #              a  b  b  a  c  c  a  b  c
        start = array([0, 1, 1, 0, 2, 2, 0, 1, 2])
        draws = array([0, .01, .8 - eps, 1, 1, .3, .05, .9 + eps, .95])
        mutated = model.mutate(start, draws)
        #                 a  a  b  c  c  a  a  c  c
        expected = array([0, 0, 1, 2, 2, 0, 0, 2, 2])
        self.assertEqual(mutated, expected)

        # Check that the freq. distribution is about right: mutate the same
        # input 1000 times (no random vector supplied) and count, for each
        # position, how often the result is 0.
        trials = array([model.mutate(start) for _ in range(1000)])
        # WARNING: bool operators return byte arrays, whose sums wrap at 256!
        is_zero = asarray(trials == 0, 'int32')
        observed_counts = sum(is_zero, axis=0)
        # expect: 500, 100, 100, 500, 300, 300, 500, 100, 300
        # std dev = sqrt(npq), which is sqrt(250), sqrt(90), sqrt(210)
        expected_counts = array([500, 100, 100, 500, 300, 300, 500, 100, 300])
        variances = array([250, 90, 90, 250, 210, 210, 250, 90, 210])
        three_sd = 3 * sqrt(variances)
        # NOTE: the accepted window is 2 * three_sd on each side, i.e. six
        # standard deviations around the expected count.
        for seen, wanted, spread in zip(observed_counts, expected_counts,
                                        three_sd):
            assert wanted - 2 * spread < seen < wanted + 2 * spread
Example #2
0
 def test_mutate(self):
     """Probs mutate should return correct vector from input vector"""
     pair_alphabet = Alphabet('abc')**2
     model = Probs([0.5, 0.25, 0.25, 0.1, 0.8, 0.1, 0.3, 0.6, 0.1],
                   pair_alphabet)
     # fp math in accumulate makes the exact boundaries unpredictable,
     # so draws meant to sit next to a boundary are nudged by eps.
     eps = 1e-6
     #              a  b  b  a  c  c  a  b  c
     start = array([0, 1, 1, 0, 2, 2, 0, 1, 2])
     draws = array([0, .01, .8 - eps, 1, 1, .3, .05, .9 + eps, .95])
     mutated = model.mutate(start, draws)
     #                 a  a  b  c  c  a  a  c  c
     expected = array([0, 0, 1, 2, 2, 0, 0, 2, 2])
     self.assertEqual(mutated, expected)

     # Check that the freq. distribution is about right: mutate the same
     # input 1000 times (no random vector supplied) and count, for each
     # position, how often the result is 0.
     trials = array([model.mutate(start) for _ in range(1000)])
     # WARNING: bool operators return byte arrays, whose sums wrap at 256!
     is_zero = asarray(trials == 0, 'int32')
     observed_counts = sum(is_zero, axis=0)
     # expect: 500, 100, 100, 500, 300, 300, 500, 100, 300
     # std dev = sqrt(npq), which is sqrt(250), sqrt(90), sqrt(210)
     expected_counts = array([500, 100, 100, 500, 300, 300, 500, 100, 300])
     variances = array([250, 90, 90, 250, 210, 210, 250, 90, 210])
     three_sd = 3 * sqrt(variances)
     # NOTE: the accepted window is 2 * three_sd on each side, i.e. six
     # standard deviations around the expected count.
     for seen, wanted, spread in zip(observed_counts, expected_counts,
                                     three_sd):
         assert wanted - 2 * spread < seen < wanted + 2 * spread
Example #3
0
 def test_makeModel(self):
     """Probs makeModel should return correct substitution pattern"""
     pair_alphabet = Alphabet('abc')**2
     # The three consecutive triples handed to Probs, named so the expected
     # result can be spelled out by index below.
     row_0 = [0.5, 0.25, 0.25]
     row_1 = [0.1, 0.8, 0.1]
     row_2 = [0.3, 0.6, 0.1]
     model = Probs(row_0 + row_1 + row_2, pair_alphabet)
     observed = model.makeModel(array([0, 1, 1, 0, 2, 2]))
     # One row per input index:  0      1      1      0      2      2
     expected = array([row_0, row_1, row_1, row_0, row_2, row_2])
     self.assertEqual(observed, expected)
Example #4
0
 def test_toCounts(self):
     """Probs toCounts should return counts object w/ right numbers"""
     pair_alphabet = Alphabet('abc')**2
     probs = Probs([0.5, 0.25, 0.25, 0.1, 0.8, 0.1, 0.3, 0.6, 0.1],
                   pair_alphabet)
     counts = probs.toCounts(30)
     # The result must be a Counts instance, not just something equal to one.
     assert isinstance(counts, Counts)
     expected = Counts([[5., 2.5, 2.5, 1, 8, 1, 3, 6, 1]], pair_alphabet)
     self.assertEqual(counts, expected)
Example #5
0
 def test_toCounts(self):
     """Probs toCounts should return counts object w/ right numbers"""
     pair_alphabet = Alphabet('abc')**2
     probs = Probs([0.5, 0.25, 0.25, 0.1, 0.8, 0.1, 0.3, 0.6, 0.1],
                   pair_alphabet)
     counts = probs.toCounts(30)
     # The result must be a Counts instance, not just something equal to one.
     assert isinstance(counts, Counts)
     expected = Counts([[5., 2.5, 2.5, 1, 8, 1, 3, 6, 1]], pair_alphabet)
     self.assertEqual(counts, expected)
Example #6
0
 def test_makeModel(self):
     """Probs makeModel should return correct substitution pattern"""
     pair_alphabet = Alphabet('abc')**2
     # The three consecutive triples handed to Probs, named so the expected
     # result can be spelled out by index below.
     row_0 = [0.5, 0.25, 0.25]
     row_1 = [0.1, 0.8, 0.1]
     row_2 = [0.3, 0.6, 0.1]
     model = Probs(row_0 + row_1 + row_2, pair_alphabet)
     observed = model.makeModel(array([0, 1, 1, 0, 2, 2]))
     # One row per input index:  0      1      1      0      2      2
     expected = array([row_0, row_1, row_1, row_0, row_2, row_2])
     self.assertEqual(observed, expected)
Example #7
0
 def test_toRates(self):
     """Probs toRates should return log of probs, optionally normalized"""
     pair_alphabet = Alphabet('abc')**2
     probs = Probs([0.9, 0.05, 0.05, 0.1, 0.85, 0.05, 0.02, 0.02, 0.96],
                   pair_alphabet)
     assert probs.isValid()

     # Default conversion: a valid, non-complex Rates object whose data
     # equals logm of the probability data.
     rates = probs.toRates()
     assert isinstance(rates, Rates)
     assert rates.isValid()
     assert not rates.isComplex()
     self.assertEqual(rates._data, logm(probs._data))

     # With normalize=True the trace of the rate data is expected to be -1.
     normalized = probs.toRates(normalize=True)
     self.assertFloatEqual(trace(normalized._data), -1.0)
Example #8
0
 def test_toRates(self):
     """Probs toRates should return log of probs, optionally normalized"""
     pair_alphabet = Alphabet('abc')**2
     probs = Probs([0.9, 0.05, 0.05, 0.1, 0.85, 0.05, 0.02, 0.02, 0.96],
                   pair_alphabet)
     assert probs.isValid()

     # Default conversion: a valid, non-complex Rates object whose data
     # equals logm of the probability data.
     rates = probs.toRates()
     assert isinstance(rates, Rates)
     assert rates.isValid()
     assert not rates.isComplex()
     self.assertEqual(rates._data, logm(probs._data))

     # With normalize=True the trace of the rate data is expected to be -1.
     normalized = probs.toRates(normalize=True)
     self.assertFloatEqual(trace(normalized._data), -1.0)
Example #9
0
 def test_timeForSimilarity(self):
     """Rates timeForSimilarity should return correct time"""
     pair_alphabet = self.abc_pairs
     probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.15, 0.8],
                   pair_alphabet)
     rates = probs.toRates()
     target = 0.5

     # Exponentiating the rates at the returned time should give a matrix
     # whose mean diagonal equals the requested similarity.
     elapsed = rates.timeForSimilarity(target)
     evolved = expm(rates._data)(elapsed)
     self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

     # Same check with an explicit second argument of three equal values
     # (presumably per-symbol frequencies -- confirm against Rates).
     elapsed = rates.timeForSimilarity(target, array([1 / 3.0] * 3))
     evolved = expm(rates._data)(elapsed)
     self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

     # A similarity of 1 is expected to need zero time.
     self.assertEqual(rates.timeForSimilarity(1), 0)
Example #10
0
 def test_timeForSimilarity(self):
     """Rates timeForSimilarity should return correct time"""
     pair_alphabet = self.abc_pairs
     probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.15, 0.8],
                   pair_alphabet)
     rates = probs.toRates()
     target = 0.5

     # Exponentiating the rates at the returned time should give a matrix
     # whose mean diagonal equals the requested similarity.
     elapsed = rates.timeForSimilarity(target)
     evolved = expm(rates._data)(elapsed)
     self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

     # Same check with an explicit second argument of three equal values
     # (presumably per-symbol frequencies -- confirm against Rates).
     elapsed = rates.timeForSimilarity(target, array([1 / 3.0] * 3))
     evolved = expm(rates._data)(elapsed)
     self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

     # A similarity of 1 is expected to need zero time.
     self.assertEqual(rates.timeForSimilarity(1), 0)
Example #11
0
 def test_isValid(self):
     """Probs isValid should return True if it's a prob matrix"""
     pair_alphabet = self.ab_pairs
     # (data, expected isValid() result), checked in this order.
     cases = [
         ([0.5, 0.5, 1, 0], True),
         #fails if don't sum to 1
         ([0.5, 0, 1, 0], False),
         #fails if negative elements
         ([1, -1, 0, 1], False),
     ]
     for data, expected in cases:
         matrix = Probs(data, pair_alphabet)
         self.assertEqual(matrix.isValid(), expected)
Example #12
0
    def test_toSimilarProbs(self):
        """Rates toSimilarProbs should match individual steps"""
        pair_alphabet = self.abc_pairs
        probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.15, 0.8],
                      pair_alphabet)
        rates = probs.toRates()
        # The one-call shortcut must agree with doing it in two steps:
        # find the time for the similarity, then convert at that time.
        shortcut = rates.toSimilarProbs(0.5)
        stepwise = rates.toProbs(rates.timeForSimilarity(0.5))
        self.assertEqual(shortcut, stepwise)

        #test a case that didn't work for DNA
        dna_matrix = array([
            [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
            [0.31144238, -0.90915091, 0.25825858, 0.33944995],
            [0.01578521, 0.43162879, -0.99257581, 0.54516182],
            [0.13229986, 0.04027147, 0.05817791, -0.23074925],
        ])
        dna_rates = Rates(dna_matrix, DnaPairs)
        similar = dna_rates.toSimilarProbs(0.66)
        mean_diagonal = average(diagonal(similar._data), axis=0)
        self.assertFloatEqual(mean_diagonal, 0.66)
Example #13
0
    def test_toProbs(self):
        """Rates toProbs should return correct probability matrix"""
        pair_alphabet = self.abc_pairs
        probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.1, 0.85],
                      pair_alphabet)
        rates = probs.toRates()
        self.assertEqual(rates._data, logm(probs._data))
        # Converting back with the default arguments should reproduce the
        # original probabilities (to floating-point tolerance).
        round_trip = rates.toProbs()
        self.assertFloatEqual(round_trip._data, probs._data)

        #test a case that didn't work for DNA
        dna_matrix = array([
            [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
            [0.31144238, -0.90915091, 0.25825858, 0.33944995],
            [0.01578521, 0.43162879, -0.99257581, 0.54516182],
            [0.13229986, 0.04027147, 0.05817791, -0.23074925],
        ])
        dna_rates = Rates(dna_matrix, DnaPairs)
        observed = dna_rates.toProbs(0.5)._data
        expected = expm(dna_rates._data)(t=0.5)
        self.assertFloatEqual(observed, expected)
Example #14
0
 def test_random_p_matrix_diag_vector(self):
     """Probs random should work with a vector diagonal"""
     # The repetition counter is never read, so it no longer shares a name
     # with the row index used inside the loop.
     for _ in range(NUM_TESTS):
         diag = [0, 0.2, 0.6, 1.0]
         p = Probs.random(RnaPairs, diag)._data
         # Each row must sum to 1 and carry the requested value on the
         # diagonal; pair each row with its diagonal value directly.
         for pos, (wanted, row) in enumerate(zip(diag, p)):
             self.assertFloatEqual(sum(row), 1.0)
             self.assertEqual(row[pos], wanted)
Example #15
0
    def test_toSimilarProbs(self):
        """Rates toSimilarProbs should match individual steps"""
        pair_alphabet = self.abc_pairs
        probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.15, 0.8],
                      pair_alphabet)
        rates = probs.toRates()
        # The one-call shortcut must agree with doing it in two steps:
        # find the time for the similarity, then convert at that time.
        shortcut = rates.toSimilarProbs(0.5)
        stepwise = rates.toProbs(rates.timeForSimilarity(0.5))
        self.assertEqual(shortcut, stepwise)

        #test a case that didn't work for DNA
        dna_matrix = array([
            [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
            [0.31144238, -0.90915091, 0.25825858, 0.33944995],
            [0.01578521, 0.43162879, -0.99257581, 0.54516182],
            [0.13229986, 0.04027147, 0.05817791, -0.23074925],
        ])
        dna_rates = Rates(dna_matrix, DnaPairs)
        similar = dna_rates.toSimilarProbs(0.66)
        mean_diagonal = average(diagonal(similar._data), axis=0)
        self.assertFloatEqual(mean_diagonal, 0.66)
Example #16
0
 def test_toProbs(self):
     """Rates toProbs should return correct probability matrix"""
     pair_alphabet = self.abc_pairs
     probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.1, 0.85],
                   pair_alphabet)
     rates = probs.toRates()
     self.assertEqual(rates._data, logm(probs._data))
     # Converting back with the default arguments should reproduce the
     # original probabilities (to floating-point tolerance).
     round_trip = rates.toProbs()
     self.assertFloatEqual(round_trip._data, probs._data)

     #test a case that didn't work for DNA
     dna_matrix = array([
         [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
         [0.31144238, -0.90915091, 0.25825858, 0.33944995],
         [0.01578521, 0.43162879, -0.99257581, 0.54516182],
         [0.13229986, 0.04027147, 0.05817791, -0.23074925],
     ])
     dna_rates = Rates(dna_matrix, DnaPairs)
     observed = dna_rates.toProbs(0.5)._data
     expected = expm(dna_rates._data)(t=0.5)
     self.assertFloatEqual(observed, expected)
Example #17
0
 def test_random_p_matrix_diag_vector(self):
     """Probs random should work with a vector diagonal"""
     # The repetition counter is never read, so it no longer shares a name
     # with the row index used inside the loop.
     for _ in range(NUM_TESTS):
         diag = [0, 0.2, 0.6, 1.0]
         p = Probs.random(RnaPairs, diag)._data
         # Each row must sum to 1 and carry the requested value on the
         # diagonal; pair each row with its diagonal value directly.
         for pos, (wanted, row) in enumerate(zip(diag, p)):
             self.assertFloatEqual(sum(row), 1.0)
             self.assertEqual(row[pos], wanted)
Example #18
0
 def test_random_p_matrix(self):
     """Probs random should return random Probs with rows that sum to 1"""
     # The repetition counter is unused; the inner loop variable is a row,
     # so it gets its own name instead of rebinding the counter.
     for _ in range(NUM_TESTS):
         p = Probs.random(RnaPairs)._data
         for row in p:
             self.assertFloatEqual(sum(row), 1.0)
         #length should be 4 by default
         self.assertEqual(len(p), 4)
         self.assertEqual(len(p[0]), 4)
Example #19
0
 def test_random_p_matrix(self):
     """Probs random should return random Probs with rows that sum to 1"""
     # The repetition counter is unused; the inner loop variable is a row,
     # so it gets its own name instead of rebinding the counter.
     for _ in range(NUM_TESTS):
         p = Probs.random(RnaPairs)._data
         for row in p:
             self.assertFloatEqual(sum(row), 1.0)
         #length should be 4 by default
         self.assertEqual(len(p), 4)
         self.assertEqual(len(p[0]), 4)
Example #20
0
 def test_toProbs(self):
     """Counts toProbs should return valid prob matrix."""
     counts = Counts([1, 2, 3, 4,
                      2, 2, 2, 2,
                      0.2, 0.4, 0.6, 0.8,
                      1, 0, 0, 0], RnaPairs)
     probs = counts.toProbs()
     assert isinstance(probs, Probs)
     expected = Probs([0.1, 0.2, 0.3, 0.4,
                       0.25, 0.25, 0.25, 0.25,
                       0.1, 0.2, 0.3, 0.4,
                       1.0, 0.0, 0.0, 0.0], RnaPairs)
     self.assertEqual(probs, expected)
     # Spot-check lookups by symbol pair.
     spot_checks = [
         (('U', 'U'), 0.1),
         (('G', 'U'), 1.0),
         (('G', 'G'), 0.0),
     ]
     for pair, value in spot_checks:
         self.assertEqual(probs[pair], value)
Example #21
0
 def test_random_p_matrix_diag(self):
     """Probs random should work with a scalar diagonal"""
     # The repetition counters below are never read; the row index has its
     # own name so it no longer rebinds the outer loop variable.

     #if diagonal is 1, off-diagonal elements should be 0
     for _ in range(NUM_TESTS):
         p = Probs.random(RnaPairs, 1)._data
         self.assertEqual(p, identity(4, 'd'))
     #if diagonal is between 0 and 1, rows should sum to 1
     for _ in range(NUM_TESTS):
         p = Probs.random(RnaPairs, 0.5)._data
         for idx in range(4):
             row = p[idx]
             self.assertFloatEqual(sum(row), 1.0)
             self.assertEqual(row[idx], 0.5)
             assert min(row) >= 0
             assert max(row) <= 1
     #if diagonal > 1, rows should still sum to 1
     for _ in range(NUM_TESTS):
         p = Probs.random(RnaPairs, 2)._data
         for idx in range(4):
             row = p[idx]
             self.assertEqual(row[idx], 2.0)
             self.assertFloatEqual(sum(row), 1.0)
             # a diagonal above 1 forces at least one negative entry
             assert min(row) < 0
Example #22
0
 def test_random_p_matrix_diag(self):
     """Probs random should work with a scalar diagonal"""
     # The repetition counters below are never read; the row index has its
     # own name so it no longer rebinds the outer loop variable.

     #if diagonal is 1, off-diagonal elements should be 0
     for _ in range(NUM_TESTS):
         p = Probs.random(RnaPairs, 1)._data
         self.assertEqual(p, identity(4, 'd'))
     #if diagonal is between 0 and 1, rows should sum to 1
     for _ in range(NUM_TESTS):
         p = Probs.random(RnaPairs, 0.5)._data
         for idx in range(4):
             row = p[idx]
             self.assertFloatEqual(sum(row), 1.0)
             self.assertEqual(row[idx], 0.5)
             assert min(row) >= 0
             assert max(row) <= 1
     #if diagonal > 1, rows should still sum to 1
     for _ in range(NUM_TESTS):
         p = Probs.random(RnaPairs, 2)._data
         for idx in range(4):
             row = p[idx]
             self.assertEqual(row[idx], 2.0)
             self.assertFloatEqual(sum(row), 1.0)
             # a diagonal above 1 forces at least one negative entry
             assert min(row) < 0
Example #23
0
 def test_isValid(self):
     """Probs isValid should return True if it's a prob matrix"""
     pair_alphabet = self.ab_pairs
     # (data, expected isValid() result), checked in this order.
     cases = [
         ([0.5, 0.5, 1, 0], True),
         #fails if don't sum to 1
         ([0.5, 0, 1, 0], False),
         #fails if negative elements
         ([1, -1, 0, 1], False),
     ]
     for data, expected in cases:
         matrix = Probs(data, pair_alphabet)
         self.assertEqual(matrix.isValid(), expected)
Example #24
0
 def test_probs_to_rates(self):
     """probs_to_rates converts probs to rates, omitting problem cases"""
     # 100 random DNA probability matrices keyed by index.
     probs = dict((i, Probs.random(DnaPairs)) for i in range(100))
     rates = probs_to_rates(probs)
     #check we got at most the same number of items as in probs
     assert len(rates) <= len(probs)
     #check that we didn't get anything bad
     for rate in rates.values():
         assert not rate.isSignificantlyComplex()
     #check that we didn't miss anything good
     for key, prob in probs.items():
         if key in rates:
             continue
         # Every omitted matrix must either convert to a bad Rates object
         # (significantly complex or invalid) or fail to convert at all.
         # The checks stay inside the try so numeric errors raised by them
         # are tolerated exactly as errors from toRates() are.
         try:
             r = prob.toRates()
             assert r.isSignificantlyComplex() or (not r.isValid())
         except (ZeroDivisionError, OverflowError, ValueError):
             pass