def test_mutate(self):
    """Probs.mutate should map an input vector to the correct output vector.

    First checks a deterministic case in which the random vector is
    supplied explicitly, then checks that 1000 mutations without a supplied
    random vector produce state 0 at roughly the expected frequency at
    every position.
    """
    pairs = Alphabet('abc')**2
    m = Probs([0.5, 0.25, 0.25,
               0.1, 0.8, 0.1,
               0.3, 0.6, 0.1], pairs)
    # Because of fp math in accumulate, boundaries can't be predicted
    # exactly, so nudge boundary values by eps to get the expected side.
    eps = 1e-6
    #            a  b  b  a  c  c  a  b  c
    seq = array([0, 1, 1, 0, 2, 2, 0, 1, 2])
    random_vec = array([0, .01, .8 - eps, 1, 1, .3, .05, .9 + eps, .95])
    #                 a  a  b  c  c  a  a  c  c
    expected = array([0, 0, 1, 2, 2, 0, 0, 2, 2])
    self.assertEqual(m.mutate(seq, random_vec), expected)

    # Check that the frequency distribution is about right.
    seqs = array([m.mutate(seq) for _ in range(1000)])
    # WARNING: bool operators return byte arrays, whose sums wrap at 256!
    is_zero = asarray(seqs == 0, 'int32')
    observed = sum(is_zero, axis=0)
    # expect: 500, 100, 100, 500, 300, 300, 500, 100, 300
    # std dev = sqrt(npq), which is sqrt(250), sqrt(90), sqrt(210)
    means = array([500, 100, 100, 500, 300, 300, 500, 100, 300])
    var = array([250, 90, 90, 250, 210, 210, 250, 90, 210])
    three_sd = 3 * sqrt(var)
    # NOTE: the accepted window is 2 * three_sd on each side of the mean,
    # i.e. six standard deviations in total, not three.
    for count, mean, spread in zip(observed, means, three_sd):
        lower = mean - 2 * spread
        upper = mean + 2 * spread
        assert lower < count < upper
def test_mutate(self):
    """Probs.mutate should map an input vector to the correct output vector.

    First checks a deterministic case in which the random vector is
    supplied explicitly, then checks that 1000 mutations without a supplied
    random vector produce state 0 at roughly the expected frequency at
    every position.
    """
    pairs = Alphabet('abc')**2
    m = Probs([0.5, 0.25, 0.25,
               0.1, 0.8, 0.1,
               0.3, 0.6, 0.1], pairs)
    # Because of fp math in accumulate, boundaries can't be predicted
    # exactly, so nudge boundary values by eps to get the expected side.
    eps = 1e-6
    #            a  b  b  a  c  c  a  b  c
    seq = array([0, 1, 1, 0, 2, 2, 0, 1, 2])
    random_vec = array([0, .01, .8 - eps, 1, 1, .3, .05, .9 + eps, .95])
    #                 a  a  b  c  c  a  a  c  c
    expected = array([0, 0, 1, 2, 2, 0, 0, 2, 2])
    self.assertEqual(m.mutate(seq, random_vec), expected)

    # Check that the frequency distribution is about right.
    seqs = array([m.mutate(seq) for _ in range(1000)])
    # WARNING: bool operators return byte arrays, whose sums wrap at 256!
    is_zero = asarray(seqs == 0, 'int32')
    observed = sum(is_zero, axis=0)
    # expect: 500, 100, 100, 500, 300, 300, 500, 100, 300
    # std dev = sqrt(npq), which is sqrt(250), sqrt(90), sqrt(210)
    means = array([500, 100, 100, 500, 300, 300, 500, 100, 300])
    var = array([250, 90, 90, 250, 210, 210, 250, 90, 210])
    three_sd = 3 * sqrt(var)
    # NOTE: the accepted window is 2 * three_sd on each side of the mean,
    # i.e. six standard deviations in total, not three.
    for count, mean, spread in zip(observed, means, three_sd):
        lower = mean - 2 * spread
        upper = mean + 2 * spread
        assert lower < count < upper
def test_makeModel(self):
    """Probs.makeModel should return the correct substitution pattern.

    The expected result holds, for each item of the input sequence, the
    matching three-value row of the probability data given to Probs.
    """
    pairs = Alphabet('abc')**2
    # The three rows of probabilities, named so the expectation reads
    # directly off the input sequence.
    row_a = [0.5, 0.25, 0.25]
    row_b = [0.1, 0.8, 0.1]
    row_c = [0.3, 0.6, 0.1]
    m = Probs([0.5, 0.25, 0.25,
               0.1, 0.8, 0.1,
               0.3, 0.6, 0.1], pairs)
    seq = array([0, 1, 1, 0, 2, 2])
    expected = array([row_a, row_b, row_b, row_a, row_c, row_c])
    self.assertEqual(m.makeModel(seq), expected)
def test_toCounts(self):
    """Probs.toCounts should return a Counts object with the right numbers."""
    pairs = Alphabet('abc')**2
    probs = Probs([0.5, 0.25, 0.25,
                   0.1, 0.8, 0.1,
                   0.3, 0.6, 0.1], pairs)
    counts = probs.toCounts(30)
    # The result must be a Counts instance, not just something equal to one.
    assert isinstance(counts, Counts)
    expected = Counts([[5., 2.5, 2.5, 1, 8, 1, 3, 6, 1]], pairs)
    self.assertEqual(counts, expected)
def test_toCounts(self):
    """Probs.toCounts should return a Counts object with the right numbers."""
    pairs = Alphabet('abc')**2
    probs = Probs([0.5, 0.25, 0.25,
                   0.1, 0.8, 0.1,
                   0.3, 0.6, 0.1], pairs)
    counts = probs.toCounts(30)
    # The result must be a Counts instance, not just something equal to one.
    assert isinstance(counts, Counts)
    expected = Counts([[5., 2.5, 2.5, 1, 8, 1, 3, 6, 1]], pairs)
    self.assertEqual(counts, expected)
def test_makeModel(self):
    """Probs.makeModel should return the correct substitution pattern.

    The expected result holds, for each item of the input sequence, the
    matching three-value row of the probability data given to Probs.
    """
    pairs = Alphabet('abc')**2
    # The three rows of probabilities, named so the expectation reads
    # directly off the input sequence.
    row_a = [0.5, 0.25, 0.25]
    row_b = [0.1, 0.8, 0.1]
    row_c = [0.3, 0.6, 0.1]
    m = Probs([0.5, 0.25, 0.25,
               0.1, 0.8, 0.1,
               0.3, 0.6, 0.1], pairs)
    seq = array([0, 1, 1, 0, 2, 2])
    expected = array([row_a, row_b, row_b, row_a, row_c, row_c])
    self.assertEqual(m.makeModel(seq), expected)
def test_toRates(self):
    """Probs.toRates should return logm of the probs, optionally normalized."""
    pairs = Alphabet('abc')**2
    p = Probs([0.9, 0.05, 0.05,
               0.1, 0.85, 0.05,
               0.02, 0.02, 0.96], pairs)
    assert p.isValid()

    rates = p.toRates()
    assert isinstance(rates, Rates)
    assert rates.isValid()
    assert not rates.isComplex()
    self.assertEqual(rates._data, logm(p._data))

    # With normalize=True the trace of the rate matrix should be -1.
    normalized = p.toRates(normalize=True)
    self.assertFloatEqual(trace(normalized._data), -1.0)
def test_toRates(self):
    """Probs.toRates should return logm of the probs, optionally normalized."""
    pairs = Alphabet('abc')**2
    p = Probs([0.9, 0.05, 0.05,
               0.1, 0.85, 0.05,
               0.02, 0.02, 0.96], pairs)
    assert p.isValid()

    rates = p.toRates()
    assert isinstance(rates, Rates)
    assert rates.isValid()
    assert not rates.isComplex()
    self.assertEqual(rates._data, logm(p._data))

    # With normalize=True the trace of the rate matrix should be -1.
    normalized = p.toRates(normalize=True)
    self.assertFloatEqual(trace(normalized._data), -1.0)
def test_timeForSimilarity(self):
    """Rates.timeForSimilarity should return the correct time.

    The returned time is checked by exponentiating the rate matrix at that
    time and comparing the average of the diagonal with the requested
    similarity.
    """
    pairs = self.abc_pairs
    p = Probs([0.75, 0.1, 0.15,
               0.2, 0.7, 0.1,
               0.05, 0.15, 0.8], pairs)
    q = p.toRates()
    target = 0.5

    # Similarity only, no second argument.
    t = q.timeForSimilarity(target)
    probs_at_t = expm(q._data)(t)
    self.assertFloatEqual(average(diagonal(probs_at_t), axis=0), target)

    # Same target, with an explicit vector of three equal values (1/3 each).
    thirds = array([1 / 3.0] * 3)
    t = q.timeForSimilarity(target, thirds)
    probs_at_t = expm(q._data)(t)
    self.assertFloatEqual(average(diagonal(probs_at_t), axis=0), target)

    # A similarity of 1 should take no time at all.
    self.assertEqual(q.timeForSimilarity(1), 0)
def test_timeForSimilarity(self):
    """Rates.timeForSimilarity should return the correct time.

    The returned time is checked by exponentiating the rate matrix at that
    time and comparing the average of the diagonal with the requested
    similarity.
    """
    pairs = self.abc_pairs
    p = Probs([0.75, 0.1, 0.15,
               0.2, 0.7, 0.1,
               0.05, 0.15, 0.8], pairs)
    q = p.toRates()
    target = 0.5

    # Similarity only, no second argument.
    t = q.timeForSimilarity(target)
    probs_at_t = expm(q._data)(t)
    self.assertFloatEqual(average(diagonal(probs_at_t), axis=0), target)

    # Same target, with an explicit vector of three equal values (1/3 each).
    thirds = array([1 / 3.0] * 3)
    t = q.timeForSimilarity(target, thirds)
    probs_at_t = expm(q._data)(t)
    self.assertFloatEqual(average(diagonal(probs_at_t), axis=0), target)

    # A similarity of 1 should take no time at all.
    self.assertEqual(q.timeForSimilarity(1), 0)
def test_isValid(self):
    """Probs.isValid should return True only for a probability matrix."""
    pairs = self.ab_pairs

    well_formed = Probs([0.5, 0.5, 1, 0], pairs)
    self.assertEqual(well_formed.isValid(), True)

    # Fails if the values don't sum to 1.
    bad_sum = Probs([0.5, 0, 1, 0], pairs)
    self.assertEqual(bad_sum.isValid(), False)

    # Fails if there are negative elements.
    has_negative = Probs([1, -1, 0, 1], pairs)
    self.assertEqual(has_negative.isValid(), False)
def test_toSimilarProbs(self):
    """Rates.toSimilarProbs should match performing the steps individually."""
    pairs = self.abc_pairs
    p = Probs([0.75, 0.1, 0.15,
               0.2, 0.7, 0.1,
               0.05, 0.15, 0.8], pairs)
    q = p.toRates()
    # One call versus the equivalent two-step computation.
    combined = q.toSimilarProbs(0.5)
    stepwise = q.toProbs(q.timeForSimilarity(0.5))
    self.assertEqual(combined, stepwise)

    # Test a case that didn't work for DNA.
    dna_rates = array([
        [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
        [0.31144238, -0.90915091, 0.25825858, 0.33944995],
        [0.01578521, 0.43162879, -0.99257581, 0.54516182],
        [0.13229986, 0.04027147, 0.05817791, -0.23074925],
    ])
    q = Rates(dna_rates, DnaPairs)
    p = q.toSimilarProbs(0.66)
    self.assertFloatEqual(average(diagonal(p._data), axis=0), 0.66)
def test_toProbs(self):
    """Rates.toProbs should return the correct probability matrix."""
    pairs = self.abc_pairs
    p = Probs([0.75, 0.1, 0.15,
               0.2, 0.7, 0.1,
               0.05, 0.1, 0.85], pairs)
    q = p.toRates()
    self.assertEqual(q._data, logm(p._data))
    # Converting back should recover the original probabilities.
    round_trip = q.toProbs()
    self.assertFloatEqual(round_trip._data, p._data)

    # Test a case that didn't work for DNA.
    dna_rates = array([
        [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
        [0.31144238, -0.90915091, 0.25825858, 0.33944995],
        [0.01578521, 0.43162879, -0.99257581, 0.54516182],
        [0.13229986, 0.04027147, 0.05817791, -0.23074925],
    ])
    q = Rates(dna_rates, DnaPairs)
    self.assertFloatEqual(q.toProbs(0.5)._data, expm(q._data)(t=0.5))
def test_random_p_matrix_diag_vector(self):
    """Probs.random should work with a vector diagonal.

    Each row must sum to 1 and carry the requested value on the diagonal.
    """
    for _ in range(NUM_TESTS):
        diag = [0, 0.2, 0.6, 1.0]
        p = Probs.random(RnaPairs, diag)._data
        for idx, (wanted, row) in enumerate(zip(diag, p)):
            self.assertFloatEqual(sum(row), 1.0)
            self.assertEqual(row[idx], wanted)
def test_toSimilarProbs(self):
    """Rates.toSimilarProbs should match performing the steps individually."""
    pairs = self.abc_pairs
    p = Probs([0.75, 0.1, 0.15,
               0.2, 0.7, 0.1,
               0.05, 0.15, 0.8], pairs)
    q = p.toRates()
    # One call versus the equivalent two-step computation.
    combined = q.toSimilarProbs(0.5)
    stepwise = q.toProbs(q.timeForSimilarity(0.5))
    self.assertEqual(combined, stepwise)

    # Test a case that didn't work for DNA.
    dna_rates = array([
        [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
        [0.31144238, -0.90915091, 0.25825858, 0.33944995],
        [0.01578521, 0.43162879, -0.99257581, 0.54516182],
        [0.13229986, 0.04027147, 0.05817791, -0.23074925],
    ])
    q = Rates(dna_rates, DnaPairs)
    p = q.toSimilarProbs(0.66)
    self.assertFloatEqual(average(diagonal(p._data), axis=0), 0.66)
def test_toProbs(self):
    """Rates.toProbs should return the correct probability matrix."""
    pairs = self.abc_pairs
    p = Probs([0.75, 0.1, 0.15,
               0.2, 0.7, 0.1,
               0.05, 0.1, 0.85], pairs)
    q = p.toRates()
    self.assertEqual(q._data, logm(p._data))
    # Converting back should recover the original probabilities.
    round_trip = q.toProbs()
    self.assertFloatEqual(round_trip._data, p._data)

    # Test a case that didn't work for DNA.
    dna_rates = array([
        [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
        [0.31144238, -0.90915091, 0.25825858, 0.33944995],
        [0.01578521, 0.43162879, -0.99257581, 0.54516182],
        [0.13229986, 0.04027147, 0.05817791, -0.23074925],
    ])
    q = Rates(dna_rates, DnaPairs)
    self.assertFloatEqual(q.toProbs(0.5)._data, expm(q._data)(t=0.5))
def test_random_p_matrix(self):
    """Probs.random should return a random Probs whose rows sum to 1."""
    for _ in range(NUM_TESTS):
        p = Probs.random(RnaPairs)._data
        for row in p:
            self.assertFloatEqual(sum(row), 1.0)
        # Length should be 4 by default, in both dimensions.
        self.assertEqual(len(p), 4)
        self.assertEqual(len(p[0]), 4)
def test_toProbs(self):
    """Counts.toProbs should return a valid probability matrix."""
    c = Counts([1, 2, 3, 4,
                2, 2, 2, 2,
                0.2, 0.4, 0.6, 0.8,
                1, 0, 0, 0], RnaPairs)
    p = c.toProbs()
    assert isinstance(p, Probs)
    expected = Probs([0.1, 0.2, 0.3, 0.4,
                      0.25, 0.25, 0.25, 0.25,
                      0.1, 0.2, 0.3, 0.4,
                      1.0, 0.0, 0.0, 0.0], RnaPairs)
    self.assertEqual(p, expected)
    # Spot-check individual cells via symbol-pair indexing.
    spot_checks = (
        (('U', 'U'), 0.1),
        (('G', 'U'), 1.0),
        (('G', 'G'), 0.0),
    )
    for pair, value in spot_checks:
        self.assertEqual(p[pair], value)
def test_random_p_matrix_diag(self):
    """Probs.random should work with a scalar diagonal."""
    # If the diagonal is 1, off-diagonal elements should be 0.
    for _ in range(NUM_TESTS):
        p = Probs.random(RnaPairs, 1)._data
        self.assertEqual(p, identity(4, 'd'))

    # If the diagonal is between 0 and 1, rows should sum to 1 and every
    # element should stay within [0, 1].
    for _ in range(NUM_TESTS):
        p = Probs.random(RnaPairs, 0.5)._data
        for idx in range(4):
            row = p[idx]
            self.assertFloatEqual(sum(row), 1.0)
            self.assertEqual(row[idx], 0.5)
            assert min(row) >= 0
            assert max(row) <= 1

    # If the diagonal is > 1, rows should still sum to 1, which forces at
    # least one negative element per row.
    for _ in range(NUM_TESTS):
        p = Probs.random(RnaPairs, 2)._data
        for idx in range(4):
            row = p[idx]
            self.assertEqual(row[idx], 2.0)
            self.assertFloatEqual(sum(row), 1.0)
            assert min(row) < 0
def test_isValid(self):
    """Probs.isValid should return True only for a probability matrix."""
    pairs = self.ab_pairs

    well_formed = Probs([0.5, 0.5, 1, 0], pairs)
    self.assertEqual(well_formed.isValid(), True)

    # Fails if the values don't sum to 1.
    bad_sum = Probs([0.5, 0, 1, 0], pairs)
    self.assertEqual(bad_sum.isValid(), False)

    # Fails if there are negative elements.
    has_negative = Probs([1, -1, 0, 1], pairs)
    self.assertEqual(has_negative.isValid(), False)
def test_probs_to_rates(self):
    """probs_to_rates converts probs to rates, omitting problem cases.

    Builds 100 random Probs keyed by integer, converts them in bulk, and
    checks both directions: nothing bad was kept, and nothing good was
    dropped.
    """
    probs = dict((i, Probs.random(DnaPairs)) for i in range(100))
    rates = probs_to_rates(probs)
    # Check we got at most the same number of items as in probs.
    assert len(rates) <= len(probs)
    # Check that we didn't get anything bad.
    for rate in rates.values():
        assert not rate.isSignificantlyComplex()
    # Check that we didn't miss anything good: every omitted entry must
    # either fail to convert or convert to something unusable.
    for key, val in probs.items():
        if key in rates:
            continue
        try:
            r = val.toRates()
            assert r.isSignificantlyComplex() or (not r.isValid())
        except (ZeroDivisionError, OverflowError, ValueError):
            # These errors are accepted for an entry that was omitted;
            # any other exception (including AssertionError) propagates.
            pass