def test_isValid(self):
    """Probs.isValid should be True only for a proper probability matrix."""
    pairs = self.ab_pairs
    cases = [
        # a matrix that isValid should accept
        ([0.5, 0.5, 1, 0], True),
        # fails if don't sum to 1
        ([0.5, 0, 1, 0], False),
        # fails if negative elements
        ([1, -1, 0, 1], False),
    ]
    # Cases run in order and stop at the first failing assertion.
    for values, expected in cases:
        self.assertEqual(Probs(values, pairs).isValid(), expected)
def test_mutate(self):
    """Probs mutate should return correct vector from input vector.

    Two checks are made:

    1. With an explicit random vector the mutated sequence is compared
       against a hand-computed result.
    2. Without a random vector, 1000 mutations of the same sequence are
       generated and the number of times state 0 appears at each position
       is compared against the binomial mean for that position.
    """
    a = Alphabet('abc')**2
    m = Probs([0.5, 0.25, 0.25, 0.1, 0.8, 0.1, 0.3, 0.6, 0.1], a)
    # because of fp math in accumulate, can't predict boundaries exactly
    # so add/subtract eps to get the result we expect
    eps = 1e-6
    #            a  b  b  a  c  c  a  b  c
    seq = array([0, 1, 1, 0, 2, 2, 0, 1, 2])
    random_vec = array([0, .01, .8 - eps, 1, 1, .3, .05, .9 + eps, .95])
    #                 a  a  b  c  c  a  a  c  c
    expected = array([0, 0, 1, 2, 2, 0, 0, 2, 2])
    self.assertEqual(m.mutate(seq, random_vec), expected)

    # check that freq. distribution is about right
    seqs = array([m.mutate(seq) for _ in range(1000)])
    # WARNING: bool operators return byte arrays, whose sums wrap at 256!
    zero_count = asarray(seqs == 0, 'int32')
    sums = sum(zero_count, axis=0)
    # expect: 500, 100, 100, 500, 300, 300, 500, 100, 300
    # std dev = sqrt(npq), which is sqrt(250), sqrt(90), sqrt(210)
    means = array([500, 100, 100, 500, 300, 300, 500, 100, 300])
    var = array([250, 90, 90, 250, 210, 210, 250, 90, 210])
    # The bound used here has always been 2 * (3 * sqrt(var)), i.e. six
    # standard deviations either side of the mean.  It is kept unchanged so
    # the test does not become flakier, but is now named for what it is.
    # NOTE(review): if a tighter 3-sd bound was intended, change the factor.
    tolerance = 6 * sqrt(var)
    for position, (obs, exp, tol) in enumerate(zip(sums, means, tolerance)):
        # assertTrue (not a bare assert) so the check survives ``python -O``
        # and reports which position was out of range.
        self.assertTrue(
            exp - tol < obs < exp + tol,
            "position %d: observed %s zeros, expected %s +/- %s"
            % (position, obs, exp, tol))
def test_toCounts(self):
    """Probs toCounts should return counts object w/ right numbers."""
    a = Alphabet('abc')**2
    m = Probs([0.5, 0.25, 0.25, 0.1, 0.8, 0.1, 0.3, 0.6, 0.1], a)
    obs = m.toCounts(30)
    # assertTrue rather than a bare assert: the type check is still run
    # under ``python -O`` and fails with a readable message.
    self.assertTrue(
        isinstance(obs, Counts),
        "toCounts returned %r, expected a Counts instance" % type(obs))
    # Each expected count below is the matching probability times 10.
    exp = Counts([[5., 2.5, 2.5, 1, 8, 1, 3, 6, 1]], a)
    self.assertEqual(obs, exp)
def test_makeModel(self):
    """Probs makeModel should give one row of probabilities per input symbol."""
    # Rows of the 3x3 matrix, named after the symbol they belong to.
    row_a = [0.5, 0.25, 0.25]
    row_b = [0.1, 0.8, 0.1]
    row_c = [0.3, 0.6, 0.1]
    pair_alphabet = Alphabet('abc')**2
    probs = Probs(row_a + row_b + row_c, pair_alphabet)
    #                 a  b  b  a  c  c
    sequence = array([0, 1, 1, 0, 2, 2])
    model = probs.makeModel(sequence)
    expected = array([row_a, row_b, row_b, row_a, row_c, row_c])
    self.assertEqual(model, expected)
def test_toProbs(self):
    """Counts toProbs should return valid prob matrix."""
    c = Counts([1, 2, 3, 4, 2, 2, 2, 2, 0.2, 0.4, 0.6, 0.8, 1, 0, 0, 0],
               RnaPairs)
    p = c.toProbs()
    # assertTrue rather than a bare assert: the type check is still run
    # under ``python -O`` and fails with a readable message.
    self.assertTrue(
        isinstance(p, Probs),
        "toProbs returned %r, expected a Probs instance" % type(p))
    self.assertEqual(
        p,
        Probs([0.1, 0.2, 0.3, 0.4, 0.25, 0.25, 0.25, 0.25,
               0.1, 0.2, 0.3, 0.4, 1.0, 0.0, 0.0, 0.0], RnaPairs))
    # Spot-check individual cells through symbol-pair indexing.
    self.assertEqual(p['U', 'U'], 0.1)
    self.assertEqual(p['G', 'U'], 1.0)
    self.assertEqual(p['G', 'G'], 0.0)
def test_toRates(self):
    """Probs toRates should return log of probs, optionally normalized."""
    a = Alphabet('abc')**2
    p = Probs([0.9, 0.05, 0.05, 0.1, 0.85, 0.05, 0.02, 0.02, 0.96], a)
    # unittest assertions are used instead of bare asserts so these
    # preconditions are still checked under ``python -O`` and report
    # which one failed.
    self.assertTrue(p.isValid(), "input Probs should be valid")
    r = p.toRates()
    self.assertTrue(
        isinstance(r, Rates),
        "toRates returned %r, expected a Rates instance" % type(r))
    self.assertTrue(r.isValid(), "resulting Rates should be valid")
    self.assertFalse(r.isComplex(), "resulting Rates should not be complex")
    self.assertEqual(r._data, logm(p._data))
    # With normalize=True the trace of the rate matrix is expected to be -1.
    r_norm = p.toRates(normalize=True)
    self.assertFloatEqual(trace(r_norm._data), -1.0)
def test_timeForSimilarity(self):
    """Rates timeForSimilarity should give the time that yields the similarity."""
    pairs = self.abc_pairs
    probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.15, 0.8], pairs)
    rates = probs.toRates()
    target = 0.5

    # Default call: exponentiating the rates for the returned time should
    # give a matrix whose mean diagonal equals the target.
    elapsed = rates.timeForSimilarity(target)
    evolved = expm(rates._data)(elapsed)
    self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

    # Same check with an explicit second argument of three equal values
    # (presumably per-state frequencies -- verify against Rates).
    uniform = array([1 / 3.0] * 3)
    elapsed = rates.timeForSimilarity(target, uniform)
    evolved = expm(rates._data)(elapsed)
    self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

    # A similarity of 1 should need no time at all.
    self.assertEqual(rates.timeForSimilarity(1), 0)
def test_toSimilarProbs(self):
    """Rates toSimilarProbs should agree with timeForSimilarity then toProbs."""
    pairs = self.abc_pairs
    probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.15, 0.8], pairs)
    rates = probs.toRates()
    one_step = rates.toSimilarProbs(0.5)
    two_step = rates.toProbs(rates.timeForSimilarity(0.5))
    self.assertEqual(one_step, two_step)

    # test a case that didn't work for DNA
    dna_matrix = array([
        [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
        [0.31144238, -0.90915091, 0.25825858, 0.33944995],
        [0.01578521, 0.43162879, -0.99257581, 0.54516182],
        [0.13229986, 0.04027147, 0.05817791, -0.23074925],
    ])
    dna_rates = Rates(dna_matrix, DnaPairs)
    similar = dna_rates.toSimilarProbs(0.66)
    mean_diagonal = average(diagonal(similar._data), axis=0)
    self.assertFloatEqual(mean_diagonal, 0.66)
def test_toProbs(self):
    """Rates toProbs should recover the probability matrix it came from."""
    pairs = self.abc_pairs
    probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.1, 0.85], pairs)
    rates = probs.toRates()
    # The rates hold the matrix log of the probabilities ...
    self.assertEqual(rates._data, logm(probs._data))
    # ... and converting back should round-trip to within float tolerance.
    recovered = rates.toProbs()
    self.assertFloatEqual(recovered._data, probs._data)

    # test a case that didn't work for DNA
    dna_matrix = array([
        [-0.64098451, 0.0217681, 0.35576469, 0.26345171],
        [0.31144238, -0.90915091, 0.25825858, 0.33944995],
        [0.01578521, 0.43162879, -0.99257581, 0.54516182],
        [0.13229986, 0.04027147, 0.05817791, -0.23074925],
    ])
    dna_rates = Rates(dna_matrix, DnaPairs)
    observed = dna_rates.toProbs(0.5)._data
    expected = expm(dna_rates._data)(t=0.5)
    self.assertFloatEqual(observed, expected)