def test_save_and_load(self):
    # Round-trip a fully specified model through MarkovModel.save() and
    # MarkovModel.load() using an in-memory handle, then check that the
    # states, alphabet and all three probability arrays come back unchanged.
    states = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.75, 0.25],
                          [0.25, 0.75]])
    p_emission = array([[0.45, 0.36, 0.06, 0.13],
                        [0.24, 0.18, 0.12, 0.46]])
    saved_model = MarkovModel.MarkovModel(
        states, alphabet, p_initial, p_transition, p_emission)

    stream = StringIO()
    MarkovModel.save(saved_model, stream)
    # Rewind so that load() reads from the beginning of what was written.
    stream.seek(0)
    loaded_model = MarkovModel.load(stream)

    # The loaded states/alphabet are joined before comparing with the
    # original strings.
    self.assertEqual(''.join(loaded_model.states), states)
    self.assertEqual(''.join(loaded_model.alphabet), alphabet)
    self.assertTrue(array_equal(loaded_model.p_initial, p_initial))
    self.assertTrue(array_equal(loaded_model.p_transition, p_transition))
    self.assertTrue(array_equal(loaded_model.p_emission, p_emission))
def mostLikely(self, normal, island, dnastrand):
    # Build a two-state ("N"/"R") model over the alphabet "AGTC" from
    # percentage inputs and return the state path of the first result of
    # MarkovModel.find_states() for `dnastrand`, joined into one string.
    #
    # normal, island: sequences of percentages (each value is divided by
    #     100).  The first four entries are used as that state's emission
    #     row and the last entry as the probability of switching to the
    #     other state.
    # dnastrand: the sequence handed to find_states().
    state_names = "NR"
    alphabet = "AGTC"

    normal_probs = [float(value) / 100 for value in normal]
    island_probs = [float(value) / 100 for value in island]
    leave_normal = normal_probs[-1]
    leave_island = island_probs[-1]

    # The model always starts in the first state.
    p_initial = asarray([1.0, 0.0])
    # Each row is [P(-> N), P(-> R)] for the state the row belongs to.
    p_transition = asarray([
        [1.0 - leave_normal, leave_normal],
        [leave_island, 1.0 - leave_island],
    ])
    # 2x4 matrix: one emission row per state.
    p_emission = asarray([normal_probs[:4], island_probs[:4]])

    model = MarkovModel.MarkovModel(
        state_names, alphabet, p_initial, p_transition, p_emission)
    # Only the first result is used; its second element is ignored.
    best_path, _ = MarkovModel.find_states(model, dnastrand)[0]
    return ''.join(best_path)
def test_readline_and_check_start(self):
    # _readline_and_check_start() should hand back the first line of the
    # handle when that line matches the expected start string.
    states = "NR"
    alphabet = "AGTC"
    # A model is constructed here but is not otherwise used by this test.
    MarkovModel.MarkovModel(states, alphabet)

    text = "This is a \n string with two lines \n"
    expected_first_line = "This is a \n"
    handle = StringIO(text)
    first_line = MarkovModel._readline_and_check_start(
        handle, expected_first_line)
    self.assertEqual(expected_first_line, first_line)
def test_topcoder5(self):
    # Expected state path: N
    state_names = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.84, 0.16],
                          [0.25, 0.75]])
    p_emission = array([[0.26, 0.37, 0.08, 0.29],
                        [0.31, 0.13, 0.33, 0.23]])
    model = MarkovModel.MarkovModel(
        state_names, alphabet, p_initial, p_transition, p_emission)

    results = MarkovModel.find_states(model, "T")

    # Exactly one result is expected; its second element is not checked.
    self.assertEqual(len(results), 1)
    state_list, _ = results[0]
    self.assertEqual(state_list, list("N"))
def test_topcoder1(self):
    # Expected state path: NNNN
    state_names = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.90, 0.10],
                          [0.20, 0.80]])
    p_emission = array([[0.30, 0.20, 0.30, 0.20],
                        [0.10, 0.40, 0.10, 0.40]])
    model = MarkovModel.MarkovModel(
        state_names, alphabet, p_initial, p_transition, p_emission)

    results = MarkovModel.find_states(model, "TGCC")

    # Exactly one result is expected; its second element is not checked.
    self.assertEqual(len(results), 1)
    state_list, _ = results[0]
    self.assertEqual(state_list, list("NNNN"))
def test_topcoder4(self):
    # Expected state path: NRRRRRRRRRR
    state_names = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.55, 0.45],
                          [0.15, 0.85]])
    p_emission = array([[0.75, 0.03, 0.01, 0.21],
                        [0.34, 0.11, 0.39, 0.16]])
    model = MarkovModel.MarkovModel(
        state_names, alphabet, p_initial, p_transition, p_emission)

    results = MarkovModel.find_states(model, "TTAGCAGTGCG")

    # Exactly one result is expected; its second element is not checked.
    self.assertEqual(len(results), 1)
    state_list, _ = results[0]
    self.assertEqual(state_list, list("NRRRRRRRRRR"))
def test_topcoder3(self):
    # Expected state path: NRRRRRRRRRRRNNNNRRRRRRRRR
    state_names = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.75, 0.25],
                          [0.25, 0.75]])
    p_emission = array([[0.45, 0.36, 0.06, 0.13],
                        [0.24, 0.18, 0.12, 0.46]])
    model = MarkovModel.MarkovModel(
        state_names, alphabet, p_initial, p_transition, p_emission)

    results = MarkovModel.find_states(model, "CCGTACTTACCCAGGACCGCAGTCC")

    # Exactly one result is expected; its second element is not checked.
    self.assertEqual(len(results), 1)
    state_list, _ = results[0]
    self.assertEqual(state_list, list("NRRRRRRRRRRRNNNNRRRRRRRRR"))
def test_topcoder2(self):
    # Expected state path: NNNRRRNNRRNRRN
    state_names = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.56, 0.44],
                          [0.25, 0.75]])
    p_emission = array([[0.04, 0.14, 0.62, 0.20],
                        [0.39, 0.15, 0.04, 0.42]])
    model = MarkovModel.MarkovModel(
        state_names, alphabet, p_initial, p_transition, p_emission)

    results = MarkovModel.find_states(model, "CCTGAGTTAGTCGT")

    # Exactly one result is expected; its second element is not checked.
    self.assertEqual(len(results), 1)
    state_list, _ = results[0]
    self.assertEqual(state_list, list("NNNRRRNNRRNRRN"))
def test_baum_welch(self):
    # Run MarkovModel._baum_welch() on a single observation sequence with
    # explicit starting probabilities, build a model from the returned
    # parameters, and compare them against fixed reference values.
    states = ["CP", "IP"]
    alphabet = ["cola", "ice_t", "lem"]
    # One training sequence; presumably indices into `alphabet`.
    outputs = [(2, 1, 0)]
    start_initial = [1.0, 0.0000001]
    start_transition = [[0.7, 0.3],
                        [0.5, 0.5]]
    start_emission = [[0.6, 0.1, 0.3],
                      [0.1, 0.7, 0.2]]

    trained = MarkovModel._baum_welch(
        len(states), len(alphabet), outputs,
        p_initial=start_initial,
        p_transition=start_transition,
        p_emission=start_emission,
    )
    p_initial, p_transition, p_emission = trained
    model = MarkovModel.MarkovModel(
        states, alphabet, p_initial, p_transition, p_emission)

    self.assertEqual(model.states, ["CP", "IP"])
    self.assertEqual(model.alphabet, ["cola", "ice_t", "lem"])

    # Initial probabilities, compared to 4 decimal places.
    self.assertEqual(len(model.p_initial), 2)
    for index, expected in enumerate([1.0, 0.0]):
        self.assertAlmostEqual(model.p_initial[index], expected, places=4)

    # Transition matrix: check the 2x2 shape first, then every entry to
    # 4 decimal places.
    expected_transition = [[0.02460365, 0.97539634],
                           [1.0, 0.0]]
    self.assertEqual(len(model.p_transition), 2)
    for row in range(2):
        self.assertEqual(len(model.p_transition[row]), 2)
    for row, expected_row in enumerate(expected_transition):
        for col, expected in enumerate(expected_row):
            self.assertAlmostEqual(
                model.p_transition[row][col], expected, places=4)

    # Emission matrix: check the 2x3 shape first, then every entry using
    # assertAlmostEqual's default precision.
    expected_emission = [[0.5, 0.0, 0.5],
                         [0.0, 1.0, 0.0]]
    self.assertEqual(len(model.p_emission), 2)
    for row in range(2):
        self.assertEqual(len(model.p_emission[row]), 3)
    for row, expected_row in enumerate(expected_emission):
        for col, expected in enumerate(expected_row):
            self.assertAlmostEqual(model.p_emission[row][col], expected)
(2, 1, 0) ] print "Training HMM" p_initial = [1.0, 0.0000001] p_transition = [[0.7, 0.3], [0.5, 0.5]] p_emission = [[0.6, 0.1, 0.3], [0.1, 0.7, 0.2]] N, M = len(states), len(alphabet) x = MarkovModel._baum_welch(N, M, outputs, p_initial=p_initial, p_transition=p_transition, p_emission=p_emission ) p_initial, p_transition, p_emission = x mm = MarkovModel.MarkovModel(states, alphabet, p_initial, p_transition, p_emission) print_mm(mm) # Test Baum-Welch. This is hard because it is a non-deterministic # algorithm. Each run will result in different states having to # different emissions. In order to help this, we need to specify some # initial probabilities to bias the final results. This is not # implemented yet in the MarkovModel module. ## states = [ ## "state0", ## "state1", ## "state2", ## "state3", ## ]