def test_save_and_load(self):
        """Round-trip a MarkovModel through save() and load().

        The model is written to an in-memory handle, read back, and every
        field of the reloaded model is compared with the value that was
        used to build the original.
        """
        state_names = "NR"
        symbols = "AGTC"
        start_probs = array([1.0, 0.0])
        trans_probs = array([[0.75, 0.25], [0.25, 0.75]])
        emit_probs = array([
            [0.45, 0.36, 0.06, 0.13],
            [0.24, 0.18, 0.12, 0.46]])
        original = MarkovModel.MarkovModel(
            state_names, symbols, start_probs, trans_probs, emit_probs)

        buf = StringIO()
        MarkovModel.save(original, buf)
        # Rewind so that load() reads from the beginning of the buffer.
        buf.seek(0)
        restored = MarkovModel.load(buf)

        # states/alphabet are joined back into strings for the comparison.
        self.assertEqual(''.join(restored.states), state_names)
        self.assertEqual(''.join(restored.alphabet), symbols)
        expected_arrays = (
            ("p_initial", start_probs),
            ("p_transition", trans_probs),
            ("p_emission", emit_probs))
        for attr_name, expected in expected_arrays:
            self.assertTrue(array_equal(getattr(restored, attr_name), expected))
    def mostLikely(self, normal, island, dnastrand):
        """Return the first state path found for ``dnastrand`` as a string.

        ``normal`` and ``island`` are sequences of numbers that are each
        divided by 100 before use (presumably percentages).  The first four
        entries of each become that state's emission row, in the order of
        the alphabet "AGTC"; the last entry is placed in the transition
        matrix as the probability paired with the other state.

        The return value is the state list of the first result of
        ``MarkovModel.find_states``, joined into a single string over the
        states "N" and "R".
        """
        normal_probs = [float(value)/100 for value in normal]
        island_probs = [float(value)/100 for value in island]

        start_probs = asarray([1.0, 0.0])

        # Row 0 belongs to state "N", row 1 to state "R".
        trans_probs = asarray([
            [1.0-normal_probs[-1], normal_probs[-1]],
            [island_probs[-1], 1.0-island_probs[-1]]])

        # 2x4 matrix: one row per state, one column per alphabet symbol.
        emit_probs = asarray([normal_probs[:4], island_probs[:4]])

        model = MarkovModel.MarkovModel(
            "NR", "AGTC", start_probs, trans_probs, emit_probs)

        # Each result is a (state list, score) pair; only the first is used.
        best_path, _score = MarkovModel.find_states(model, dnastrand)[0]
        return ''.join(best_path)
Example #3
0
    def test_readline_and_check_start(self):
        """_readline_and_check_start returns the first line of the handle."""
        # A model is constructed here as in the original test, although the
        # helper under test is a module-level function and never receives it.
        MarkovModel.MarkovModel("NR", "AGTC")

        expected_start = "This is a \n"
        two_lines = StringIO("This is a \n string with two lines \n")
        first_line = MarkovModel._readline_and_check_start(
            two_lines, expected_start)
        self.assertEqual(expected_start, first_line)
Example #4
0
 def test_topcoder5(self):
     """find_states on the single observation "T" gives one path: N."""
     model = MarkovModel.MarkovModel(
         "NR",
         "AGTC",
         array([1.0, 0.0]),
         array([[0.84, 0.16],
                [0.25, 0.75]]),
         array([[0.26, 0.37, 0.08, 0.29],
                [0.31, 0.13, 0.33, 0.23]]))
     results = MarkovModel.find_states(model, "T")
     # Exactly one (state list, score) pair is expected.
     self.assertEqual(len(results), 1)
     path, _score = results[0]
     self.assertEqual(path, ["N"])
Example #5
0
 def test_topcoder1(self):
     """find_states on "TGCC" gives one path: NNNN."""
     model = MarkovModel.MarkovModel(
         "NR",
         "AGTC",
         array([1.0, 0.0]),
         array([[0.90, 0.10],
                [0.20, 0.80]]),
         array([[0.30, 0.20, 0.30, 0.20],
                [0.10, 0.40, 0.10, 0.40]]))
     results = MarkovModel.find_states(model, "TGCC")
     # Exactly one (state list, score) pair is expected.
     self.assertEqual(len(results), 1)
     path, _score = results[0]
     self.assertEqual(path, ["N"] * 4)
 def test_topcoder4(self):
     """find_states on "TTAGCAGTGCG" gives one path: N followed by ten R."""
     start_probs = array([1.0, 0.0])
     trans_probs = array([[0.55, 0.45], [0.15, 0.85]])
     emit_probs = array([[0.75, 0.03, 0.01, 0.21], [0.34, 0.11, 0.39, 0.16]])
     model = MarkovModel.MarkovModel(
         "NR", "AGTC", start_probs, trans_probs, emit_probs)
     results = MarkovModel.find_states(model, "TTAGCAGTGCG")
     # Exactly one (state list, score) pair is expected.
     self.assertEqual(len(results), 1)
     path, _score = results[0]
     # NRRRRRRRRRR
     self.assertEqual(path, ['N'] + ['R'] * 10)
 def test_topcoder3(self):
     """find_states on a 25-symbol sequence gives one N/R path with two R runs."""
     start_probs = array([1.0, 0.0])
     trans_probs = array([[0.75, 0.25], [0.25, 0.75]])
     emit_probs = array([[0.45, 0.36, 0.06, 0.13], [0.24, 0.18, 0.12, 0.46]])
     model = MarkovModel.MarkovModel(
         "NR", "AGTC", start_probs, trans_probs, emit_probs)
     results = MarkovModel.find_states(model, "CCGTACTTACCCAGGACCGCAGTCC")
     # Exactly one (state list, score) pair is expected.
     self.assertEqual(len(results), 1)
     path, _score = results[0]
     # NRRRRRRRRRRRNNNNRRRRRRRRR, written as run lengths.
     expected_path = ['N'] + ['R'] * 11 + ['N'] * 4 + ['R'] * 9
     self.assertEqual(path, expected_path)
 def test_topcoder2(self):
     """find_states on "CCTGAGTTAGTCGT" gives one path: NNNRRRNNRRNRRN."""
     start_probs = array([1.0, 0.0])
     trans_probs = array([[0.56, 0.44], [0.25, 0.75]])
     emit_probs = array([[0.04, 0.14, 0.62, 0.20], [0.39, 0.15, 0.04, 0.42]])
     model = MarkovModel.MarkovModel(
         "NR", "AGTC", start_probs, trans_probs, emit_probs)
     results = MarkovModel.find_states(model, "CCTGAGTTAGTCGT")
     # Exactly one (state list, score) pair is expected.
     self.assertEqual(len(results), 1)
     path, _score = results[0]
     # One expected state per observed symbol.
     self.assertEqual(path, list("NNNRRRNNRRNRRN"))
Example #9
0
 def test_baum_welch(self):
     """Train with _baum_welch from fixed estimates and check the result.

     A single observation sequence (2, 1, 0) is used for training.  The
     returned initial, transition and emission probabilities are wrapped
     in a MarkovModel and compared with fixed reference values.
     """
     state_names = ["CP", "IP"]
     symbols = ["cola", "ice_t", "lem"]
     training = [(2, 1, 0)]
     trained = MarkovModel._baum_welch(
         len(state_names), len(symbols), training,
         p_initial=[1.0, 0.0000001],
         p_transition=[[0.7, 0.3], [0.5, 0.5]],
         p_emission=[[0.6, 0.1, 0.3], [0.1, 0.7, 0.2]])
     start_probs, trans_probs, emit_probs = trained
     model = MarkovModel.MarkovModel(
         state_names, symbols, start_probs, trans_probs, emit_probs)

     self.assertEqual(model.states, ["CP", "IP"])
     self.assertEqual(model.alphabet, ["cola", "ice_t", "lem"])

     # Initial probabilities, compared to 4 decimal places.
     want_initial = [1.0, 0.0]
     self.assertEqual(len(model.p_initial), 2)
     for i, want in enumerate(want_initial):
         self.assertAlmostEqual(model.p_initial[i], want, places=4)

     # Transition probabilities, compared to 4 decimal places.
     want_transition = [[0.02460365, 0.97539634],
                        [1.0, 0.0]]
     self.assertEqual(len(model.p_transition), 2)
     for i in range(2):
         self.assertEqual(len(model.p_transition[i]), 2)
     for i, want_row in enumerate(want_transition):
         for j, want in enumerate(want_row):
             self.assertAlmostEqual(model.p_transition[i][j], want, places=4)

     # Emission probabilities, compared with assertAlmostEqual's default
     # precision.
     want_emission = [[0.5, 0.0, 0.5],
                      [0.0, 1.0, 0.0]]
     self.assertEqual(len(model.p_emission), 2)
     for i in range(2):
         self.assertEqual(len(model.p_emission[i]), 3)
     for i, want_row in enumerate(want_emission):
         for j, want in enumerate(want_row):
             self.assertAlmostEqual(model.p_emission[i][j], want)
Example #10
0
    (2, 1, 0)
    ]
# Train an HMM with Baum-Welch from explicit starting estimates and print
# the resulting model.  `states`, `alphabet`, `outputs` and `print_mm` are
# not defined in this part of the script; they come from earlier in the file.
print "Training HMM"
# Starting estimates handed to the trainer as keyword arguments.
p_initial = [1.0, 0.0000001]
p_transition = [[0.7, 0.3],
                [0.5, 0.5]]
p_emission = [[0.6, 0.1, 0.3],
              [0.1, 0.7, 0.2]]
# N and M are the number of states and the number of alphabet symbols.
N, M = len(states), len(alphabet)
x = MarkovModel._baum_welch(N, M, outputs,
                            p_initial=p_initial,
                            p_transition=p_transition,
                            p_emission=p_emission
                            )
# The result unpacks into three values, which replace the starting estimates.
p_initial, p_transition, p_emission = x
mm = MarkovModel.MarkovModel(states, alphabet,
                             p_initial, p_transition, p_emission)
print_mm(mm)


# Test Baum-Welch.  This is hard because it is a non-deterministic
# algorithm.  Each run may result in different states being assigned
# to different emissions.  To help with this, we need to specify some
# initial probabilities to bias the final results.  This is not
# implemented yet in the MarkovModel module.

## states = [
##     "state0",
##     "state1",
##     "state2",
##     "state3",
##     ]