def mostLikely(self, normal, island, dnastrand):
    """Return the best hidden-state path for ``dnastrand`` as a string.

    ``normal`` and ``island`` hold percentages (each value is divided by
    100 here).  Their first four entries are used as the emission
    probabilities for the symbols A, G, T, C, and their last entry as the
    probability of switching to the other state.  The model always starts
    in state "N".

    The result is the state list of the first path returned by
    ``MarkovModel.find_states``, joined into one string of "N"/"R".
    """
    state_names = "NR"
    symbols = "AGTC"

    normal_probs = [float(pct) / 100 for pct in normal]
    island_probs = [float(pct) / 100 for pct in island]
    leave_normal = normal_probs[-1]
    leave_island = island_probs[-1]

    start = asarray([1.0, 0.0])
    # Row 0 is state N, row 1 is state R; columns follow the same order.
    transition = asarray([
        [1.0 - leave_normal, leave_normal],
        [leave_island, 1.0 - leave_island],
    ])
    # 2x4 matrix: one row per state, one column per symbol.
    emission = asarray([normal_probs[:4], island_probs[:4]])

    model = MarkovModel.MarkovModel(
        state_names, symbols, start, transition, emission)
    best_path, _score = MarkovModel.find_states(model, dnastrand)[0]
    return ''.join(best_path)
def test_topcoder5(self):
    """A single observed "T" must decode to the one-state path N."""
    hidden = "NR"
    symbols = "AGTC"
    start = array([1.0, 0.0])
    transition = array([[0.84, 0.16],
                        [0.25, 0.75]])
    emission = array([[0.26, 0.37, 0.08, 0.29],
                      [0.31, 0.13, 0.33, 0.23]])
    model = MarkovModel.MarkovModel(
        hidden, symbols, start, transition, emission)

    paths = MarkovModel.find_states(model, "T")

    # Exactly one path is expected back.
    self.assertEqual(len(paths), 1)
    best_path, _score = paths[0]
    self.assertEqual(best_path, ["N"])
def test_topcoder1(self):
    """The sequence "TGCC" must decode to the path NNNN."""
    hidden = "NR"
    symbols = "AGTC"
    start = array([1.0, 0.0])
    transition = array([[0.90, 0.10],
                        [0.20, 0.80]])
    emission = array([[0.30, 0.20, 0.30, 0.20],
                      [0.10, 0.40, 0.10, 0.40]])
    model = MarkovModel.MarkovModel(
        hidden, symbols, start, transition, emission)

    paths = MarkovModel.find_states(model, "TGCC")

    # Exactly one path is expected back.
    self.assertEqual(len(paths), 1)
    best_path, _score = paths[0]
    self.assertEqual(best_path, ["N", "N", "N", "N"])
def test_topcoder5(self):
    """A single observed "T" must decode to the one-state path N."""
    hidden = "NR"
    symbols = "AGTC"
    start = array([1.0, 0.0])
    transition = array([[0.84, 0.16],
                        [0.25, 0.75]])
    emission = array([[0.26, 0.37, 0.08, 0.29],
                      [0.31, 0.13, 0.33, 0.23]])
    model = MarkovModel.MarkovModel(
        hidden, symbols, start, transition, emission)

    paths = MarkovModel.find_states(model, "T")

    # Exactly one path is expected back.
    self.assertEqual(len(paths), 1)
    best_path, _score = paths[0]
    self.assertEqual(best_path, ["N"])
def test_topcoder4(self):
    """The sequence "TTAGCAGTGCG" must decode to N followed by ten R."""
    hidden = "NR"
    symbols = "AGTC"
    start = array([1.0, 0.0])
    transition = array([[0.55, 0.45],
                        [0.15, 0.85]])
    emission = array([[0.75, 0.03, 0.01, 0.21],
                      [0.34, 0.11, 0.39, 0.16]])
    model = MarkovModel.MarkovModel(
        hidden, symbols, start, transition, emission)

    paths = MarkovModel.find_states(model, "TTAGCAGTGCG")

    # Exactly one path is expected back.
    self.assertEqual(len(paths), 1)
    best_path, _score = paths[0]
    expected_path = [
        'N', 'R', 'R', 'R', 'R', 'R',
        'R', 'R', 'R', 'R', 'R',
    ]
    self.assertEqual(best_path, expected_path)
def test_topcoder3(self):
    """Decode a 25-symbol sequence: NRRRRRRRRRRRNNNNRRRRRRRRR."""
    hidden = "NR"
    symbols = "AGTC"
    start = array([1.0, 0.0])
    transition = array([[0.75, 0.25],
                        [0.25, 0.75]])
    emission = array([[0.45, 0.36, 0.06, 0.13],
                      [0.24, 0.18, 0.12, 0.46]])
    model = MarkovModel.MarkovModel(
        hidden, symbols, start, transition, emission)

    paths = MarkovModel.find_states(model, "CCGTACTTACCCAGGACCGCAGTCC")

    # Exactly one path is expected back.
    self.assertEqual(len(paths), 1)
    best_path, _score = paths[0]
    expected_path = [
        'N',
        'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
        'N', 'N', 'N', 'N',
        'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    ]
    self.assertEqual(best_path, expected_path)
def test_topcoder2(self):
    """The sequence "CCTGAGTTAGTCGT" must decode to NNNRRRNNRRNRRN."""
    hidden = "NR"
    symbols = "AGTC"
    start = array([1.0, 0.0])
    transition = array([[0.56, 0.44],
                        [0.25, 0.75]])
    emission = array([[0.04, 0.14, 0.62, 0.20],
                      [0.39, 0.15, 0.04, 0.42]])
    model = MarkovModel.MarkovModel(
        hidden, symbols, start, transition, emission)

    paths = MarkovModel.find_states(model, "CCTGAGTTAGTCGT")

    # Exactly one path is expected back.
    self.assertEqual(len(paths), 1)
    best_path, _score = paths[0]
    expected_path = [
        'N', 'N', 'N',
        'R', 'R', 'R',
        'N', 'N',
        'R', 'R',
        'N',
        'R', 'R',
        'N',
    ]
    self.assertEqual(best_path, expected_path)
def test_topcoder1(self):
    """The sequence "TGCC" must decode to the path NNNN."""
    hidden = "NR"
    symbols = "AGTC"
    start = array([1.0, 0.0])
    transition = array([[0.90, 0.10],
                        [0.20, 0.80]])
    emission = array([[0.30, 0.20, 0.30, 0.20],
                      [0.10, 0.40, 0.10, 0.40]])
    model = MarkovModel.MarkovModel(
        hidden, symbols, start, transition, emission)

    paths = MarkovModel.find_states(model, "TGCC")

    # Exactly one path is expected back.
    self.assertEqual(len(paths), 1)
    best_path, _score = paths[0]
    self.assertEqual(best_path, ['N', 'N', 'N', 'N'])
def _eval_mm(self, motif_num=1, seq=''):
    """Return a list of log scores of ``seq`` under one motif's HMM.

    ``motif_num`` is 1-based: the model used is
    ``self.hmms_list[motif_num - 1]``.  A window as long as the model's
    number of states is slid along ``seq``; for each window start the
    score of the first path returned by ``MarkovModel.find_states`` is
    taken and its natural log (after adding a tiny epsilon) is stored.

    The returned list has the same length as ``seq``: positions where no
    full window fits are padded with 0 at the end.

    Raises ValueError if ``seq`` is shorter than the number of states.
    """
    mm = self.hmms_list[motif_num - 1]
    window = len(mm.states)
    seq_len = len(seq)
    if seq_len < window:
        raise ValueError('Sequence must be at least as long as the motif')

    # Score every full-length window.  The slice must span ``window``
    # symbols; the previous ``start + window - 1`` upper bound dropped the
    # last symbol of each window (and produced an empty segment for a
    # one-state model).
    score = [
        MarkovModel.find_states(mm, seq[start:start + window])[0][1]
        for start in range(seq_len - window + 1)
    ]

    # Epsilon keeps log() defined when a window has probability zero.
    eps = 1e-100
    log_score = [math.log(x + eps) for x in score]

    # Zero padding so the result lines up with the input sequence.
    log_score.extend([0] * (seq_len - len(log_score)))
    return log_score
def _get_occurence_indexandscore_mm(self, seq, motif_num):
    """Return the top-scoring motif occurrences in ``seq``.

    The model used is ``self.hmms_list[motif_num]``.  Every window of
    ``len(model.states)`` letters is scored as the sum, over its letters,
    of the score of the first path ``MarkovModel.find_states`` returns for
    that single letter.  Windows whose score exceeds ``self.threshold``
    are kept.

    Returns a list of at most ``self.k`` tuples
    ``(start_index, last_index, score)`` where ``start_index`` is the
    window offset plus one and ``last_index`` is ``start_index`` plus the
    motif length, sorted with ``key=self._get_key`` in reverse order.
    """
    mm_i = self.hmms_list[motif_num]
    seq_len = len(seq)
    motif_len = len(mm_i.states)

    # The per-letter score does not depend on the window, so compute it
    # once per distinct letter instead of once per (window, position).
    # Assumes find_states is deterministic for a given model and letter.
    letter_scores = {}

    def score_of(letter):
        if letter not in letter_scores:
            letter_scores[letter] = MarkovModel.find_states(mm_i, letter)[0][1]
        return letter_scores[letter]

    hits = []
    for offset in range(seq_len - motif_len + 1):
        segment_score = 0
        for j in range(motif_len):
            segment_score += score_of(seq[offset + j])
        if segment_score > self.threshold:
            start_index = offset + 1
            hits.append((start_index, start_index + motif_len, segment_score))

    sorted_data = sorted(hits, key=self._get_key, reverse=True)
    return sorted_data[:self.k]
def test_train_visible(self):
    """Train on labelled sequences, then check decoding and parameters."""
    state_names = ["0", "1", "2", "3"]
    symbols = ["A", "C", "G", "T"]
    labelled = [
        ("AACCCGGGTTTTTTT", "001112223333333"),
        ("ACCGTTTTTTT", "01123333333"),
        ("ACGGGTTTTTT", "01222333333"),
        ("ACCGTTTTTTTT", "011233333333"),
    ]
    markov_model = MarkovModel.train_visible(state_names, symbols, labelled)

    # Decoding a short sequence yields a single path with a known score.
    paths = MarkovModel.find_states(markov_model, "AACGTT")
    self.assertEqual(len(paths), 1)
    best_path, best_score = paths[0]
    self.assertEqual(best_path, ['0', '0', '1', '2', '3', '3'])
    self.assertAlmostEqual(best_score, 0.0082128906)

    self.assertEqual(markov_model.states, ['0', '1', '2', '3'])
    self.assertEqual(markov_model.alphabet, ['A', 'C', 'G', 'T'])

    # Initial probabilities.
    expected_initial = [1.0, 0.0, 0.0, 0.0]
    self.assertEqual(len(markov_model.p_initial), 4)
    for idx, prob in enumerate(expected_initial):
        self.assertAlmostEqual(markov_model.p_initial[idx], prob)

    # Transition probabilities (default assertAlmostEqual precision).
    expected_transition = [
        [0.2, 0.8, 0.0, 0.0],
        [0.0, 0.5, 0.5, 0.0],
        [0.0, 0.0, 0.5, 0.5],
        [0.0, 0.0, 0.0, 1.0],
    ]
    self.assertEqual(len(markov_model.p_transition), 4)
    for row in range(4):
        self.assertEqual(len(markov_model.p_transition[row]), 4)
    for row, expected_row in enumerate(expected_transition):
        for col, prob in enumerate(expected_row):
            self.assertAlmostEqual(markov_model.p_transition[row][col], prob)

    # Emission probabilities, compared to four decimal places.
    expected_emission = [
        [0.666667, 0.111111, 0.111111, 0.111111],
        [0.083333, 0.750000, 0.083333, 0.083333],
        [0.083333, 0.083333, 0.750000, 0.083333],
        [0.031250, 0.031250, 0.031250, 0.906250],
    ]
    self.assertEqual(len(markov_model.p_emission), 4)
    for row in range(4):
        self.assertEqual(len(markov_model.p_emission[row]), 4)
    for row, expected_row in enumerate(expected_emission):
        for col, prob in enumerate(expected_row):
            self.assertAlmostEqual(
                markov_model.p_emission[row][col], prob, places=4)
# Script excerpt (Python 2 print statements): train a model from labelled
# data, decode one sequence, print the model, then begin setting up a
# second ("baum welch") scenario.
alphabet = ["A", "C", "G", "T"]
# Each pair holds two equal-length strings; presumably (observed letters,
# state label per letter) -- verify against MarkovModel.train_visible.
training_data = [
    ("AACCCGGGTTTTTTT", "001112223333333"),
    ("ACCGTTTTTTT", "01123333333"),
    ("ACGGGTTTTTT", "01222333333"),
    ("ACCGTTTTTTTT", "011233333333"),
    ]
print "Training HMM"
# NOTE(review): `states` is assigned before this excerpt; its value is not
# visible here.
mm = MarkovModel.train_visible(states, alphabet, training_data)
print "Classifying"
#print MarkovModel.find_states(mm, "AACGTT")
# Don't just print the result directly, as the float may have different
# precision on different platforms.  find_states returns a list containing
# a tuple containing a list (fine) and a float, so the float is formatted
# with a fixed number of decimals below.
states = MarkovModel.find_states(mm, "AACGTT")
for state_list, state_float in states:
    print "State %s, %0.10f" % (repr(state_list), state_float)
print_mm(mm)

# Second scenario: note that `states` and `alphabet` are rebound here.
print "TESTING baum welch"
states = ["CP", "IP"]
alphabet = ["cola", "ice_t", "lem"]
# presumably indices into `alphabet` -- TODO confirm
outputs = [
    (2, 1, 0)
    ]
print "Training HMM"
p_initial = [1.0, 0.0000001]
# Script excerpt (Python 2 print statements): train a model from labelled
# data, decode one sequence, print the model, then begin setting up a
# second ("baum welch") scenario.
alphabet = ["A", "C", "G", "T"]
# Each pair holds two equal-length strings; presumably (observed letters,
# state label per letter) -- verify against MarkovModel.train_visible.
training_data = [
    ("AACCCGGGTTTTTTT", "001112223333333"),
    ("ACCGTTTTTTT", "01123333333"),
    ("ACGGGTTTTTT", "01222333333"),
    ("ACCGTTTTTTTT", "011233333333"),
    ]
print "Training HMM"
# NOTE(review): `states` is assigned before this excerpt; its value is not
# visible here.
mm = MarkovModel.train_visible(states, alphabet, training_data)
print "Classifying"
#print MarkovModel.find_states(mm, "AACGTT")
# Don't just print the result directly, as the float may have different
# precision on different platforms.  find_states returns a list containing
# a tuple containing a list (fine) and a float, so the float is formatted
# with a fixed number of decimals below.
states = MarkovModel.find_states(mm, "AACGTT")
for state_list, state_float in states :
    print "State %s, %0.10f" % (repr(state_list), state_float)
print_mm(mm)

# Second scenario: note that `states` and `alphabet` are rebound here.
print "TESTING baum welch"
states = ["CP", "IP"]
alphabet = ["cola", "ice_t", "lem"]
# presumably indices into `alphabet` -- TODO confirm
outputs = [
    (2, 1, 0)
    ]
print "Training HMM"
p_initial = [1.0, 0.0000001]