def imm_example():
    """Build a small linear model S -> M1 -> M2 -> E over the symbols ``AC``.

    Returns:
        A dict holding the model under ``"hmm"``, the object returned by
        ``hmm.create_dp(E)`` under ``"dp"`` and the alphabet under
        ``"alphabet"``.
    """
    alphabet = Alphabet.create(b"AC", b"X")
    hmm = HMM.create(alphabet)

    # Only the start state S is registered with a non-zero start probability.
    start = MuteState.create(b"S", alphabet)
    hmm.add_state(start, log(1.0))

    end = MuteState.create(b"E", alphabet)
    hmm.add_state(end, lprob_zero())

    match1 = NormalState.create(b"M1", alphabet, [log(0.8), log(0.2)])
    hmm.add_state(match1, lprob_zero())

    match2 = NormalState.create(b"M2", alphabet, [log(0.4), log(0.6)])
    hmm.add_state(match2, lprob_zero())

    # E gets a temporary self-loop before normalize(); it is reset to zero
    # right after (presumably so that normalize() sees an outgoing
    # transition for E -- confirm against the library).
    chain = ((start, match1), (match1, match2), (match2, end), (end, end))
    for src, dst in chain:
        hmm.set_transition(src, dst, log(1.0))
    hmm.normalize()
    hmm.set_transition(end, end, lprob_zero())

    return {"hmm": hmm, "dp": hmm.create_dp(end), "alphabet": alphabet}
def test_fragment():
    """Check the first two items yielded when iterating a ``Fragment``.

    The fragment pairs the sequence ``ACAAAGATX`` with the path
    S(0) -> M1(1) -> M2(1) -> E(0); only the S and M1 steps are inspected.
    """
    alphabet = BaseAlphabet.create(b"ACGT", b"X")
    seq = Sequence.create(b"ACAAAGATX", alphabet)

    start = MuteState.create(b"S", alphabet)
    end = MuteState.create(b"E", alphabet)
    match1 = NormalState.create(
        b"M1",
        alphabet,
        [log(p) for p in (0.8, 0.2, 0.01, 0.01)],
    )
    match2 = NormalState.create(
        b"M2",
        alphabet,
        [log(p) for p in (0.4, 0.6, 0.1, 0.6)],
    )

    path = Path.create(
        [
            Step.create(start, 0),
            Step.create(match1, 1),
            Step.create(match2, 1),
            Step.create(end, 0),
        ]
    )

    frag_steps = iter(Fragment(seq, path))

    # (expected symbols, expected step length, expected state) per step.
    expectations = ((b"", 0, start), (b"A", 1, match1))
    for want_symbols, want_len, want_state in expectations:
        # Explicit next() so a too-short fragment still raises StopIteration.
        frag_step = next(frag_steps)
        assert bytes(frag_step.sequence) == want_symbols
        assert frag_step.step.seq_len == want_len
        assert frag_step.step.state.name == want_state.name
def test_hmm_viterbi_2():
    """Viterbi scores on the linear S -> M1 -> M2 -> E model over ``AC``.

    Each two-symbol sequence is decoded and the log-likelihood of the
    returned path is compared with the expected probability.  An extra
    M1 -> E transition is then added and two sequences are re-checked with
    a freshly created DP and task.
    """
    alphabet = Alphabet.create(b"AC", b"X")
    hmm = HMM.create(alphabet)

    start = MuteState.create(b"S", alphabet)
    hmm.add_state(start, log(1.0))

    end = MuteState.create(b"E", alphabet)
    hmm.add_state(end, lprob_zero())

    match1 = NormalState.create(b"M1", alphabet, [log(0.8), log(0.2)])
    hmm.add_state(match1, lprob_zero())

    match2 = NormalState.create(b"M2", alphabet, [log(0.4), log(0.6)])
    hmm.add_state(match2, lprob_zero())

    for src, dst in ((start, match1), (match1, match2), (match2, end), (end, end)):
        hmm.set_transition(src, dst, log(1.0))
    hmm.normalize()
    # The E -> E loop is only present while normalize() runs.
    hmm.set_transition(end, end, lprob_zero())

    dp = hmm.create_dp(end)
    dp_task = DPTask.create(dp)

    first_round = ((b"AC", 0.48), (b"AA", 0.32), (b"CA", 0.08), (b"CC", 0.12))
    for symbols, prob in first_round:
        seq = Sequence.create(symbols, alphabet)
        dp_task.setup(seq)
        r = dp.viterbi(dp_task)
        assert_allclose(hmm.loglikelihood(seq, r.path), log(prob))

    # Add a shortcut M1 -> E; the expected scores below are unchanged.
    hmm.set_transition(match1, end, log(1.0))
    dp = hmm.create_dp(end)
    dp_task = DPTask.create(dp)

    for symbols, prob in ((b"AC", 0.48), (b"AA", 0.32)):
        seq = Sequence.create(symbols, alphabet)
        dp_task.setup(seq)
        r = dp.viterbi(dp_task)
        assert_allclose(hmm.loglikelihood(seq, r.path), log(prob))
def test_hmm_viterbi_1():
    """Viterbi on the linear S -> M1 -> M2 -> E model over ``ACGU``.

    First confirms that the reset E -> E loop and the transitions that were
    never set read back as zero probability, then checks that the Viterbi
    path for ``AC`` has log-likelihood log(0.3).
    """
    alphabet = Alphabet.create(b"ACGU", b"X")
    hmm = HMM.create(alphabet)

    start = MuteState.create(b"S", alphabet)
    hmm.add_state(start, log(1.0))

    end = MuteState.create(b"E", alphabet)
    hmm.add_state(end, lprob_zero())

    m1_emission = [log(0.8), log(0.2), lprob_zero(), lprob_zero()]
    match1 = NormalState.create(b"M1", alphabet, m1_emission)
    hmm.add_state(match1, lprob_zero())

    m2_emission = [
        log(0.4 / 1.6),
        log(0.6 / 1.6),
        lprob_zero(),
        log(0.6 / 1.6),
    ]
    match2 = NormalState.create(b"M2", alphabet, m2_emission)
    hmm.add_state(match2, lprob_zero())

    for src, dst in ((start, match1), (match1, match2), (match2, end), (end, end)):
        hmm.set_transition(src, dst, log(1.0))
    hmm.normalize()
    hmm.set_transition(end, end, lprob_zero())

    # None of these transitions should carry any probability mass.
    for src, dst in ((end, end), (start, start), (start, end), (end, start)):
        assert_allclose(hmm.transition(src, dst), lprob_zero())

    dp = hmm.create_dp(end)
    dp_task = DPTask.create(dp)

    seq = Sequence.create(b"AC", alphabet)
    dp_task.setup(seq)
    result = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, result.path), log(0.3))
def test_normal_state():
    """Exercise ``NormalState``: name, emissions, length bounds and repr.

    Checks that:
    * ``lprob`` returns the configured log-probability for each symbol;
    * ``min_seq`` and ``max_seq`` are both 1;
    * ``lprob`` raises ``RuntimeError`` for a sequence built on a different
      ``Alphabet`` instance, even one with identical symbols;
    * ``lprob`` of a two-symbol sequence is zero probability;
    * ``str`` and ``repr`` produce ``"M0"`` and ``"<NormalState:M0>"``.
    """
    alphabet = Alphabet.create(b"ACGT", b"X")
    emission = (0.1, 0.2, 0.3, 0.3)
    state = NormalState.create(b"M0", alphabet, [log(p) for p in emission])
    assert state.name == b"M0"

    for symbol, prob in zip((b"A", b"C", b"G", b"T"), emission):
        assert_allclose(state.lprob(Sequence.create(symbol, alphabet)), log(prob))

    assert state.min_seq == 1
    assert state.max_seq == 1

    # Build the foreign-alphabet sequence outside pytest.raises so that only
    # a RuntimeError raised by lprob itself can satisfy the expectation.
    other_alphabet = Alphabet.create(b"ACGT", b"X")
    foreign_seq = Sequence.create(b"T", other_alphabet)
    with pytest.raises(RuntimeError):
        state.lprob(foreign_seq)

    assert lprob_is_zero(state.lprob(Sequence.create(b"AC", alphabet)))

    assert str(state) == "M0"
    assert repr(state) == "<NormalState:M0>"
def test_hmm_loglikelihood():
    """Check ``HMM.loglikelihood`` for every two-symbol sequence over ``ACGU``.

    The model is the linear chain S -> M1 -> M2 -> E.  Every sequence is
    scored against the path S(0), M1(1), M2(1), E(0) and compared with the
    expected log-probability.  Finally, a path that goes through a state
    which was never added to the model must raise ``ValueError``.
    """
    alphabet = Alphabet.create(b"ACGU", b"X")
    hmm = HMM.create(alphabet)

    S = MuteState.create(b"S", alphabet)
    hmm.add_state(S, log(1.0))

    E = MuteState.create(b"E", alphabet)
    hmm.add_state(E, lprob_zero())

    M1 = NormalState.create(
        b"M1",
        alphabet,
        [log(0.8), log(0.2), lprob_zero(), lprob_zero()],
    )
    hmm.add_state(M1, lprob_zero())

    M2 = NormalState.create(
        b"M2",
        alphabet,
        [log(0.4 / 1.6), log(0.6 / 1.6), lprob_zero(), log(0.6 / 1.6)],
    )
    hmm.add_state(M2, lprob_zero())

    hmm.set_transition(S, M1, log(1.0))
    hmm.set_transition(M1, M2, log(1.0))
    hmm.set_transition(M2, E, log(1.0))
    hmm.set_transition(E, E, log(1.0))
    hmm.normalize()

    def path_via(second_match):
        """Return a fresh path S(0) -> M1(1) -> second_match(1) -> E(0)."""
        return Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(second_match, 1),
                Step.create(E, 0),
            ]
        )

    # One entry per two-symbol sequence; each sequence is listed exactly once.
    expected = (
        (b"AC", log(0.3)),
        (b"AA", log(0.2)),
        (b"AG", lprob_zero()),
        (b"AU", log(0.3)),
        (b"CC", log(0.075)),
        (b"CA", log(0.05)),
        (b"CG", lprob_zero()),
        (b"CU", log(0.075)),
        (b"GC", lprob_zero()),
        (b"GA", lprob_zero()),
        (b"GG", lprob_zero()),
        (b"GU", lprob_zero()),
        (b"UC", lprob_zero()),
        (b"UA", lprob_zero()),
        (b"UG", lprob_zero()),
        (b"UU", lprob_zero()),
    )
    for symbols, want in expected:
        p = hmm.loglikelihood(Sequence.create(symbols, alphabet), path_via(M2))
        assert_allclose(p, want)

    # M3 reuses the name b"M2" but is never added to the model.
    M3 = NormalState.create(
        b"M2",
        alphabet,
        [log(0.4), log(0.6), lprob_zero(), log(0.6)],
    )

    # Construct the inputs outside pytest.raises so that only a ValueError
    # raised by loglikelihood itself can satisfy the expectation.
    seq = Sequence.create(b"UU", alphabet)
    foreign_path = path_via(M3)
    with pytest.raises(ValueError):
        hmm.loglikelihood(seq, foreign_path)
def test_hmm_viterbi_3():
    """Viterbi on a model with match states (M1, M2) and mute states (D1, D2).

    Decodes several sequences with a DP created for E, inspects the step
    lengths of the path found for ``AC``, then repeats the ``AC`` check with
    a DP created for M2, both before and after E is deleted from the model.
    """
    alphabet = Alphabet.create(b"AC", b"X")
    hmm = HMM.create(alphabet)

    start = MuteState.create(b"S", alphabet)
    hmm.add_state(start, log(1.0))

    end = MuteState.create(b"E", alphabet)
    hmm.add_state(end, lprob_zero())

    match1 = NormalState.create(b"M1", alphabet, [log(0.8), log(0.2)])
    hmm.add_state(match1, lprob_zero())

    delete1 = MuteState.create(b"D1", alphabet)
    hmm.add_state(delete1, lprob_zero())

    match2 = NormalState.create(b"M2", alphabet, [log(0.4), log(0.6)])
    hmm.add_state(match2, lprob_zero())

    delete2 = MuteState.create(b"D2", alphabet)
    hmm.add_state(delete2, lprob_zero())

    transitions = (
        (start, match1, 0.8),
        (start, delete1, 0.2),
        (match1, match2, 0.8),
        (match1, delete2, 0.2),
        (delete1, delete2, 0.2),
        (delete1, match2, 0.8),
        (delete2, end, 1.0),
        (match2, end, 1.0),
        (end, end, 1.0),
    )
    for src, dst, prob in transitions:
        hmm.set_transition(src, dst, log(prob))
    hmm.normalize()
    # The E -> E loop is only present while normalize() runs.
    hmm.set_transition(end, end, lprob_zero())

    def check_viterbi(dp, dp_task, symbols, prob):
        """Decode ``symbols`` and compare the path's log-likelihood with log(prob)."""
        seq = Sequence.create(symbols, alphabet)
        dp_task.setup(seq)
        found = dp.viterbi(dp_task)
        assert_allclose(hmm.loglikelihood(seq, found.path), log(prob))

    dp = hmm.create_dp(end)
    dp_task = DPTask.create(dp)

    seq = Sequence.create(b"AC", alphabet)
    dp_task.setup(seq)
    result = dp.viterbi(dp_task)
    score = hmm.loglikelihood(seq, result.path)
    assert bytes(result.sequence) == b"AC"

    steps = list(result.path)
    for position, want_len in enumerate((0, 1, 1, 0)):
        assert steps[position].seq_len == want_len
    assert_allclose(score, log(0.3072))

    for symbols, prob in ((b"AA", 0.2048), (b"A", 0.128), (b"AC", 0.3072)):
        check_viterbi(dp, dp_task, symbols, prob)

    # Same sequence with a DP created for M2 instead of E.
    dp = hmm.create_dp(match2)
    dp_task = DPTask.create(dp)
    check_viterbi(dp, dp_task, b"AC", 0.3072)

    # And once more after E has been removed from the model.
    hmm.del_state(end)
    dp = hmm.create_dp(match2)
    dp_task = DPTask.create(dp)
    check_viterbi(dp, dp_task, b"AC", 0.3072)