def test_base_alphabet():
    """Check the symbols and string forms of a ``BaseAlphabet``.

    Also checks that building one from the five symbols ``ACGTK`` raises
    ``RuntimeError``.
    """
    alphabet = BaseAlphabet.create(b"ACGT", b"X")

    assert alphabet.symbols == b"ACGT"
    assert str(alphabet) == "{ACGT}"
    assert repr(alphabet) == "<BaseAlphabet:{ACGT}>"

    # NOTE(review): presumably rejected because a base alphabet holds exactly
    # four symbols -- confirm against the library.
    with pytest.raises(RuntimeError):
        BaseAlphabet.create(b"ACGTK", b"X")
def test_fragment():
    """Iterate over a ``Fragment`` and verify its first two steps.

    The fragment pairs the sequence ``ACAAAGATX`` with the path
    ``S(0) -> M1(1) -> M2(1) -> E(0)``. Only the first two fragment steps are
    inspected: their sub-sequence bytes, their step length, and state name.
    """
    abc = BaseAlphabet.create(b"ACGT", b"X")
    sequence = Sequence.create(b"ACAAAGATX", abc)

    start = MuteState.create(b"S", abc)
    end = MuteState.create(b"E", abc)
    match1 = NormalState.create(
        b"M1", abc, [log(0.8), log(0.2), log(0.01), log(0.01)]
    )
    match2 = NormalState.create(
        b"M2", abc, [log(0.4), log(0.6), log(0.1), log(0.6)]
    )

    steps = [
        Step.create(start, 0),
        Step.create(match1, 1),
        Step.create(match2, 1),
        Step.create(end, 0),
    ]
    path = Path.create(steps)
    fragment = Fragment(sequence, path)

    # (expected sub-sequence, expected step length, expected state)
    expected_prefix = (
        (b"", 0, start),
        (b"A", 1, match1),
    )
    frag_steps = iter(fragment)
    for subseq, seq_len, state in expected_prefix:
        frag_step = next(frag_steps)
        assert bytes(frag_step.sequence) == subseq
        assert frag_step.step.seq_len == seq_len
        assert frag_step.step.state.name == state.name
def nmm_example():
    """Build a small example HMM over the ``ACGU`` alphabet.

    The model has a mute begin state ``B``, two frame states ``M1`` and
    ``M2`` sharing the same base and codon log-probabilities, and a mute end
    state ``E``.

    Returns:
        A dict with keys ``"hmm"`` (the model), ``"dp"`` (the object returned
        by ``hmm.create_dp`` for the end state) and ``"alphabet"``.
    """
    alphabet = BaseAlphabet.create(b"ACGU", b"X")

    quarter = log(0.25)
    base_lprob = BaseLprob.create(alphabet, (quarter, quarter, quarter, quarter))

    codon_lprob = CodonLprob.create(alphabet)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codon_lprob.set_lprob(Codon.create(triplet, alphabet), log(prob))

    begin = MuteState.create(b"B", alphabet)
    frame1 = FrameState.create(
        b"M1", base_lprob, CodonMarg.create(codon_lprob), 0.02
    )
    frame2 = FrameState.create(
        b"M2", base_lprob, CodonMarg.create(codon_lprob), 0.01
    )
    end = MuteState.create(b"E", alphabet)

    model = HMM.create(alphabet)
    # Only the begin state is added with an explicit second argument.
    model.add_state(begin, log(0.5))
    for state in (frame1, frame2, end):
        model.add_state(state)

    transitions = (
        (begin, frame1, 0.8),
        (begin, frame2, 0.2),
        (frame1, frame2, 0.1),
        (frame1, end, 0.4),
        (frame2, end, 0.3),
    )
    for source, target, prob in transitions:
        model.set_transition(source, target, log(prob))

    return {"hmm": model, "dp": model.create_dp(end), "alphabet": alphabet}
def test_hmm():
    """Run Viterbi on a two-frame-state HMM and check the path log-likelihood.

    The model is built inline (begin ``B``, frame states ``M1``/``M2``, end
    ``E``), the sequence ``AUGAUU`` is decoded with ``dp.viterbi``, and the
    log-likelihood of the resulting path is compared to a reference value.
    """
    alphabet = BaseAlphabet.create(b"ACGU", b"X")

    quarter = log(0.25)
    base_lprob = BaseLprob.create(alphabet, (quarter, quarter, quarter, quarter))

    codon_lprob = CodonLprob.create(alphabet)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codon_lprob.set_lprob(Codon.create(triplet, alphabet), log(prob))

    begin = MuteState.create(b"B", alphabet)
    frame1 = FrameState.create(
        b"M1", base_lprob, CodonMarg.create(codon_lprob), 0.02
    )
    frame2 = FrameState.create(
        b"M2", base_lprob, CodonMarg.create(codon_lprob), 0.01
    )
    end = MuteState.create(b"E", alphabet)

    model = HMM.create(alphabet)
    # Only the begin state is added with an explicit second argument.
    model.add_state(begin, log(0.5))
    for state in (frame1, frame2, end):
        model.add_state(state)

    transitions = (
        (begin, frame1, 0.8),
        (begin, frame2, 0.2),
        (frame1, frame2, 0.1),
        (frame1, end, 0.4),
        (frame2, end, 0.3),
    )
    for source, target, prob in transitions:
        model.set_transition(source, target, log(prob))

    dp = model.create_dp(end)
    task = DPTask.create(dp)
    observed = Sequence.create(b"AUGAUU", alphabet)
    task.setup(observed)

    result = dp.viterbi(task)
    assert_allclose(
        model.loglikelihood(task.sequence, result.path),
        -7.069201008427531,
    )
def test_codon_iter():
    """Check that ``codon_iter`` yields 64 codons starting with AAA, AAC."""
    alphabet = BaseAlphabet.create(b"ACGT", b"X")
    all_codons = list(codon_iter(alphabet))

    assert len(all_codons) == 64
    leading = [codon.symbols for codon in all_codons[:2]]
    assert leading == [b"AAA", b"AAC"]
def test_frame_state():
    """Check ``FrameState.lprob`` and ``FrameState.decode`` against references.

    Two frame states are built from the same base/codon log-probabilities:
    one with a last ``create`` argument of ``0.0`` (checked before the codon
    table is normalized) and one with ``0.1`` (built after
    ``codonp.normalize()``). Emission log-probabilities are checked for
    sequences of length 1 to 6, then ``decode`` is checked for four inputs.
    """
    abc = BaseAlphabet.create(b"ACGU", b"X")
    quarter = log(0.25)
    base_lprob = BaseLprob.create(abc, (quarter, quarter, quarter, quarter))

    codon_lprob = CodonLprob.create(abc)
    codon_lprob.set_lprob(Codon.create(b"AUG", abc), log(0.8))
    codon_lprob.set_lprob(Codon.create(b"AUU", abc), log(0.1))

    def emission(state, symbols):
        # Log-probability of `state` emitting the given raw symbols.
        return state.lprob(Sequence.create(symbols, abc))

    # --- last argument 0.0: only exact codons with set lprob are non-zero ---
    # NOTE(review): the last argument is presumably an error rate -- confirm.
    strict_state = FrameState.create(
        b"M1", base_lprob, CodonMarg.create(codon_lprob), 0.0
    )
    assert lprob_is_zero(emission(strict_state, b"AUA"))
    assert_allclose(emission(strict_state, b"AUG"), log(0.8))
    assert_allclose(emission(strict_state, b"AUU"), log(0.1))
    for symbols in (b"AU", b"A", b"AUUA", b"AUUAA"):
        assert lprob_is_zero(emission(strict_state, symbols))

    # --- last argument 0.1, built on the normalized codon table ---
    codon_lprob.normalize()
    loose_state = FrameState.create(
        b"M1", base_lprob, CodonMarg.create(codon_lprob), 0.1
    )

    # (symbols, reference lprob, extra assert_allclose keyword arguments)
    emission_cases = (
        (b"AUA", -6.905597115665666, {}),
        (b"AUG", -0.5347732882047062, {"rtol": 1e-6}),
        (b"AUU", -2.5902373304999466, {"rtol": 1e-6}),
        (b"AU", -2.9158434238698336, {}),
        (b"A", -5.914503505971854, {}),
        (b"AUUA", -6.881032208841384, {}),
        (b"AUUAA", -12.08828960987379, {}),
    )
    for symbols, reference, tolerance in emission_cases:
        assert_allclose(emission(loose_state, symbols), reference, **tolerance)
    assert lprob_is_zero(emission(loose_state, b"AUUAAA"))

    # (symbols, reference lprob, symbols of the decoded codon)
    decode_cases = (
        (b"AUA", -7.128586690537968, b"AUG"),
        (b"AUAG", -4.813151489562624, b"AUG"),
        (b"A", -6.032286541628237, b"AUG"),
        (b"UUU", -8.110186062956258, b"AUU"),
    )
    for symbols, reference, decoded in decode_cases:
        lprob, codon = loose_state.decode(Sequence.create(symbols, abc))
        assert_allclose(lprob, reference)
        assert codon.symbols == decoded
def test_codon_state():
    """Check the name and emission log-probabilities of a ``CodonState``.

    Codons with an explicitly set log-probability must return it; ``ACU``,
    which is never set, is expected to return ``-inf``.
    """
    abc = BaseAlphabet.create(b"ACGU", b"X")

    codon_lprob = CodonLprob.create(abc)
    codon_lprob.set_lprob(Codon.create(b"AUG", abc), log(0.8))
    codon_lprob.set_lprob(Codon.create(b"AUU", abc), log(0.1))

    codon_state = CodonState.create(b"M1", codon_lprob)
    assert codon_state.name == b"M1"

    def emission(symbols):
        # Log-probability of the state emitting the given raw symbols.
        return codon_state.lprob(Sequence.create(symbols, abc))

    assert_allclose(emission(b"AUG"), log(0.8))
    assert_allclose(emission(b"AUU"), log(0.1))
    assert_allclose(emission(b"ACU"), -inf)
def test_base_lprob():
    """Check per-symbol lookups of ``BaseLprob`` and its length validation.

    Each of the four bases must return the log-probability it was created
    with. Creating a ``BaseLprob`` from only three values must raise.
    """
    base = BaseAlphabet.create(b"ACGT", b"X")
    probs = (0.1, 0.2, 0.3, 0.4)
    basep = BaseLprob.create(base, tuple(log(p) for p in probs))

    for symbol, prob in zip((b"A", b"C", b"G", b"T"), probs):
        assert_allclose(basep.lprob(symbol), log(prob))

    # The call is expected to raise, so its result is deliberately not bound:
    # an assignment here could never complete in a passing test.
    with pytest.raises(Exception):
        BaseLprob.create(base, (log(0.1), log(0.2), log(0.3)))
def test_codon_marg():
    """Check ``CodonMarg`` lookups for exact and wildcard codons.

    With ``X`` as the alphabet's second ``create`` argument, ``CAX`` is
    expected to equal the combined probability of ``CAA`` and ``CAT`` (0.80)
    and ``XXX`` the combined probability of all four codons set here (1.12).
    """
    abc = BaseAlphabet.create(b"ACGT", b"X")

    codon_lprob = CodonLprob.create(abc)
    assigned = (
        (b"AAA", 0.01),
        (b"AGA", 0.31),
        (b"CAA", 0.40),
        (b"CAT", 0.40),
    )
    for triplet, prob in assigned:
        codon_lprob.set_lprob(Codon.create(triplet, abc), log(prob))

    marginal = CodonMarg.create(codon_lprob)

    def marg_lprob(triplet):
        # Marginal log-probability of the (possibly wildcarded) codon.
        return marginal.lprob(Codon.create(triplet, abc))

    assert_allclose(marg_lprob(b"CAT"), log(0.40))
    assert_allclose(marg_lprob(b"CAX"), log(0.80), rtol=1e-6)
    assert_allclose(marg_lprob(b"XXX"), log(1.12), rtol=1e-6)
def test_codon():
    """Check reading and assigning ``Codon.symbols``.

    A valid three-symbol assignment (including the ``X`` symbol) must stick;
    assignments that are too long, too short, or contain the symbol ``D``
    must raise ``ValueError``.
    """
    abc = BaseAlphabet.create(b"ACGT", b"X")
    triplet = Codon.create(b"AAA", abc)
    assert triplet.symbols == b"AAA"

    triplet.symbols = b"GTX"
    assert triplet.symbols == b"GTX"

    # Four symbols, two symbols, and a symbol outside ``ACGT``/``X``.
    for invalid in (b"GTGG", b"GT", b"ADA"):
        with pytest.raises(ValueError):
            triplet.symbols = invalid
def test_codon_lprob():
    """Check set/get/normalize behaviour of ``CodonLprob``.

    The statements are order-dependent: normalizing an empty table must raise,
    normalizing a table with a single codon set makes that codon ``log(1.0)``,
    and re-setting the value afterwards is reflected by ``get_lprob``.
    """
    abc = BaseAlphabet.create(b"ACGT", b"X")
    table = CodonLprob.create(abc)

    def codon(symbols):
        # Fresh codon object for every lookup, as no codon is reused here.
        return Codon.create(symbols, abc)

    # Nothing has been set yet, so normalization is expected to fail.
    with pytest.raises(RuntimeError):
        table.normalize()

    table.set_lprob(codon(b"AAA"), log(0.01))
    assert_allclose(table.get_lprob(codon(b"AAA")), log(0.01))

    table.normalize()
    assert_allclose(table.get_lprob(codon(b"AAA")), log(1.0))

    table.set_lprob(codon(b"AAA"), log(0.01))
    assert_allclose(table.get_lprob(codon(b"AAA")), log(0.01))

    # A codon that was never set reports zero probability.
    assert lprob_is_zero(table.get_lprob(codon(b"ACA")))

    # Looking up a codon containing the ``X`` symbol must raise.
    with pytest.raises(RuntimeError):
        table.get_lprob(codon(b"AXA"))