def nmm_example():
    """Assemble a small four-state HMM (B, M1, M2, E) over the ``ACGU`` alphabet.

    Returns:
        A dict with the HMM under ``"hmm"``, the object returned by
        ``hmm.create_dp(E)`` under ``"dp"`` and the alphabet under
        ``"alphabet"``.
    """
    abc = BaseAlphabet.create(b"ACGU", b"X")

    # Same log-probability for each of the four bases.
    basep = BaseLprob.create(abc, (log(0.25),) * 4)

    # Only two codons get an explicit log-probability.
    codonp = CodonLprob.create(abc)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codonp.set_lprob(Codon.create(triplet, abc), log(prob))

    begin = MuteState.create(b"B", abc)
    match1 = FrameState.create(b"M1", basep, CodonMarg.create(codonp), 0.02)
    match2 = FrameState.create(b"M2", basep, CodonMarg.create(codonp), 0.01)
    end = MuteState.create(b"E", abc)

    hmm = HMM.create(abc)
    # B is the only state added with an explicit second argument.
    hmm.add_state(begin, log(0.5))
    for state in (match1, match2, end):
        hmm.add_state(state)

    transitions = (
        (begin, match1, 0.8),
        (begin, match2, 0.2),
        (match1, match2, 0.1),
        (match1, end, 0.4),
        (match2, end, 0.3),
    )
    for source, target, prob in transitions:
        hmm.set_transition(source, target, log(prob))

    return {"hmm": hmm, "dp": hmm.create_dp(end), "alphabet": abc}
def test_hmm():
    """Check the log-likelihood of the Viterbi path for ``AUGAUU``.

    Builds a B/M1/M2/E HMM over ``ACGU``, runs ``dp.viterbi`` on the
    sequence and compares ``hmm.loglikelihood`` of the resulting path with
    a fixed reference value.
    """
    abc = BaseAlphabet.create(b"ACGU", b"X")
    baset = BaseLprob.create(abc, (log(0.25),) * 4)

    codonp = CodonLprob.create(abc)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codonp.set_lprob(Codon.create(triplet, abc), log(prob))

    begin = MuteState.create(b"B", abc)
    match1 = FrameState.create(b"M1", baset, CodonMarg.create(codonp), 0.02)
    match2 = FrameState.create(b"M2", baset, CodonMarg.create(codonp), 0.01)
    end = MuteState.create(b"E", abc)

    hmm = HMM.create(abc)
    hmm.add_state(begin, log(0.5))
    for state in (match1, match2, end):
        hmm.add_state(state)

    transitions = (
        (begin, match1, 0.8),
        (begin, match2, 0.2),
        (match1, match2, 0.1),
        (match1, end, 0.4),
        (match2, end, 0.3),
    )
    for source, target, prob in transitions:
        hmm.set_transition(source, target, log(prob))

    dp = hmm.create_dp(end)
    task = DPTask.create(dp)
    task.setup(Sequence.create(b"AUGAUU", abc))

    # Score the path found by Viterbi against the reference value.
    path = dp.viterbi(task).path
    assert_allclose(hmm.loglikelihood(task.sequence, path), -7.069201008427531)
def test_frame_state():
    """Exercise ``FrameState.lprob`` and ``FrameState.decode``.

    The state is first created with ``0.0`` as its last argument, then
    (after ``codonp.normalize()``) with ``0.1``; the log-probabilities of
    sequences of length 1 to 6 and the decoded codons are compared with
    reference values.
    """
    base = BaseAlphabet.create(b"ACGU", b"X")
    basep = BaseLprob.create(base, (log(0.25),) * 4)

    codonp = CodonLprob.create(base)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codonp.set_lprob(Codon.create(triplet, base), log(prob))

    def seq(symbols):
        # Shorthand for a sequence over the alphabet used in this test.
        return Sequence.create(symbols, base)

    # --- last argument 0.0: only the two configured codons are non-zero.
    frame_state = FrameState.create(b"M1", basep, CodonMarg.create(codonp), 0.0)
    assert lprob_is_zero(frame_state.lprob(seq(b"AUA")))
    assert_allclose(frame_state.lprob(seq(b"AUG")), log(0.8))
    assert_allclose(frame_state.lprob(seq(b"AUU")), log(0.1))
    for symbols in (b"AU", b"A", b"AUUA", b"AUUAA"):
        assert lprob_is_zero(frame_state.lprob(seq(symbols)))

    # --- last argument 0.1, built after normalizing the codon table.
    codonp.normalize()
    frame_state = FrameState.create(b"M1", basep, CodonMarg.create(codonp), 0.1)

    # (sequence, expected log-probability, extra assert_allclose options)
    expected_lprobs = (
        (b"AUA", -6.905597115665666, {}),
        (b"AUG", -0.5347732882047062, {"rtol": 1e-6}),
        (b"AUU", -2.5902373304999466, {"rtol": 1e-6}),
        (b"AU", -2.9158434238698336, {}),
        (b"A", -5.914503505971854, {}),
        (b"AUUA", -6.881032208841384, {}),
        (b"AUUAA", -12.08828960987379, {}),
    )
    for symbols, value, options in expected_lprobs:
        assert_allclose(frame_state.lprob(seq(symbols)), value, **options)
    assert lprob_is_zero(frame_state.lprob(seq(b"AUUAAA")))

    # (sequence, expected log-probability, expected decoded codon)
    expected_decodings = (
        (b"AUA", -7.128586690537968, b"AUG"),
        (b"AUAG", -4.813151489562624, b"AUG"),
        (b"A", -6.032286541628237, b"AUG"),
        (b"UUU", -8.110186062956258, b"AUU"),
    )
    for symbols, value, decoded in expected_decodings:
        lprob, codon = frame_state.decode(seq(symbols))
        assert_allclose(lprob, value)
        assert codon.symbols == decoded
def test_codon_state():
    """Check the name and per-codon log-probabilities of a ``CodonState``."""
    base = BaseAlphabet.create(b"ACGU", b"X")

    configured = ((b"AUG", log(0.8)), (b"AUU", log(0.1)))
    codonp = CodonLprob.create(base)
    for triplet, lprob in configured:
        codonp.set_lprob(Codon.create(triplet, base), lprob)

    state = CodonState.create(b"M1", codonp)
    assert state.name == b"M1"

    # A codon that was never configured is expected to score -inf.
    for triplet, lprob in configured + ((b"ACU", -inf),):
        assert_allclose(state.lprob(Sequence.create(triplet, base)), lprob)
def __init__(
    self,
    base_abc: Union[DNAAlphabet, RNAAlphabet],
    amino_abc: AminoAlphabet,
    gencode: Optional[GeneticCode] = None,
):
    """Index the codons of a genetic code by amino acid, start and stop.

    Args:
        base_abc: Nucleotide alphabet the codons are created over. When it
            is an ``RNAAlphabet`` every ``T`` in a triplet is replaced by
            ``U`` before the codon is created.
        amino_abc: Amino-acid alphabet; stored unchanged.
        gencode: Genetic code handed to ``translation_table``. Defaults to
            ``GeneticCode("Standard")``.
    """
    self._base_alphabet = base_abc
    self._amino_alphabet = amino_abc

    if gencode is None:
        gencode = GeneticCode("Standard")
    table = translation_table(gencode)

    uses_rna = isinstance(base_abc, RNAAlphabet)

    def to_codon(triplet: str) -> Codon:
        # The table's triplets are text; encode them, switch T to U for
        # RNA alphabets, and build the codon over ``base_abc``.
        symbols = triplet.encode()
        if uses_rna:
            symbols = symbols.replace(b"T", b"U")
        return Codon.create(symbols, base_abc)

    # amino acid -> codons, in the table's iteration order.
    self._codons: Dict[bytes, List[Codon]] = {}
    self._start_codons = []
    for triplet, letter in table.forward_table.items():
        bucket = self._codons.setdefault(letter.encode(), [])
        codon = to_codon(triplet)
        if triplet in table.start_codons:
            self._start_codons.append(codon)
        bucket.append(codon)

    # Reverse lookup: codon -> amino acid.
    self._amino_acid: Dict[Codon, bytes] = {
        codon: aa for aa, codons in self._codons.items() for codon in codons
    }

    self._stop_codons = [to_codon(triplet) for triplet in table.stop_codons]

    # Coding and stop codons together must cover all 64 triplets.
    assert len(self._amino_acid) <= 64
    assert len(self._amino_acid) + len(self._stop_codons) == 64
def null_amino_stream(self):
    """Translate ``self.null_codon_stream`` into a string of amino acids.

    The stream is consumed three characters at a time; each chunk is
    turned into a codon over the codon table's base alphabet and looked up
    with ``amino_acid``. The decoded results are concatenated.
    """
    table = self._codon_table
    alphabet = table.base_alphabet
    stream = self.null_codon_stream

    triplets = (stream[start : start + 3] for start in range(0, len(stream), 3))
    return "".join(
        table.amino_acid(Codon.create(triplet.encode(), alphabet)).decode()
        for triplet in triplets
    )
def test_codon():
    """Check reading and assigning ``Codon.symbols``, including rejections."""
    base = BaseAlphabet.create(b"ACGT", b"X")
    codon = Codon.create(b"AAA", base)
    assert codon.symbols == b"AAA"

    codon.symbols = b"GTX"
    assert codon.symbols == b"GTX"

    # Too long, too short, and a symbol outside the alphabet.
    for invalid in (b"GTGG", b"GT", b"ADA"):
        with pytest.raises(ValueError):
            codon.symbols = invalid
def test_codon_marg():
    """Check ``CodonMarg.lprob`` for an exact codon and for ``X`` positions.

    The expected values for ``CAX`` and ``XXX`` equal the summed
    probabilities of the configured codons they match (0.40 + 0.40 and
    0.01 + 0.31 + 0.40 + 0.40).
    """
    base = BaseAlphabet.create(b"ACGT", b"X")

    def codon(symbols):
        return Codon.create(symbols, base)

    codonp = CodonLprob.create(base)
    configured = ((b"AAA", 0.01), (b"AGA", 0.31), (b"CAA", 0.40), (b"CAT", 0.40))
    for symbols, prob in configured:
        codonp.set_lprob(codon(symbols), log(prob))

    codonm = CodonMarg.create(codonp)
    assert_allclose(codonm.lprob(codon(b"CAT")), log(0.40))
    for symbols, total in ((b"CAX", 0.80), (b"XXX", 1.12)):
        assert_allclose(codonm.lprob(codon(symbols)), log(total), rtol=1e-6)
def test_codon_lprob():
    """Check ``CodonLprob`` set/get/normalize behaviour and its errors."""
    base = BaseAlphabet.create(b"ACGT", b"X")
    codonp = CodonLprob.create(base)

    def codon(symbols):
        # A fresh codon per call, as each lookup below uses its own object.
        return Codon.create(symbols, base)

    # Normalizing before any codon has been set is rejected.
    with pytest.raises(RuntimeError):
        codonp.normalize()

    codonp.set_lprob(codon(b"AAA"), log(0.01))
    assert_allclose(codonp.get_lprob(codon(b"AAA")), log(0.01))

    # With a single codon set, normalization brings it to probability one.
    codonp.normalize()
    assert_allclose(codonp.get_lprob(codon(b"AAA")), log(1.0))

    codonp.set_lprob(codon(b"AAA"), log(0.01))
    assert_allclose(codonp.get_lprob(codon(b"AAA")), log(0.01))

    assert lprob_is_zero(codonp.get_lprob(codon(b"ACA")))

    # A codon containing ``X`` cannot be queried.
    with pytest.raises(RuntimeError):
        codonp.get_lprob(codon(b"AXA"))