Beispiel #1
0
def nmm_example():
    """Assemble a four-state example HMM over the RNA alphabet ``ACGU``.

    The model consists of a mute state ``B`` (added with start
    log-probability ``log(0.5)``), two frame states ``M1`` and ``M2`` that
    share the same base and codon tables, and a mute state ``E`` that is
    used as the end state when the dynamic-programming object is created.

    Returns:
        A dict with the keys ``"hmm"``, ``"dp"`` and ``"alphabet"``.
    """
    alphabet = BaseAlphabet.create(b"ACGU", b"X")

    # Uniform distribution over the four bases.
    quarter = log(0.25)
    base_table = BaseLprob.create(alphabet, (quarter, quarter, quarter, quarter))

    # Only two codons receive an explicit log-probability.
    codon_table = CodonLprob.create(alphabet)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codon_table.set_lprob(Codon.create(triplet, alphabet), log(prob))

    begin = MuteState.create(b"B", alphabet)
    # NOTE(review): the last FrameState argument is presumably an error
    # rate (epsilon) -- confirm against the FrameState documentation.
    match1 = FrameState.create(
        b"M1", base_table, CodonMarg.create(codon_table), 0.02
    )
    match2 = FrameState.create(
        b"M2", base_table, CodonMarg.create(codon_table), 0.01
    )
    end = MuteState.create(b"E", alphabet)

    model = HMM.create(alphabet)
    model.add_state(begin, log(0.5))
    for state in (match1, match2, end):
        model.add_state(state)

    transitions = (
        (begin, match1, 0.8),
        (begin, match2, 0.2),
        (match1, match2, 0.1),
        (match1, end, 0.4),
        (match2, end, 0.3),
    )
    for source, target, prob in transitions:
        model.set_transition(source, target, log(prob))

    return {"hmm": model, "dp": model.create_dp(end), "alphabet": alphabet}
Beispiel #2
0
def _create_base_table(codonp: CodonLprob):
    """Build a base log-probability table from a codon log-probability table.

    For every codon produced by ``codon_iter`` over the codon table's
    alphabet, the codon's log-probability minus ``log(3)`` is accumulated
    (with ``lprob_add``) into the entry of each of its three symbols.

    Args:
        codonp: Codon log-probabilities to derive the base table from.

    Returns:
        The result of ``BaseLprob.create`` for the alphabet of ``codonp``,
        with one accumulated log-probability per alphabet symbol, in
        alphabet order.
    """
    alphabet = codonp.alphabet
    symbols = alphabet.symbols
    table = {symbol: lprob_zero() for symbol in symbols}

    log_three = log(3)
    for codon in codon_iter(alphabet):
        # Each of the three codon positions receives the same share.
        share = codonp.get_lprob(codon) - log_three
        triplet = codon.symbols
        for position in range(3):
            symbol = triplet[position]
            table[symbol] = lprob_add(table[symbol], share)

    # The table is only defined for four-symbol alphabets.
    assert len(table) == 4
    assert len(symbols) == 4
    return BaseLprob.create(
        alphabet,
        tuple(table[symbols[index]] for index in range(4)),
    )
Beispiel #3
0
def test_hmm():
    """Check the log-likelihood of the Viterbi path on a small frame HMM.

    Builds a model with mute states ``B`` and ``E`` and frame states ``M1``
    and ``M2``, runs Viterbi on the sequence ``AUGAUU`` and compares the
    log-likelihood of the resulting path against a reference value.
    """
    alphabet = BaseAlphabet.create(b"ACGU", b"X")
    uniform = log(0.25)
    base_table = BaseLprob.create(alphabet, (uniform, uniform, uniform, uniform))

    codon_table = CodonLprob.create(alphabet)
    for triplet, prob in ((b"AUG", 0.8), (b"AUU", 0.1)):
        codon_table.set_lprob(Codon.create(triplet, alphabet), log(prob))

    start = MuteState.create(b"B", alphabet)
    first = FrameState.create(
        b"M1", base_table, CodonMarg.create(codon_table), 0.02
    )
    second = FrameState.create(
        b"M2", base_table, CodonMarg.create(codon_table), 0.01
    )
    stop = MuteState.create(b"E", alphabet)

    model = HMM.create(alphabet)
    # Only the start state is added with an explicit log-probability.
    model.add_state(start, log(0.5))
    for state in (first, second, stop):
        model.add_state(state)

    edges = (
        (start, first, 0.8),
        (start, second, 0.2),
        (first, second, 0.1),
        (first, stop, 0.4),
        (second, stop, 0.3),
    )
    for origin, destination, prob in edges:
        model.set_transition(origin, destination, log(prob))

    dp = model.create_dp(stop)
    task = DPTask.create(dp)
    task.setup(Sequence.create(b"AUGAUU", alphabet))
    best = dp.viterbi(task)

    observed = model.loglikelihood(task.sequence, best.path)
    assert_allclose(observed, -7.069201008427531)
Beispiel #4
0
def test_frame_state():
    """Exercise ``FrameState.lprob`` and ``FrameState.decode``.

    First a frame state created with a last argument of ``0.0`` is checked:
    only the two codons with explicit log-probabilities score non-zero.
    Then the codon table is normalized, a state with ``0.1`` is created and
    its ``lprob``/``decode`` results are compared with reference values.
    """
    abc = BaseAlphabet.create(b"ACGU", b"X")
    quarter = log(0.25)
    base_table = BaseLprob.create(abc, (quarter, quarter, quarter, quarter))

    codon_table = CodonLprob.create(abc)
    codon_table.set_lprob(Codon.create(b"AUG", abc), log(0.8))
    codon_table.set_lprob(Codon.create(b"AUU", abc), log(0.1))

    def seq(symbols):
        # Fresh Sequence over the test alphabet for every query.
        return Sequence.create(symbols, abc)

    state = FrameState.create(
        b"M1", base_table, CodonMarg.create(codon_table), 0.0
    )

    assert lprob_is_zero(state.lprob(seq(b"AUA")))
    assert_allclose(state.lprob(seq(b"AUG")), log(0.8))
    assert_allclose(state.lprob(seq(b"AUU")), log(0.1))
    for symbols in (b"AU", b"A", b"AUUA", b"AUUAA"):
        assert lprob_is_zero(state.lprob(seq(symbols)))

    codon_table.normalize()
    state = FrameState.create(
        b"M1", base_table, CodonMarg.create(codon_table), 0.1
    )

    # (sequence, expected log-probability, extra assert_allclose keywords)
    lprob_cases = (
        (b"AUA", -6.905597115665666, {}),
        (b"AUG", -0.5347732882047062, {"rtol": 1e-6}),
        (b"AUU", -2.5902373304999466, {"rtol": 1e-6}),
        (b"AU", -2.9158434238698336, {}),
        (b"A", -5.914503505971854, {}),
        (b"AUUA", -6.881032208841384, {}),
        (b"AUUAA", -12.08828960987379, {}),
    )
    for symbols, expected, tolerance in lprob_cases:
        assert_allclose(state.lprob(seq(symbols)), expected, **tolerance)
    assert lprob_is_zero(state.lprob(seq(b"AUUAAA")))

    # (sequence, expected log-probability, expected decoded codon symbols)
    decode_cases = (
        (b"AUA", -7.128586690537968, b"AUG"),
        (b"AUAG", -4.813151489562624, b"AUG"),
        (b"A", -6.032286541628237, b"AUG"),
        (b"UUU", -8.110186062956258, b"AUU"),
    )
    for symbols, expected_lprob, expected_codon in decode_cases:
        lprob, codon = state.decode(seq(symbols))
        assert_allclose(lprob, expected_lprob)
        assert codon.symbols == expected_codon
Beispiel #5
0
def test_codon_state():
    """Check the name and emission log-probabilities of a ``CodonState``.

    Codons with an explicit entry return that entry; a codon that was never
    set (``ACU``) is expected to score ``-inf``.
    """
    abc = BaseAlphabet.create(b"ACGU", b"X")

    table = CodonLprob.create(abc)
    table.set_lprob(Codon.create(b"AUG", abc), log(0.8))
    table.set_lprob(Codon.create(b"AUU", abc), log(0.1))

    codon_state = CodonState.create(b"M1", table)
    assert codon_state.name == b"M1"

    expectations = (
        (b"AUG", log(0.8)),
        (b"AUU", log(0.1)),
        (b"ACU", -inf),
    )
    for symbols, expected in expectations:
        observed = codon_state.lprob(Sequence.create(symbols, abc))
        assert_allclose(observed, expected)
Beispiel #6
0
def test_codon_marg():
    """Check ``CodonMarg.lprob`` for exact and wildcard (``X``) codons.

    With the wildcard symbol in a position, the expected value is the log
    of the summed probabilities of the matching entries: ``CAX`` covers
    ``CAA`` and ``CAT`` (0.80) and ``XXX`` covers all four entries (1.12).
    """
    abc = BaseAlphabet.create(b"ACGT", b"X")

    table = CodonLprob.create(abc)
    entries = (
        (b"AAA", 0.01),
        (b"AGA", 0.31),
        (b"CAA", 0.40),
        (b"CAT", 0.40),
    )
    for triplet, prob in entries:
        table.set_lprob(Codon.create(triplet, abc), log(prob))

    marginal = CodonMarg.create(table)

    # The fully specified codon is compared with the default tolerance.
    assert_allclose(marginal.lprob(Codon.create(b"CAT", abc)), log(0.40))

    # Wildcard queries are compared with a looser relative tolerance.
    for triplet, total in ((b"CAX", 0.80), (b"XXX", 1.12)):
        observed = marginal.lprob(Codon.create(triplet, abc))
        assert_allclose(observed, log(total), rtol=1e-6)
def test_codon_lprob():
    """Check setting, reading and normalizing ``CodonLprob`` entries.

    Covers: normalizing an empty table raises ``RuntimeError``; a set value
    is read back; normalizing a single-entry table yields ``log(1.0)``;
    a value can be overwritten after normalization; an unset codon is zero;
    and querying a codon containing the wildcard raises ``RuntimeError``.
    """
    abc = BaseAlphabet.create(b"ACGT", b"X")
    table = CodonLprob.create(abc)

    def codon(symbols):
        # A new Codon object is built for every call, as in a direct call.
        return Codon.create(symbols, abc)

    # Nothing has been set yet, so normalization must fail.
    with pytest.raises(RuntimeError):
        table.normalize()

    table.set_lprob(codon(b"AAA"), log(0.01))
    assert_allclose(table.get_lprob(codon(b"AAA")), log(0.01))

    table.normalize()
    assert_allclose(table.get_lprob(codon(b"AAA")), log(1.0))

    table.set_lprob(codon(b"AAA"), log(0.01))
    assert_allclose(table.get_lprob(codon(b"AAA")), log(0.01))

    assert lprob_is_zero(table.get_lprob(codon(b"ACA")))

    # A codon containing the wildcard symbol cannot be looked up.
    with pytest.raises(RuntimeError):
        table.get_lprob(codon(b"AXA"))
Beispiel #8
0
def _create_codon_prob(aminot: AminoLprob, gencode: CodonTable) -> CodonLprob:
    """Spread amino-acid log-probabilities over their codons.

    Each amino acid's log-probability is divided evenly (in probability
    space) among the codons ``gencode`` lists for it; amino acids without
    codons are skipped. Before being stored, every codon value is reduced
    by the ``lprob_add`` total over all collected values.

    Args:
        aminot: Amino-acid log-probabilities.
        gencode: Codon table mapping amino acids to codons; also provides
            the base alphabet of the result.

    Returns:
        A ``CodonLprob`` over ``gencode.base_alphabet``.
    """
    table = CodonLprob.create(gencode.base_alphabet)

    amino_symbols = aminot.alphabet.symbols
    weighted = []
    total = lprob_zero()
    for start in range(len(amino_symbols)):
        # Slice instead of index to keep a length-one sequence of the same
        # type (presumably bytes, where indexing would yield an int).
        amino = amino_symbols[start:start + 1]
        amino_lprob = aminot.lprob(amino)

        codons = gencode.codons(amino)
        count = len(codons)
        if count == 0:
            continue

        # Every codon of this amino acid gets the same share.
        share = amino_lprob - log(count)
        for codon in codons:
            weighted.append((codon, share))
            total = lprob_add(total, share)

    for codon, value in weighted:
        table.set_lprob(codon, value - total)

    return table