def test_sequence_table():
    alphabet = Alphabet.create(b"ACGT", b"X")
    seqt = SequenceTable.create(alphabet)

    with pytest.raises(RuntimeError):
        seqt.normalize()

    seqt.add(Sequence.create(b"AGTG", alphabet), log(0.2))
    seqt.add(Sequence.create(b"T", alphabet), log(1.2))

    assert_allclose(seqt.lprob(Sequence.create(b"AGTG", alphabet)), log(0.2))
    assert_allclose(seqt.lprob(Sequence.create(b"T", alphabet)), log(1.2))
    assert lprob_is_zero(seqt.lprob(Sequence.create(b"", alphabet)))

    with pytest.raises(RuntimeError):
        seqt.lprob(Sequence.create(b"AT", Alphabet.create(b"AT", b"X")))

    with pytest.raises(RuntimeError):
        seqt.add(Sequence.create(b"AT", Alphabet.create(b"AT", b"X")), log(0.2))

    seqt.normalize()

    assert_allclose(seqt.lprob(Sequence.create(b"AGTG", alphabet)), log(0.2 / 1.4))
    assert_allclose(
        seqt.lprob(Sequence.create(b"T", alphabet)), log(1.2 / 1.4), rtol=1e-6
    )
Example #2
0
def test_alphabet():
    abc = Alphabet.create(b"ACGT", b"X")
    assert abc.length == 4

    assert abc.has_symbol(b"A")
    assert abc.has_symbol(b"C")
    assert abc.has_symbol(b"G")
    assert abc.has_symbol(b"T")

    assert abc.symbol_idx(b"A") == 0
    assert abc.symbol_idx(b"C") == 1
    assert abc.symbol_idx(b"G") == 2
    assert abc.symbol_idx(b"T") == 3

    assert abc.symbol_id(0) == b"A"
    assert abc.symbol_id(1) == b"C"
    assert abc.symbol_id(2) == b"G"
    assert abc.symbol_id(3) == b"T"

    assert abc.symbols == b"ACGT"

    assert str(abc) == "{ACGT}"
    assert repr(abc) == "<Alphabet:{ACGT}>"

    with pytest.raises(TypeError):
        Alphabet.create("ACGTç", b"X")

    with pytest.raises(RuntimeError):
        Alphabet.create("ACGTç".encode(), b"X")
Example #3
0
def imm_example():
    alphabet = Alphabet.create(b"AC", b"X")
    hmm = HMM.create(alphabet)

    S = MuteState.create(b"S", alphabet)
    hmm.add_state(S, log(1.0))

    E = MuteState.create(b"E", alphabet)
    hmm.add_state(E, lprob_zero())

    M1 = NormalState.create(b"M1", alphabet, [log(0.8), log(0.2)])
    hmm.add_state(M1, lprob_zero())

    M2 = NormalState.create(b"M2", alphabet, [log(0.4), log(0.6)])
    hmm.add_state(M2, lprob_zero())

    hmm.set_transition(S, M1, log(1.0))
    hmm.set_transition(M1, M2, log(1.0))
    hmm.set_transition(M2, E, log(1.0))
    hmm.set_transition(E, E, log(1.0))
    hmm.normalize()
    hmm.set_transition(E, E, lprob_zero())

    dp = hmm.create_dp(E)

    return {"hmm": hmm, "dp": dp, "alphabet": alphabet}
Example #4
0
def test_hmm_viterbi_2():
    alphabet = Alphabet.create(b"AC", b"X")
    hmm = HMM.create(alphabet)

    S = MuteState.create(b"S", alphabet)
    hmm.add_state(S, log(1.0))

    E = MuteState.create(b"E", alphabet)
    hmm.add_state(E, lprob_zero())

    M1 = NormalState.create(b"M1", alphabet, [log(0.8), log(0.2)])
    hmm.add_state(M1, lprob_zero())

    M2 = NormalState.create(b"M2", alphabet, [log(0.4), log(0.6)])
    hmm.add_state(M2, lprob_zero())

    hmm.set_transition(S, M1, log(1.0))
    hmm.set_transition(M1, M2, log(1.0))
    hmm.set_transition(M2, E, log(1.0))
    hmm.set_transition(E, E, log(1.0))
    hmm.normalize()
    hmm.set_transition(E, E, lprob_zero())

    dp = hmm.create_dp(E)
    dp_task = DPTask.create(dp)

    seq = Sequence.create(b"AC", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.48))

    seq = Sequence.create(b"AA", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.32))

    seq = Sequence.create(b"CA", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.08))

    seq = Sequence.create(b"CC", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.12))

    hmm.set_transition(M1, E, log(1.0))

    seq = Sequence.create(b"AC", alphabet)
    dp = hmm.create_dp(E)
    dp_task = DPTask.create(dp)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.48))

    seq = Sequence.create(b"AA", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.32))
Example #5
0
def test_mute_state():
    alphabet = Alphabet.create(b"ACGU", b"X")
    state = MuteState.create(b"S", alphabet)

    assert state.name == b"S"
    assert_allclose(state.lprob(Sequence.create(b"", alphabet)), log(1.0))
    assert lprob_is_zero(state.lprob(Sequence.create(b"AC", alphabet)))
    assert state.min_seq == 0
    assert state.max_seq == 0
    assert str(state) == "S"
    assert repr(state) == "<MuteState:S>"
Example #6
0
def test_table_state():
    alphabet = Alphabet.create(b"ACGU", b"X")
    seqt = SequenceTable.create(alphabet)
    seqt.add(Sequence.create(b"AUG", alphabet), log(0.8))
    seqt.add(Sequence.create(b"AUU", alphabet), log(0.4))

    state = TableState.create(b"M2", seqt)
    assert state.name == b"M2"
    assert_allclose(state.lprob(Sequence.create(b"AUG", alphabet)), log(0.8))
    assert_allclose(state.lprob(Sequence.create(b"AUU", alphabet)), log(0.4))
    assert str(state) == "M2"
    assert repr(state) == "<TableState:M2>"
Example #7
0
def test_hmm_trans_prob():
    alphabet = Alphabet.create(b"ACGU", b"X")
    hmm = HMM.create(alphabet)

    S = MuteState.create(b"S", alphabet)
    with pytest.raises(RuntimeError):
        hmm.set_start_lprob(S, log(0.4))
    hmm.add_state(S)

    E = MuteState.create(b"E", alphabet)
    with pytest.raises(RuntimeError):
        hmm.transition(S, E)

    with pytest.raises(ValueError):
        hmm.set_transition(S, E, lprob_zero())

    with pytest.raises(ValueError):
        hmm.set_transition(E, S, lprob_zero())

    with pytest.raises(ValueError):
        hmm.del_state(E)

    hmm.add_state(E)

    with pytest.raises(RuntimeError):
        hmm.set_transition(E, S, lprob_invalid())

    with pytest.raises(ValueError):
        hmm.normalize()

    hmm.set_transition(S, E, log(0.5))

    assert_allclose(hmm.transition(S, S), lprob_zero())
    assert_allclose(hmm.transition(S, E), log(0.5))
    assert_allclose(hmm.transition(E, S), lprob_zero())
    assert_allclose(hmm.transition(E, E), lprob_zero())

    with pytest.raises(ValueError):
        hmm.normalize()

    with pytest.raises(ValueError):
        hmm.normalize()

    hmm.set_start_lprob(S, log(0.4))
    hmm.set_transition(E, E, log(0.1))

    hmm.normalize()

    assert_allclose(hmm.transition(S, E), log(1.0))
    assert_allclose(hmm.transition(E, S), lprob_zero())
    assert_allclose(hmm.transition(S, S), lprob_zero())
    assert_allclose(hmm.transition(E, E), log(1.0))
Example #8
0
def test_normal_state():
    alphabet = Alphabet.create(b"ACGT", b"X")

    state = NormalState.create(
        b"M0",
        alphabet,
        [log(0.1), log(0.2), log(0.3), log(0.3)],
    )
    assert state.name == b"M0"
    assert_allclose(state.lprob(Sequence.create(b"A", alphabet)), log(0.1))
    assert_allclose(state.lprob(Sequence.create(b"C", alphabet)), log(0.2))
    assert_allclose(state.lprob(Sequence.create(b"G", alphabet)), log(0.3))
    assert_allclose(state.lprob(Sequence.create(b"T", alphabet)), log(0.3))
    assert state.min_seq == 1
    assert state.max_seq == 1

    with pytest.raises(RuntimeError):
        state.lprob(Sequence.create(b"T", Alphabet.create(b"ACGT", b"X")))

    assert lprob_is_zero(state.lprob(Sequence.create(b"AC", alphabet)))

    assert str(state) == "M0"
    assert repr(state) == "<NormalState:M0>"
Example #9
0
def test_sequence():
    alphabet = Alphabet.create(b"ACGT", b"X")
    seq = Sequence.create(b"ACAAAGATX", alphabet)

    assert len(seq) == 9
    assert bytes(seq) == b"ACAAAGATX"

    assert str(seq) == "ACAAAGATX"
    assert repr(seq) == "<Sequence:ACAAAGATX>"

    Sequence.create(b"ACGXXT", alphabet)

    with pytest.raises(RuntimeError):
        Sequence.create(b"ACGWT", alphabet)

    with pytest.raises(RuntimeError):
        Sequence.create("ACGTç".encode(), alphabet)
Example #10
0
def test_hmm_viterbi_1():
    alphabet = Alphabet.create(b"ACGU", b"X")
    hmm = HMM.create(alphabet)

    S = MuteState.create(b"S", alphabet)
    hmm.add_state(S, log(1.0))

    E = MuteState.create(b"E", alphabet)
    hmm.add_state(E, lprob_zero())

    M1 = NormalState.create(
        b"M1",
        alphabet,
        [log(0.8), log(0.2), lprob_zero(), lprob_zero()],
    )
    hmm.add_state(M1, lprob_zero())

    M2 = NormalState.create(
        b"M2",
        alphabet,
        [log(0.4 / 1.6), log(0.6 / 1.6), lprob_zero(), log(0.6 / 1.6)],
    )
    hmm.add_state(M2, lprob_zero())

    hmm.set_transition(S, M1, log(1.0))
    hmm.set_transition(M1, M2, log(1.0))
    hmm.set_transition(M2, E, log(1.0))
    hmm.set_transition(E, E, log(1.0))
    hmm.normalize()

    hmm.set_transition(E, E, lprob_zero())
    assert_allclose(hmm.transition(E, E), lprob_zero())
    assert_allclose(hmm.transition(S, S), lprob_zero())
    assert_allclose(hmm.transition(S, E), lprob_zero())
    assert_allclose(hmm.transition(E, S), lprob_zero())

    dp = hmm.create_dp(E)
    dp_task = DPTask.create(dp)
    seq = Sequence.create(b"AC", alphabet)
    dp_task.setup(seq)
    result = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, result.path), log(0.3))
Example #11
0
def test_hmm_states():
    alphabet = Alphabet.create(b"ACGU", b"X")
    hmm = HMM.create(alphabet)

    S = MuteState.create(b"S", alphabet)
    hmm.add_state(S)

    seqt = SequenceTable.create(alphabet)
    seqt.add(Sequence.create(b"AGU", alphabet), log(0.8))
    seqt.add(Sequence.create(b"AGG", alphabet), log(0.2))

    M = TableState.create(b"M", seqt)
    hmm.add_state(M)

    with pytest.raises(ValueError):
        hmm.add_state(S)

    with pytest.raises(ValueError):
        hmm.add_state(M)

    assert len(hmm.states()) == 2
Example #12
0
def test_hmm_loglikelihood():
    alphabet = Alphabet.create(b"ACGU", b"X")
    hmm = HMM.create(alphabet)

    S = MuteState.create(b"S", alphabet)
    hmm.add_state(S, log(1.0))

    E = MuteState.create(b"E", alphabet)
    hmm.add_state(E, lprob_zero())

    M1 = NormalState.create(
        b"M1",
        alphabet,
        [log(0.8), log(0.2), lprob_zero(), lprob_zero()],
    )
    hmm.add_state(M1, lprob_zero())

    M2 = NormalState.create(
        b"M2", alphabet, [log(0.4 / 1.6), log(0.6 / 1.6), lprob_zero(), log(0.6 / 1.6)]
    )
    hmm.add_state(M2, lprob_zero())

    hmm.set_transition(S, M1, log(1.0))
    hmm.set_transition(M1, M2, log(1.0))
    hmm.set_transition(M2, E, log(1.0))
    hmm.set_transition(E, E, log(1.0))
    hmm.normalize()

    p = hmm.loglikelihood(
        Sequence.create(b"AC", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, log(0.3))

    p = hmm.loglikelihood(
        Sequence.create(b"AA", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, log(0.2))

    p = hmm.loglikelihood(
        Sequence.create(b"AG", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"AU", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, log(0.3))

    p = hmm.loglikelihood(
        Sequence.create(b"CC", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, log(0.075))

    p = hmm.loglikelihood(
        Sequence.create(b"CA", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, log(0.05))

    p = hmm.loglikelihood(
        Sequence.create(b"CG", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"CG", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"CU", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, log(0.075))

    p = hmm.loglikelihood(
        Sequence.create(b"GC", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"GA", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"GG", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"GU", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"UC", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"UA", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"UG", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    p = hmm.loglikelihood(
        Sequence.create(b"UU", alphabet),
        Path.create(
            [
                Step.create(S, 0),
                Step.create(M1, 1),
                Step.create(M2, 1),
                Step.create(E, 0),
            ]
        ),
    )
    assert_allclose(p, lprob_zero())

    M3 = NormalState.create(
        b"M2",
        alphabet,
        [log(0.4), log(0.6), lprob_zero(), log(0.6)],
    )

    with pytest.raises(ValueError):
        hmm.loglikelihood(
            Sequence.create(b"UU", alphabet),
            Path.create(
                [
                    Step.create(S, 0),
                    Step.create(M1, 1),
                    Step.create(M3, 1),
                    Step.create(E, 0),
                ]
            ),
        )
Example #13
0
def test_hmm_viterbi_3():
    alphabet = Alphabet.create(b"AC", b"X")
    hmm = HMM.create(alphabet)

    S = MuteState.create(b"S", alphabet)
    hmm.add_state(S, log(1.0))

    E = MuteState.create(b"E", alphabet)
    hmm.add_state(E, lprob_zero())

    M1 = NormalState.create(b"M1", alphabet, [log(0.8), log(0.2)])
    hmm.add_state(M1, lprob_zero())

    D1 = MuteState.create(b"D1", alphabet)
    hmm.add_state(D1, lprob_zero())

    M2 = NormalState.create(b"M2", alphabet, [log(0.4), log(0.6)])
    hmm.add_state(M2, lprob_zero())

    D2 = MuteState.create(b"D2", alphabet)
    hmm.add_state(D2, lprob_zero())

    hmm.set_transition(S, M1, log(0.8))
    hmm.set_transition(S, D1, log(0.2))

    hmm.set_transition(M1, M2, log(0.8))
    hmm.set_transition(M1, D2, log(0.2))

    hmm.set_transition(D1, D2, log(0.2))
    hmm.set_transition(D1, M2, log(0.8))

    hmm.set_transition(D2, E, log(1.0))
    hmm.set_transition(M2, E, log(1.0))
    hmm.set_transition(E, E, log(1.0))
    hmm.normalize()
    hmm.set_transition(E, E, lprob_zero())

    dp = hmm.create_dp(E)
    dp_task = DPTask.create(dp)
    seq = Sequence.create(b"AC", alphabet)
    dp_task.setup(seq)
    result = dp.viterbi(dp_task)
    score = hmm.loglikelihood(seq, result.path)
    assert bytes(result.sequence) == b"AC"
    path = result.path
    steps = list(path)
    assert steps[0].seq_len == 0
    assert steps[1].seq_len == 1
    assert steps[2].seq_len == 1
    assert steps[3].seq_len == 0

    assert_allclose(score, log(0.3072))

    seq = Sequence.create(b"AA", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.2048))

    seq = Sequence.create(b"A", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.128))

    seq = Sequence.create(b"AC", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.3072))

    dp = hmm.create_dp(M2)
    dp_task = DPTask.create(dp)
    seq = Sequence.create(b"AC", alphabet)
    dp_task.setup(seq)
    r = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, r.path), log(0.3072))

    hmm.del_state(E)

    dp = hmm.create_dp(M2)
    dp_task = DPTask.create(dp)
    seq = Sequence.create(b"AC", alphabet)
    dp_task.setup(seq)
    result = dp.viterbi(dp_task)
    assert_allclose(hmm.loglikelihood(seq, result.path), log(0.3072))