コード例 #1
0
def test_hmmer_prof():
    """Read the three models stored in ``three-profs.hmm.gz`` one after another.

    The first model is inspected in detail (header, metadata, alphabet and a
    sample of its values); for the second and third only ``LENG`` is checked.
    """
    # The resource is read in full right away, so the handle can be released
    # immediately. The context manager closes it even if an assertion below
    # fails, which the trailing ``buffer.close()`` of the old version did not.
    with pkg_resources.open_binary(
        hmmer_reader.data, "three-profs.hmm.gz"
    ) as buffer:
        content = gzip.decompress(buffer.read()).decode()

    hmmfile = open_hmmer(StringIO(content))

    hmm = hmmfile.read_model()
    assert hmm.header == "HMMER3/f [3.1b2 | February 2015]"
    assert dict(hmm.metadata)["LENG"] == "40"
    assert hmm.M == 40
    assert hmm.alphabet == "ACDEFGHIKLMNPQRSTVWY"
    assert abs(hmm.match(2)["V"] - -2.72416) < 1e-6
    assert abs(hmm.insert(2)["V"] - -2.98518) < 1e-6
    assert abs(hmm.trans(3)["DD"] - -0.9551) < 1e-6
    assert abs(hmm.compo["N"] - -3.18565) < 1e-6

    output = str(hmm)
    assert "SM    hmmsearch -Z 45638612 -E 1000 --cpu 4 HMM pfamseq" in output

    hmm = hmmfile.read_model()
    assert dict(hmm.metadata)["LENG"] == "235"

    hmm = hmmfile.read_model()
    assert dict(hmm.metadata)["LENG"] == "449"
コード例 #2
0
ファイル: test_profile.py プロジェクト: horta/iseq
def test_standard_profile_nonhomo_and_homologous(PF03373):
    """Search ``KKKPGKEDNNK`` with multiple hits enabled and then disabled.

    In both modes the result must consist of a non-homologous ``KKK`` fragment
    followed by a homologous ``PGKEDNNK`` fragment; only the expected
    log-likelihood differs between the two searches.
    """
    with open_hmmer(PF03373) as reader:
        model = reader.read_profile()
        profile = create_profile(model)

    target = Sequence(b"KKKPGKEDNNK", profile.alphabet)
    expected_fragments = ((False, b"KKK"), (True, b"PGKEDNNK"))

    def check_search(expected_loglik):
        # Run one search and verify the log-likelihood and both fragments.
        result = profile.search(target)
        assert_allclose(result.loglikelihood, expected_loglik)
        fragments = result.fragments
        assert_equal(len(fragments), 2)
        for idx, (homologous, symbols) in enumerate(expected_fragments):
            assert_equal(fragments[idx].homologous, homologous)
            assert_equal(fragments[idx].sequence.symbols, symbols)

    assert_equal(profile.multiple_hits, True)
    check_search(10.707618955640605)

    profile.multiple_hits = False
    assert_equal(profile.multiple_hits, False)
    check_search(10.96037578075283)
コード例 #3
0
ファイル: press.py プロジェクト: EBI-Metagenomics/deciphon-py
def press(hmm_filepath: Union[Path, str]):
    """Write every model of an HMMER file into a ``.dcp`` output.

    For each model read from ``hmm_filepath`` a profile is created over a DNA
    alphabet, and its alternative and null models are appended (in that
    order) to a ``DCPProfile`` that is then written to the output.

    Parameters
    ----------
    hmm_filepath
        Path to the HMMER file. The output file name is this path's file
        name with the suffix replaced by ``.dcp``.
    """
    hmm_filepath = Path(hmm_filepath)
    base_abc = nmm.DNAAlphabet()

    total = num_models(hmm_filepath)
    epsilon = 0.01
    # Only the final path component is kept, so any directory part of the
    # input path is dropped from the output name.
    out_name = hmm_filepath.with_suffix(".dcp").name.encode()

    with Output.create(out_name) as output, open_hmmer(hmm_filepath) as parser:
        for hmmer3 in tqdm(parser, total=total, desc="Pressing"):
            model = HMMERModel(hmmer3)
            fields = dict(hmmer3.metadata)
            name = fields["NAME"].encode()
            acc = fields["ACC"].encode()
            mt = Metadata.create(name, acc)
            prof = create_profile(model, base_abc, 0, epsilon)

            nprof = DCPProfile.create(base_abc, mt)

            # Alternative model first ...
            alt = prof.alt_model
            alt_hmm = alt.hmm
            alt_dp = alt_hmm.create_dp(alt.special_node.T)
            nprof.append_model(imm.Model.create(alt_hmm, alt_dp))

            # ... then the null model.
            null = prof.null_model
            null_hmm = null.hmm
            null_dp = null_hmm.create_dp(null.state)
            nprof.append_model(imm.Model.create(null_hmm, null_dp))

            output.write(nprof)
コード例 #4
0
ファイル: test_profile.py プロジェクト: horta/iseq
def test_frame_profile_frame1(PF03373):
    """Build the profile from ``PF03373`` and prepare an RNA sequence.

    NOTE(review): no assertions are visible here; the body may be truncated.
    """
    with open_hmmer(PF03373) as reader:
        model = reader.read_profile()
        hmmer = create_profile(model)

    # Under the standard genetic code these codons spell PGKEDNNK.
    spaced_codons = b"CCU GGU AAA GAA GAU AAU AAC AAA"
    most_likely_rna_seq = spaced_codons.replace(b" ", b"")
コード例 #5
0
def test_hmmer_reader_invalid_file():
    """Reading a model from the ``A0ALD9.fasta`` resource must raise ParsingError."""
    # The parser is handed the live text handle, so it has to stay open while
    # ``read_model`` runs. The context manager closes it afterwards even when
    # the expected exception is not raised and the test fails.
    with pkg_resources.open_text(hmmer_reader.data, "A0ALD9.fasta") as buffer:
        hmmfile = open_hmmer(buffer)

        with pytest.raises(ParsingError):
            hmmfile.read_model()
コード例 #6
0
def test_hmmer_reader_corrupted_file():
    """Reading ``PF02545.hmm.br.corrupted`` as text must raise UnicodeDecodeError."""
    # The parser is handed the live text handle, so it has to stay open while
    # ``read_model`` runs. The context manager closes it afterwards even when
    # the expected exception is not raised and the test fails.
    with pkg_resources.open_text(
        hmmer_reader.data, "PF02545.hmm.br.corrupted"
    ) as buffer:
        hmmfile = open_hmmer(buffer)

        with pytest.raises(UnicodeDecodeError):
            hmmfile.read_model()
コード例 #7
0
ファイル: test_profile.py プロジェクト: horta/iseq
def test_standard_profile_unihit_homologous_3(PF03373):
    """Searching ``PGKEPNNK`` must yield one homologous fragment spanning it."""
    with open_hmmer(PF03373) as reader:
        model = reader.read_profile()
        profile = create_profile(model)

    query = Sequence(b"PGKEPNNK", profile.alphabet)
    result = profile.search(query)
    assert_allclose(result.loglikelihood, 6.883636719423446)

    fragments = result.fragments
    assert_equal(len(fragments), 1)

    only_fragment = fragments[0]
    assert_equal(only_fragment.homologous, True)
    assert_equal(only_fragment.sequence.symbols, query.symbols)
コード例 #8
0
ファイル: test_profile.py プロジェクト: horta/iseq
def test_standard_profile_unihit_homologous_2(PF03373):
    """Searching ``PGKENNK`` must yield one homologous fragment spanning it.

    The fragment's string form is checked as well.
    """
    with open_hmmer(PF03373) as reader:
        model = reader.read_profile()
        profile = create_profile(model)

    query = Sequence(b"PGKENNK", profile.alphabet)
    result = profile.search(query)
    assert_allclose(result.loglikelihood, 3.299501501364073)

    fragments = result.fragments
    assert_equal(len(fragments), 1)

    only_fragment = fragments[0]
    assert_equal(only_fragment.homologous, True)
    assert_equal(only_fragment.sequence.symbols, query.symbols)
    assert_equal(str(only_fragment), "[PGKENNK]")
コード例 #9
0
def test_hmmer_reader():
    """Read the single model in ``PF02545.hmm.gz`` and check its contents.

    Covers the header line, the ``LENG`` metadata, the model length, the
    amino-acid alphabet, a sample of values and the string rendering.
    """
    # The resource is read in full right away, so the handle can be released
    # immediately. The context manager closes it even if an assertion below
    # fails, which the trailing ``buffer.close()`` of the old version did not.
    with pkg_resources.open_binary(hmmer_reader.data, "PF02545.hmm.gz") as buffer:
        content = gzip.decompress(buffer.read()).decode()

    hmmfile = open_hmmer(StringIO(content))

    hmm = hmmfile.read_model()
    assert hmm.header == "HMMER3/f [3.1b2 | February 2015]"
    assert dict(hmm.metadata)["LENG"] == "166"
    assert hmm.M == 166
    assert hmm.alphabet == "ACDEFGHIKLMNPQRSTVWY"
    assert abs(hmm.match(2)["V"] - -2.0152) < 1e-6
    assert abs(hmm.insert(2)["V"] - -2.98518) < 1e-6
    assert abs(hmm.trans(83)["DD"] - -0.94424) < 1e-6
    # Written as ``- -x`` like the checks above (same value as ``+ x``).
    assert abs(hmm.compo["N"] - -3.21795) < 1e-6

    output = str(hmm)
    assert "SM    hmmsearch -Z 45638612 -E 1000 --cpu 4 HMM pfamseq" in output
コード例 #10
0
def test_hmmer_reader_nt():
    """Read the nucleotide model in ``2OG-FeII_Oxy_3-nt.hmm.gz`` and check it.

    Covers the header line, the ``LENG`` metadata, the model length, the
    ``ACGT`` alphabet, a sample of values and the string rendering.
    """
    # The resource is read in full right away, so the handle can be released
    # immediately. The context manager closes it even if an assertion below
    # fails, which the trailing ``buffer.close()`` of the old version did not.
    with pkg_resources.open_binary(
        hmmer_reader.data, "2OG-FeII_Oxy_3-nt.hmm.gz"
    ) as buffer:
        content = gzip.decompress(buffer.read()).decode()

    hmmfile = open_hmmer(StringIO(content))

    hmm = hmmfile.read_model()
    assert hmm.header == "HMMER3/f [3.1b2 | February 2015]"
    assert dict(hmm.metadata)["LENG"] == "315"
    assert hmm.M == 315
    assert hmm.alphabet == "ACGT"
    assert abs(hmm.match(2)["A"] - -2.35771) < 1e-6
    assert abs(hmm.insert(2)["G"] - -1.38629) < 1e-6
    assert abs(hmm.trans(83)["DD"] - -0.40547) < 1e-6
    assert abs(hmm.compo["T"] - -1.50794) < 1e-6

    output = str(hmm)
    assert "DATE  Sun May 24 19:35:19 2015" in output
コード例 #11
0
ファイル: test_profile.py プロジェクト: horta/iseq
def test_standard_profile_unihit_homologous_1(PF03373):
    """Search ``PGKEDNNK`` before and after disabling multiple hits.

    Each search must return a single homologous fragment covering the whole
    sequence; only the expected log-likelihood differs.
    """
    with open_hmmer(PF03373) as reader:
        model = reader.read_profile()
        profile = create_profile(model)

    most_likely_seq = Sequence(b"PGKEDNNK", profile.alphabet)

    def check_single_hit(expected_loglik):
        # Run one search and verify it yields exactly one homologous fragment.
        result = profile.search(most_likely_seq)
        assert_allclose(result.loglikelihood, expected_loglik)
        fragments = result.fragments
        assert_equal(len(fragments), 1)
        only_fragment = fragments[0]
        assert_equal(only_fragment.homologous, True)
        assert_equal(only_fragment.sequence.symbols, most_likely_seq.symbols)

    check_single_hit(11.867796719423442)

    profile.multiple_hits = False
    check_single_hit(11.94063404337571)
コード例 #12
0
ファイル: test_profile.py プロジェクト: horta/iseq
def test_standard_profile_multihit_homologous1(PF03373):
    """Search a sequence containing two copies of ``PGKEDNNK``.

    With the profile as created, five fragments are expected (alternating
    non-homologous / homologous), and the per-step symbols and states of the
    first two fragments are checked. After setting ``multiple_hits`` to
    False, three fragments are expected with a single homologous one.
    """
    with open_hmmer(PF03373) as reader:
        model = reader.read_profile()
        profile = create_profile(model)

    target = Sequence(b"PPPPGKEDNNKDDDPGKEDNNKEEEE", profile.alphabet)

    def check_steps(fragment, expected_steps):
        # Compare (symbols, state string) pairs of a fragment, position by
        # position; explicit indexing keeps a too-short fragment an error.
        steps = list(fragment.items())
        for idx, (symbols, state) in enumerate(expected_steps):
            assert_equal(steps[idx][0], symbols)
            assert_equal(str(steps[idx][1]), state)

    result = profile.search(target)
    assert_allclose(result.loglikelihood, 20.329227532144742)
    fragments = result.fragments
    assert_equal(len(fragments), 5)

    expected_fragments = (
        (False, b"PPP"),
        (True, b"PGKEDNNK"),
        (False, b"DDD"),
        (True, b"PGKEDNNK"),
        (False, b"EEEE"),
    )
    for idx, (homologous, symbols) in enumerate(expected_fragments):
        assert_equal(fragments[idx].homologous, homologous)
        assert_equal(fragments[idx].sequence.symbols, symbols)

    check_steps(
        fragments[0],
        (
            (b"", "<S,0>"),
            (b"P", "<N,1>"),
            (b"P", "<N,1>"),
            (b"P", "<N,1>"),
            (b"", "<B,0>"),
        ),
    )

    check_steps(
        fragments[1],
        (
            (b"P", "<M1,1>"),
            (b"G", "<M2,1>"),
            (b"K", "<M3,1>"),
            (b"E", "<M4,1>"),
            (b"D", "<M5,1>"),
            (b"N", "<M6,1>"),
            (b"N", "<M7,1>"),
            (b"K", "<M8,1>"),
        ),
    )

    profile.multiple_hits = False
    result = profile.search(target)
    assert_allclose(result.loglikelihood, 8.666478660222928)
    fragments = result.fragments
    assert_equal(len(fragments), 3)
    assert_equal(fragments[0].homologous, False)
    assert_equal(fragments[1].homologous, True)
    assert_equal(fragments[1].sequence.symbols, b"PGKEDNNK")
    assert_equal(fragments[2].homologous, False)