def test_30_hmmscan_002(self):
        """Parse tab_30_hmmscan_002: hmmscan 3.0, single query, no hits.

        The hmmer3-tab parser is expected to yield nothing for this file, so
        the very first ``next()`` call must raise ``StopIteration``.
        """
        qresults = parse(get_file('tab_30_hmmscan_002.out'), FMT)

        # the iterator must already be exhausted on the first request
        with self.assertRaises(StopIteration):
            next(qresults)
    def test_tab_2226_tblastn_013(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_013).

        The file is parsed with the custom column layout ``qseq std sseq``.
        The test expects placeholder program/target/version values, checks
        the first and last hits of the only query result (including the
        aligned sequences), and finally that the iterator is exhausted.
        """
        tab_file = get_file('tab_2226_tblastn_013.txt')
        qresults = parse(tab_file, FMT, fields="qseq std sseq")
        counter = 0

        # use the built-in next(): iterators only expose a .next() method on
        # Python 2, while next() works on Python 2.6+ and Python 3
        qresult = next(qresults)
        counter += 1

        self.assertEqual('<unknown program>', qresult.program)
        self.assertEqual('<unknown target>', qresult.target)
        self.assertEqual('<unknown version>', qresult.version)
        self.assertEqual(3, len(qresult))

        # first hit and its only HSP
        hit = qresult[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
        self.assertEqual(34.88, hsp.ident_pct)
        self.assertEqual(43, hsp.aln_span)
        self.assertEqual(28, hsp.mismatch_num)
        self.assertEqual(0, hsp.gapopen_num)
        self.assertEqual(30, hsp.query_start)
        self.assertEqual(73, hsp.query_end)
        self.assertEqual(1743, hsp.hit_start)
        self.assertEqual(1872, hsp.hit_end)
        self.assertEqual(1e-05, hsp.evalue)
        self.assertEqual(34.7, hsp.bitscore)
        self.assertEqual('PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD', str(hsp.query.seq))
        self.assertEqual('PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID', str(hsp.hit.seq))

        # last hit and its only HSP
        hit = qresult[-1]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
        self.assertEqual(33.90, hsp.ident_pct)
        self.assertEqual(59, hsp.aln_span)
        self.assertEqual(31, hsp.mismatch_num)
        self.assertEqual(1, hsp.gapopen_num)
        self.assertEqual(43, hsp.query_start)
        self.assertEqual(94, hsp.query_end)
        self.assertEqual(1056, hsp.hit_start)
        self.assertEqual(1233, hsp.hit_end)
        self.assertEqual(1e-04, hsp.evalue)
        self.assertEqual(31.6, hsp.bitscore)
        self.assertEqual('GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL', str(hsp.query.seq))
        self.assertEqual('GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL', str(hsp.hit.seq))

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)
    def test_30_hmmscan_003(self):
        """Parse tab_30_hmmscan_003: hmmscan 3.0, one query, one hit, one HSP.

        Verifies the identifiers and statistics of the only query, hit and
        HSP in the hmmer3-tab file, then confirms the iterator is exhausted.
        """
        qresults = parse(get_file('tab_30_hmmscan_003.out'), FMT)
        counter = 0

        qresult = next(qresults)
        counter += 1
        self.assertEqual(1, len(qresult))
        self.assertEqual('gi|4885477|ref|NP_005359.1|', qresult.id)
        self.assertEqual('-', qresult.accession)

        hit = qresult[0]
        self.assertEqual(1, len(hit))
        # (attribute, expected value) pairs, checked in this order
        hit_checks = (
            ('id', 'Globin'),
            ('accession', 'PF00042.17'),
            ('evalue', 6e-21),
            ('bitscore', 74.6),
            ('bias', 0.3),
            ('domain_exp_num', 1.3),
            ('region_num', 1),
            ('cluster_num', 0),
            ('overlap_num', 0),
            ('env_num', 1),
            ('domain_obs_num', 1),
            ('domain_reported_num', 1),
            ('domain_included_num', 1),
            ('description', 'Globin'),
        )
        for attr, expected in hit_checks:
            self.assertEqual(expected, getattr(hit, attr))

        hsp = hit.hsps[0]
        hsp_checks = (('evalue', 9.2e-21), ('bitscore', 74.0), ('bias', 0.2))
        for attr, expected in hsp_checks:
            self.assertEqual(expected, getattr(hsp, attr))

        # a second request must fail: only one query result was consumed
        with self.assertRaises(StopIteration):
            next(qresults)
        self.assertEqual(1, counter)
    def test_hmmpfam_21(self):
        """Parse hmmpfam 2.1 text output (text_21_hmmpfam_001.out).

        Checks the fields of the first query result, its single hit, and both
        domains (HSPs) of that hit, including the three alignment strings.
        """
        source = path.join("Hmmer", "text_21_hmmpfam_001.out")
        records = parse(source, self.fmt)
        res = next(records)

        query_checks = (
            ("id", "roa1_drome"),
            ("description", "<unknown description>"),
            ("program", "hmmpfam"),
            ("version", "2.1.1"),
            ("target", "pfam"),
        )
        for attr, expected in query_checks:
            self.assertEqual(expected, getattr(res, attr))
        self.assertEqual(1, len(res))

        hit = res[0]
        self.assertEqual("SEED", hit.id)
        self.assertEqual("<unknown description>", hit.description)
        # scores are floats parsed from text, hence the approximate comparison
        self.assertAlmostEqual(146.1, hit.bitscore)
        self.assertAlmostEqual(6.3e-40, hit.evalue)
        self.assertEqual(2, hit.domain_obs_num)
        self.assertEqual(2, len(hit))

        # per domain: exact checks, approximate checks, then
        # (hit sequence, similarity line, query sequence)
        domains = (
            (
                (
                    ("domain_index", 1),
                    ("hit_start", 0),
                    ("hit_end", 77),
                    ("hit_endtype", "[]"),
                    ("query_start", 32),
                    ("query_end", 103),
                    ("query_endtype", ".."),
                ),
                (("bitscore", 71.2), ("evalue", 2.2e-17)),
                (
                    "lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv",
                    "lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf+F+++  ++  + A +    +++++gr+++ ",
                    "LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP",
                ),
            ),
            (
                (
                    ("domain_index", 2),
                    ("hit_start", 0),
                    ("hit_end", 77),
                    ("hit_endtype", "[]"),
                    ("query_start", 123),
                    ("query_end", 194),
                    ("query_endtype", ".."),
                ),
                (("bitscore", 75.5), ("evalue", 1.1e-18)),
                (
                    "lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv",
                    "lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +GfaFVeF++++ ++k +     ++l+g+ + v",
                    "LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV",
                ),
            ),
        )
        for position, (exact, approximate, alignment) in enumerate(domains):
            hsp = hit[position]
            for attr, expected in exact:
                self.assertEqual(expected, getattr(hsp, attr))
            for attr, expected in approximate:
                self.assertAlmostEqual(expected, getattr(hsp, attr))
            hit_seq, similarity, query_seq = alignment
            self.assertEqual(hit_seq, str(hsp.hit.seq))
            self.assertEqual(similarity, str(hsp.aln_annotation["similarity"]))
            self.assertEqual(query_seq, str(hsp.query.seq))
    def test_tab_2228_tblastx_001(self):
        """Parse TBLASTX 2.2.28+ commented tabular output (tab_2228_tblastx_001).

        Every column listed in ``all_fields`` is requested. The test checks
        the overall query/hit/HSP counts and then only the hit and HSP fields
        that are new in 2.2.28+.
        """
        tab_file = get_file('tab_2228_tblastx_001.txt')
        options = {'fields': list(all_fields.values()), 'comments': True}
        qresults = list(parse(tab_file, FMT, **options))

        # a single query, with 192 hits and 243 hsps in total
        self.assertEqual(1, len(qresults))
        qresult = qresults[0]
        self.assertEqual(192, len(qresult.hits))
        self.assertEqual(243, sum(len(each_hit) for each_hit in qresult))

        # only the fields introduced in 2.2.28+ are inspected
        hit = qresult[0]
        new_field_checks = (
            ('accession_all', ['NM_001183135', 'EF059095']),
            ('tax_ids', ['32630', '559292']),
            ('sci_names', ['N/A', 'N/A']),
            ('com_names', ['N/A', 'N/A']),
            ('blast_names', ['N/A']),
            ('super_kingdoms', ['N/A']),
            ('title', 'Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA'),
            ('title_all', [
                'Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA',
                'Synthetic construct Saccharomyces cerevisiae clone '
                'FLH203015.01X MON2, complete sequence',
            ]),
            ('strand', 'N/A'),
            ('query_coverage', 100.0),
        )
        for attr, expected in new_field_checks:
            self.assertEqual(expected, getattr(hit, attr))

        # the first four HSPs all happen to report 99; this need not hold
        # for other files
        for hsp in hit[:4]:
            self.assertEqual(99.0, hsp.query_coverage)
        for position, coverage in ((5, 73.0), (6, 12.0)):
            self.assertEqual(coverage, hit[position].query_coverage)
    def test_domtab_30_hmmscan_002(self):
        """Test parsing hmmscan-domtab, hmmscan 3.0, single query, no hits (domtab_30_hmmscan_002).

        The parser is expected to yield no query results for this file, so
        the first request on the iterator must raise ``StopIteration``.
        """
        tab_file = get_file('domtab_30_hmmscan_002.out')
        qresults = parse(tab_file, self.fmt)

        # use the built-in next(): iterators only expose a .next() method on
        # Python 2, while next() works on Python 2.6+ and Python 3
        self.assertRaises(StopIteration, next, qresults)
    def test_tab_2226_tblastn_002(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_002).

        The parser is expected to yield no query results for this file, so
        the first request on the iterator must raise ``StopIteration``.
        """
        tab_file = get_file('tab_2226_tblastn_002.txt')
        qresults = parse(tab_file, FMT)

        # check if we've finished iteration over qresults; the built-in
        # next() is used because the .next() method only exists on Python 2
        self.assertRaises(StopIteration, next, qresults)
    def test_31b1_hmmscan_001(self):
        """Parse tab_31b1_hmmscan_001: hmmscan 3.1b1, multiple queries.

        Four query results are expected. The first query (first hit, first
        HSP) and the last query (last hit, first HSP) are checked in detail.
        """
        qresults = list(parse(get_file('tab_31b1_hmmscan_001.out'), FMT))
        self.assertEqual(4, len(qresults))

        # attribute names shared by both detailed checks, in check order
        hit_attrs = (
            'id', 'accession', 'evalue', 'bitscore', 'bias', 'domain_exp_num',
            'region_num', 'cluster_num', 'overlap_num', 'env_num',
            'domain_obs_num', 'domain_reported_num', 'domain_included_num',
            'description',
        )
        hsp_attrs = ('evalue', 'bitscore', 'bias')

        # (index of query and hit, hit count, query id, hit values, hsp values)
        cases = (
            (
                0, 1, 'gi|4885477|ref|NP_005359.1|',
                ('Globin', 'PF00042.17', 1e-22, 80.5, 0.3, 1.3,
                 1, 0, 0, 1, 1, 1, 1, 'Globin'),
                (1.6e-22, 79.8, 0.3),
            ),
            (
                -1, 5, 'gi|125490392|ref|NP_038661.2|',
                ('DUF521', 'PF04412.8', 0.15, 10.5, 0.1, 1.4,
                 1, 0, 0, 1, 1, 1, 0, 'Protein of unknown function (DUF521)'),
                (0.28, 9.6, 0.1),
            ),
        )
        for index, hit_count, query_id, hit_values, hsp_values in cases:
            qresult = qresults[index]
            self.assertEqual(hit_count, len(qresult))
            self.assertEqual(query_id, qresult.id)
            self.assertEqual('-', qresult.accession)

            hit = qresult[index]
            self.assertEqual(1, len(hit))
            for attr, expected in zip(hit_attrs, hit_values):
                self.assertEqual(expected, getattr(hit, attr))

            hsp = hit.hsps[0]
            for attr, expected in zip(hsp_attrs, hsp_values):
                self.assertEqual(expected, getattr(hsp, attr))
    def test_hmmpfam_23_break_in_end_of_seq(self):
        """Parse hmmpfam 2.3 output with a line break in the end-of-seq marker.

        Uses text_23_hmmpfam_004.out and checks the IDs of the first two hits
        of the first query result.
        """
        source = path.join("Hmmer", "text_23_hmmpfam_004.out")
        records = parse(source, self.fmt)
        first = next(records)
        for position, expected_id in enumerate(('PKSI-KS', 'PKSI-FK')):
            self.assertEqual(expected_id, first[position].id)
    def test_tab_2226_tblastn_007(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_007).

        The file is parsed with ``comments=True``. The test checks the
        program/target/version/id of the only query result, the first and
        last of its three hits, and finally that the iterator is exhausted.
        """
        tab_file = get_file('tab_2226_tblastn_007.txt')
        qresults = parse(tab_file, FMT, comments=True)
        counter = 0

        # use the built-in next(): iterators only expose a .next() method on
        # Python 2, while next() works on Python 2.6+ and Python 3
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
        self.assertEqual(3, len(qresult))

        # first hit and its only HSP
        hit = qresult[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
        self.assertEqual(34.88, hsp.ident_pct)
        self.assertEqual(43, hsp.aln_span)
        self.assertEqual(28, hsp.mismatch_num)
        self.assertEqual(0, hsp.gapopen_num)
        self.assertEqual(30, hsp.query_start)
        self.assertEqual(73, hsp.query_end)
        self.assertEqual(1743, hsp.hit_start)
        self.assertEqual(1872, hsp.hit_end)
        self.assertEqual(1e-05, hsp.evalue)
        self.assertEqual(34.7, hsp.bitscore)

        # last hit and its only HSP
        hit = qresult[-1]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
        self.assertEqual(33.90, hsp.ident_pct)
        self.assertEqual(59, hsp.aln_span)
        self.assertEqual(31, hsp.mismatch_num)
        self.assertEqual(1, hsp.gapopen_num)
        self.assertEqual(43, hsp.query_start)
        self.assertEqual(94, hsp.query_end)
        self.assertEqual(1056, hsp.hit_start)
        self.assertEqual(1233, hsp.hit_end)
        self.assertEqual(1e-04, hsp.evalue)
        self.assertEqual(31.6, hsp.bitscore)

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)
    def test_30_hmmscan_004(self):
        """Parse tab_30_hmmscan_004: hmmscan 3.0, single query, multiple hits.

        Checks the only query result and both of its hits (each with a single
        HSP), then confirms that the iterator is exhausted.
        """
        qresults = parse(get_file('tab_30_hmmscan_004.out'), FMT)
        counter = 0

        qresult = next(qresults)
        counter += 1
        self.assertEqual(2, len(qresult))
        self.assertEqual('gi|126362951:116-221', qresult.id)
        # NOTE(review): this test reads `.acc` while sibling tests read
        # `.accession` -- confirm which name the targeted library version has
        self.assertEqual('-', qresult.acc)

        # attribute names checked on every hit / HSP, in check order
        hit_attrs = (
            'id', 'acc', 'evalue', 'bitscore', 'bias', 'domain_exp_num',
            'region_num', 'cluster_num', 'overlap_num', 'env_num',
            'domain_obs_num', 'domain_reported_num', 'domain_included_num',
            'description',
        )
        hsp_attrs = ('evalue', 'bitscore', 'bias')

        # one (hit values, hsp values) entry per hit, in file order
        expectations = (
            (
                ('Ig_3', 'PF13927.1', 1.4e-09, 38.2, 0.4, 1.3,
                 1, 0, 0, 1, 1, 1, 1, 'Immunoglobulin domain'),
                (2.1e-09, 37.6, 0.3),
            ),
            (
                ('Ig_2', 'PF13895.1', 3.5e-05, 23.7, 0.1, 1.1,
                 1, 0, 0, 1, 1, 1, 1, 'Immunoglobulin domain'),
                (4.3e-05, 23.4, 0.1),
            ),
        )
        for position, (hit_values, hsp_values) in enumerate(expectations):
            hit = qresult[position]
            self.assertEqual(1, len(hit))
            for attr, expected in zip(hit_attrs, hit_values):
                self.assertEqual(expected, getattr(hit, attr))
            hsp = hit.hsps[0]
            for attr, expected in zip(hsp_attrs, hsp_values):
                self.assertEqual(expected, getattr(hsp, attr))

        # a second request must fail: only one query result was consumed
        with self.assertRaises(StopIteration):
            next(qresults)
        self.assertEqual(1, counter)
    def test_31b1_hmmsearch_001(self):
        """Parse tab_31b1_hmmsearch_001: hmmer3-tab output of hmmsearch 3.1b1.

        The test expects a single query result with four hits and checks the
        first hit (first HSP) and the last hit (last HSP) in detail.
        """
        qresults = list(parse(get_file('tab_31b1_hmmsearch_001.out'), FMT))
        self.assertEqual(1, len(qresults))

        # the only query result
        qresult = qresults[0]
        self.assertEqual(4, len(qresult))
        self.assertEqual('Pkinase', qresult.id)
        self.assertEqual('PF00069.17', qresult.accession)

        # attribute names checked on every hit / HSP, in check order
        hit_attrs = (
            'id', 'accession', 'evalue', 'bitscore', 'bias', 'domain_exp_num',
            'region_num', 'cluster_num', 'overlap_num', 'env_num',
            'domain_obs_num', 'domain_reported_num', 'domain_included_num',
            'description',
        )
        hsp_attrs = ('evalue', 'bitscore', 'bias')

        # (index used for both the hit and its HSP, hit values, hsp values)
        cases = (
            (
                0,
                ('sp|Q9WUT3|KS6A2_MOUSE', '-', 8.5e-147, 492.3, 0.0, 2.1,
                 2, 0, 0, 2, 2, 2, 2,
                 'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=1 SV=1'),
                (1.2e-72, 249.3, 0.0),
            ),
            (
                -1,
                ('sp|P18652|KS6AA_CHICK', '-', 2.6e-145, 487.5, 0.0, 2.1,
                 2, 0, 0, 2, 2, 2, 2,
                 'Ribosomal protein S6 kinase 2 alpha OS=Gallus gallus GN=RPS6KA PE=2 SV=1'),
                (7.6e-72, 246.7, 0.0),
            ),
        )
        for index, hit_values, hsp_values in cases:
            hit = qresult[index]
            self.assertEqual(1, len(hit))
            for attr, expected in zip(hit_attrs, hit_values):
                self.assertEqual(expected, getattr(hit, attr))
            hsp = hit.hsps[index]
            for attr, expected in zip(hsp_attrs, hsp_values):
                self.assertEqual(expected, getattr(hsp, attr))
    def test_tab_2226_tblastn_005_comments_false(self):
        """Parse tab_2226_tblastn_005 without passing ``comments=True``.

        Requesting the first query result from the parser is expected to
        raise ``ValueError``.
        """
        qresults = parse(get_file('tab_2226_tblastn_005.txt'), FMT)
        exc_msg = ("Encountered unexpected character '#' at the beginning of"
                   " a line. Set comments=True if the file is a commented"
                   " file.")
        # NOTE(review): per the unittest documentation, msg= only customises
        # the failure message shown when no exception is raised; the text of
        # the raised ValueError is not compared against exc_msg -- confirm
        # whether a message check was intended here
        with self.assertRaises(ValueError, msg=exc_msg):
            next(qresults)
    def test_hmmpfam_23_no_match(self):
        """Parse hmmpfam 2.3 output without matches (text_23_hmmpfam_002.out).

        The first two query results must carry the expected IDs and contain
        no hits.
        """
        source = path.join("Hmmer", "text_23_hmmpfam_002.out")
        records = parse(source, self.fmt)

        for expected_id in ('SEQ0001', 'SEQ0002'):
            res = next(records)
            self.assertEqual(expected_id, res.id)
            self.assertEqual(0, len(res.hits))
    def test_tab_2226_tblastn_008(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_008).

        The file is parsed with ``comments=True``. The test checks the
        program/target/version/id of the only query result, the first and
        last HSP of its first hit, and finally that the iterator is exhausted.
        """
        tab_file = get_file('tab_2226_tblastn_008.txt')
        qresults = parse(tab_file, FMT, comments=True)
        counter = 0

        # use the built-in next(): iterators only expose a .next() method on
        # Python 2, while next() works on Python 2.6+ and Python 3
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('gi|11464971:4-101', qresult.id)
        self.assertEqual(5, len(qresult))

        hit = qresult[0]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id)
        self.assertEqual('gi|11464971:4-101', hit.query_id)
        self.assertEqual(2, len(hit))

        # first HSP of the first hit
        hsp = hit.hsps[0]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
        self.assertEqual('gi|11464971:4-101', hsp.query_id)
        self.assertEqual(95.92, hsp.ident_pct)
        self.assertEqual(98, hsp.aln_span)
        self.assertEqual(4, hsp.mismatch_num)
        self.assertEqual(0, hsp.gapopen_num)
        self.assertEqual(0, hsp.query_start)
        self.assertEqual(98, hsp.query_end)
        self.assertEqual(94, hsp.hit_start)
        self.assertEqual(388, hsp.hit_end)
        self.assertEqual(2e-67, hsp.evalue)
        self.assertEqual(199, hsp.bitscore)

        # last HSP of the first hit
        hsp = hit.hsps[-1]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
        self.assertEqual('gi|11464971:4-101', hsp.query_id)
        self.assertEqual(29.58, hsp.ident_pct)
        self.assertEqual(71, hsp.aln_span)
        self.assertEqual(46, hsp.mismatch_num)
        self.assertEqual(2, hsp.gapopen_num)
        self.assertEqual(29, hsp.query_start)
        self.assertEqual(96, hsp.query_end)
        self.assertEqual(541, hsp.hit_start)
        self.assertEqual(754, hsp.hit_end)
        self.assertEqual(4e-05, hsp.evalue)
        self.assertEqual(32.7, hsp.bitscore)

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)
    def test_hmmpfam_23(self):
        """Parse hmmpfam 2.3 text output (text_23_hmmpfam_001.out).

        Checks the fields of the first query result, its first hit, and the
        two domains (HSPs) of that hit; alignment strings are only checked
        for the first domain.
        """
        source = path.join("Hmmer", "text_23_hmmpfam_001.out")
        records = parse(source, self.fmt)
        res = next(records)

        query_checks = (
            ('id', 'gi|90819130|dbj|BAE92499.1|'),
            ('description', 'glutamate synthase [Porphyra yezoensis]'),
            ('accession', '[none]'),
            ('program', 'hmmpfam'),
            ('version', '2.3.2'),
            ('target', '../Shared/Pfam_fs'),
        )
        for attr, expected in query_checks:
            self.assertEqual(expected, getattr(res, attr))
        self.assertEqual(54, len(res))

        hit = res[0]
        self.assertEqual('Glu_synthase', hit.id)
        self.assertEqual('Conserved region in glutamate synthas',
                         hit.description)
        # scores are floats parsed from text, hence the approximate comparison
        self.assertAlmostEqual(858.6, hit.bitscore)
        self.assertAlmostEqual(3.6e-255, hit.evalue)
        self.assertEqual(2, hit.domain_obs_num)
        self.assertEqual(2, len(hit))

        # first domain: coordinates, scores and alignment strings
        hsp = hit[0]
        first_domain_checks = (
            ('domain_index', 1),
            ('hit_start', 296),
            ('hit_end', 323),
            ('hit_endtype', '..'),
            ('query_start', 649),
            ('query_end', 676),
            ('query_endtype', '..'),
        )
        for attr, expected in first_domain_checks:
            self.assertEqual(expected, getattr(hsp, attr))
        self.assertAlmostEqual(1.3, hsp.bitscore)
        self.assertAlmostEqual(3, hsp.evalue)
        self.assertEqual('lPwelgLaevhqtLvengLRdrVsLia', str(hsp.hit.seq))
        self.assertEqual('+P  l++ +vh  L++ gLR + s+ +',
                         str(hsp.aln_annotation['similarity']))
        self.assertEqual('IPPLLAVGAVHHHLINKGLRQEASILV', str(hsp.query.seq))

        # second domain: coordinates and scores only
        hsp = hit[1]
        second_domain_checks = (
            ('domain_index', 2),
            ('hit_start', 0),
            ('hit_end', 412),
            ('hit_endtype', '[]'),
            ('query_start', 829),
            ('query_end', 1216),
            ('query_endtype', '..'),
        )
        for attr, expected in second_domain_checks:
            self.assertEqual(expected, getattr(hsp, attr))
        self.assertAlmostEqual(857.3, hsp.bitscore)
        self.assertAlmostEqual(9e-255, hsp.evalue)
    def test_tab_2228_tblastn_001(self):
        """Parse TBLASTN 2.2.28+ commented tabular output (tab_2228_tblastn_001).

        Only the ``evalue``, ``sallseqid`` and ``qseqid`` columns are
        requested. The test checks the query and hit counts, that two
        different IDs retrieve equal hits, and two HSP e-values.
        """
        tab_file = get_file('tab_2228_tblastn_001.txt')
        columns = ['evalue', 'sallseqid', 'qseqid']
        qresults = list(parse(tab_file, FMT, fields=columns, comments=True))

        self.assertEqual(1, len(qresults))
        qresult = qresults[0]
        self.assertEqual(10, len(qresult.hits))

        # one hit carries an alternative ID; both keys must give equal hits
        by_first_id = qresult['gi|148227873|ref|NM_001095167.1|']
        by_second_id = qresult['gi|55250552|gb|BC086280.1|']
        self.assertEqual(by_first_id, by_second_id)

        # spot-check the first HSP of the first and of the last hit
        self.assertEqual(0.0, qresult[0][0].evalue)
        self.assertEqual(8e-173, qresult[-1][0].evalue)
Beispiel #18
0
def parse_hmmout(file_name):
    """Collect the bit score and ID field of every hit in an hmmer3-tab file.

    INPUT: file_name -- path of the tabular output, read with the
        'hmmer3-tab' format.
    OUTPUT: np.array holding one ``[score, id]`` row per hit, where ``score``
        is the hit's bit score truncated with ``int()`` and ``id`` is the
        second ``'|'``-separated field of the hit ID. NumPy stores the mixed
        int/str rows as strings.

    The return shape depends on the number of hits:
        no hits     -> empty array, ``np.array([])``
        one hit     -> 1-D array ``[score, id]``
        several     -> 2-D array with one row per hit

    Raises IndexError when a hit ID contains no ``'|'`` separator.
    """
    # gather plain rows first; calling np.vstack once per hit re-copies the
    # whole array each time and is quadratic in the number of hits
    rows = [
        [int(item.bitscore), item.id.split('|')[1]]
        for qresult in parse(file_name, 'hmmer3-tab')
        for item in qresult.hits
    ]
    if not rows:
        return np.array([])
    if len(rows) == 1:
        # keep the historical 1-D result for a single hit
        return np.array(rows[0])
    return np.array(rows)
    def test_tab_2228_tblastn_001(self):
        """Check the TBLASTN 2.2.28+ tabular parser on tab_2228_tblastn_001.

        The commented file is read with the ``evalue``, ``sallseqid`` and
        ``qseqid`` columns. One query with ten hits is expected, two
        different IDs must retrieve equal hits, and the e-values of two HSPs
        are spot-checked.
        """
        parsed = parse(get_file('tab_2228_tblastn_001.txt'), FMT,
                       fields=['evalue', 'sallseqid', 'qseqid'],
                       comments=True)
        qresults = list(parsed)

        self.assertEqual(1, len(qresults))
        only_query = qresults[0]
        self.assertEqual(10, len(only_query.hits))

        # there is one hit with an alternative ID: either key must work
        hit_a = only_query['gi|148227873|ref|NM_001095167.1|']
        hit_b = only_query['gi|55250552|gb|BC086280.1|']
        self.assertEqual(hit_a, hit_b)

        # first HSP of the first hit, then first HSP of the last hit
        for hit_index, expected_evalue in ((0, 0.0), (-1, 8e-173)):
            self.assertEqual(expected_evalue,
                             only_query[hit_index][0].evalue)
    def test_hmmpfam_23(self):
        """Check the hmmpfam 2.3 text parser on text_23_hmmpfam_001.out.

        Covers the first query result, its first hit and that hit's two
        domains (HSPs); alignment strings are compared for the first domain
        only.
        """
        records = parse(path.join("Hmmer", "text_23_hmmpfam_001.out"),
                        self.fmt)
        res = next(records)

        query_attrs = ('id', 'description', 'accession', 'program',
                       'version', 'target')
        query_values = ('gi|90819130|dbj|BAE92499.1|',
                        'glutamate synthase [Porphyra yezoensis]',
                        '[none]', 'hmmpfam', '2.3.2', '../Shared/Pfam_fs')
        for attr, expected in zip(query_attrs, query_values):
            self.assertEqual(expected, getattr(res, attr))
        self.assertEqual(54, len(res))

        hit = res[0]
        self.assertEqual('Glu_synthase', hit.id)
        self.assertEqual('Conserved region in glutamate synthas', hit.description)
        self.assertAlmostEqual(858.6, hit.bitscore)
        self.assertAlmostEqual(3.6e-255, hit.evalue)
        self.assertEqual(2, hit.domain_obs_num)
        self.assertEqual(2, len(hit))

        # attributes compared exactly, then those compared approximately
        exact_attrs = ('domain_index', 'hit_start', 'hit_end', 'hit_endtype',
                       'query_start', 'query_end', 'query_endtype')
        approx_attrs = ('bitscore', 'evalue')

        # first domain
        hsp = hit[0]
        for attr, expected in zip(exact_attrs,
                                  (1, 296, 323, '..', 649, 676, '..')):
            self.assertEqual(expected, getattr(hsp, attr))
        for attr, expected in zip(approx_attrs, (1.3, 3)):
            self.assertAlmostEqual(expected, getattr(hsp, attr))
        self.assertEqual('lPwelgLaevhqtLvengLRdrVsLia', str(hsp.hit.seq))
        self.assertEqual('+P  l++ +vh  L++ gLR + s+ +',
                         str(hsp.aln_annotation['similarity']))
        self.assertEqual('IPPLLAVGAVHHHLINKGLRQEASILV', str(hsp.query.seq))

        # second domain
        hsp = hit[1]
        for attr, expected in zip(exact_attrs,
                                  (2, 0, 412, '[]', 829, 1216, '..')):
            self.assertEqual(expected, getattr(hsp, attr))
        for attr, expected in zip(approx_attrs, (857.3, 9e-255)):
            self.assertAlmostEqual(expected, getattr(hsp, attr))
Beispiel #21
0
def parse_labelled_hmmout(file_name):
    """Parse an hmmsearch tabular output into score/group/family rows.

    INPUT: filename of the hmmsearch output (read with the 'hmmer3-tab'
    format).
    OUTPUT: np.array whose rows are [score, group, family]. ``score`` is the
    hit bitscore passed through int(); ``group`` and ``family`` are the
    second and third whitespace-separated tokens of the hit description.

    The result is an empty array when there are no hits, a 1-D array for a
    single hit and a 2-D array (one row per hit) otherwise.

    Raises IndexError if a hit description has fewer than three tokens.
    """
    rows = []
    for qresult in parse(file_name, 'hmmer3-tab'):
        for item in qresult.hits:
            # Split the description once; tokens 1 and 2 carry the labels.
            tokens = item.description.split()
            rows.append([int(item.bitscore), tokens[1], tokens[2]])
    if not rows:
        return np.array([])
    if len(rows) == 1:
        # Keep the established 1-D shape for a single hit.
        return np.array(rows[0])
    # Build the array once instead of re-stacking it for every hit.
    return np.array(rows)
Beispiel #22
0
    def test_tab_2226_tblastn_012(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_012).

        Three queries are read in turn; each is checked for its header
        metadata (program, target, version, id, rid) and its number of hits.
        """
        xml_file = get_file('tab_2226_tblastn_012.txt')
        qresults = parse(xml_file, FMT, comments=True)
        counter = 0

        # test first qresult
        # the next() builtin is used instead of the .next() method, which
        # iterators only provide on Python 2
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('refseq_rna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('random_s00', qresult.id)
        self.assertEqual('X76FDCG9016', qresult.rid)
        self.assertEqual(0, len(qresult))

        # test second qresult
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('refseq_rna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
        self.assertEqual('X76FDCG9016', qresult.rid)
        self.assertEqual(3, len(qresult))

        # test last qresult
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('refseq_rna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('gi|11464971:4-101', qresult.id)
        self.assertEqual('X76FDCG9016', qresult.rid)
        self.assertEqual(5, len(qresult))

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(3, counter)
Beispiel #23
0
    def test_hmmpfam_23_missing_consensus(self):
        """Check the query, hit and HSP parsed from text_23_hmmpfam_003.out (hmmpfam 2.3)."""
        hmm_path = path.join("Hmmer", "text_23_hmmpfam_003.out")
        qresult = next(parse(hmm_path, self.fmt))

        # query-level metadata
        for attr, expected in (
                ('id', 'small_input'),
                ('description', '[none]'),
                ('accession', '[none]'),
                ('program', 'hmmpfam'),
                ('version', '2.3.2'),
                ('target',
                 'antismash/specific_modules/lantipeptides/ClassIVLanti.hmm'),
        ):
            self.assertEqual(expected, getattr(qresult, attr))
        self.assertEqual(1, len(qresult))

        # the only hit
        first_hit = qresult[0]
        self.assertEqual('ClassIVLanti', first_hit.id)
        self.assertEqual('Class-IV', first_hit.description)
        self.assertAlmostEqual(-79.3, first_hit.bitscore)
        self.assertAlmostEqual(1, first_hit.evalue)
        self.assertEqual(1, first_hit.domain_obs_num)
        self.assertEqual(1, len(first_hit))

        # the only domain of that hit
        domain = first_hit[0]
        for attr, expected in (
                ('domain_index', 1),
                ('hit_start', 0),
                ('hit_end', 66),
                ('hit_endtype', '[]'),
                ('query_start', 5),
                ('query_end', 20),
                ('query_endtype', '..'),
        ):
            self.assertEqual(expected, getattr(domain, attr))
        self.assertAlmostEqual(-79.3, domain.bitscore)
        self.assertAlmostEqual(1, domain.evalue)

        # all three alignment rows must have the same width
        query_width = len(domain.query.seq)
        self.assertEqual(query_width, len(domain.hit.seq))
        self.assertEqual(len(domain.query.seq),
                         len(domain.aln_annotation['similarity']))

        expected_hit_row = (
            'msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLnahiqakeLsdeeLEgvaGg'
        )
        expected_similarity_row = (
            '        F+                           G  +t   Ln                   '
        )
        expected_query_row = (
            '-------CFL---------------------------GCLVTNWVLNRS-----------------'
        )
        self.assertEqual(expected_hit_row, str(domain.hit.seq))
        self.assertEqual(expected_similarity_row,
                         str(domain.aln_annotation['similarity']))
        self.assertEqual(expected_query_row, str(domain.query.seq))
    def test_tab_2228_tblastx_001(self):
        """Check the fields added in BLAST 2.2.28+ on a TBLASTX tabular file (tab_2228_tblastx_001)."""
        source = get_file("tab_2228_tblastx_001.txt")
        wanted_fields = list(all_fields.values())
        parsed = list(parse(source, FMT, fields=wanted_fields, comments=True))

        # this a single query, with 192 hits and 243 hsps
        self.assertEqual(1, len(parsed))
        only_query = parsed[0]
        self.assertEqual(192, len(only_query.hits))
        hsp_total = 0
        for each_hit in only_query:
            hsp_total += len(each_hit)
        self.assertEqual(243, hsp_total)
        # there is one hit with an alternative ID
        by_primary_id = only_query["gi|31126987|gb|AY255526.2|"]
        by_alternative_id = only_query["gi|31342050|ref|NM_181083.2|"]
        self.assertEqual(by_primary_id, by_alternative_id)

        # only checking the new fields in 2.2.28+
        top_hit = only_query[0]
        expected_titles = [
            "Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA",
            "Synthetic construct Saccharomyces cerevisiae clone "
            "FLH203015.01X MON2, complete sequence",
        ]
        for attr, expected in (
            ("accession_all", ["NM_001183135", "EF059095"]),
            ("tax_ids", ["32630", "559292"]),
            ("sci_names", ["N/A", "N/A"]),
            ("com_names", ["N/A", "N/A"]),
            ("blast_names", ["N/A"]),
            ("super_kingdoms", ["N/A"]),
            ("title", "Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA"),
            ("title_all", expected_titles),
            ("strand", "N/A"),
            ("query_coverage", 100.0),
        ):
            self.assertEqual(expected, getattr(top_hit, attr))

        # the first four hsps happen to share a coverage of 99;
        # that is a property of this file, not a general rule
        for leading_hsp in top_hit[:4]:
            self.assertEqual(99.0, leading_hsp.query_coverage)
        self.assertEqual(73.0, top_hit[5].query_coverage)
        self.assertEqual(12.0, top_hit[6].query_coverage)
    def test_xml_001(self):
        """Check the first query of test_001.xml: its metadata, first hit/HSP and last hit/HSP."""
        record = next(parse(get_file('test_001.xml'), FMT))

        # query-level attributes and parsed values
        self.assertEqual('5.26-65.0', record.version)
        self.assertEqual('AT5G23090.4', record.id)
        expected_description = (
            'pacid=19665592 transcript=AT5G23090.4 locus=AT5G23090 '
            'ID=AT5G23090.4.TAIR10 annot-version=TAIR10'
        )
        self.assertEqual(expected_description, record.description)
        self.assertEqual(4, len(record))

        # first hit
        first_hit = record[0]
        self.assertEqual('PF00808', first_hit.id)
        self.assertEqual(
            'Histone-like transcription factor (CBF/NF-Y) and archaeal histone',
            first_hit.description)
        for key, expected in (
                ('Target', 'PFAM'),
                ('Target version', '31.0'),
                ('Hit type', 'hmmer3'),
        ):
            self.assertEqual(expected, first_hit.attributes[key])
        self.assertEqual(2, len(first_hit))

        first_hsp = first_hit.hsps[0]
        for attr, expected in (
                ('bitscore', 76.7),
                ('evalue', 1.1e-21),
                ('query_start', 13),
                ('query_end', 79),
                ('hit_start', 0),
                ('hit_end', 65),
                ('aln_span', 66),
        ):
            self.assertEqual(expected, getattr(first_hsp, attr))
        self.assertEqual(
            'MDPMDIVGKSKEDASLPKATMTKIIKEMLPPDVRVARDAQDLLIECCVEFINLVSSESNDVCNKEDKRTIAPEHVLKALQVLGFGEYIEEVYAAYEQHKYETMDTQRSVKWNPGAQMTEEEAAAEQQRMFAEARARMNGGVSVPQPEHPETDQRSPQS',
            str(first_hsp.query.seq))

        # parse last hit
        last_hit = record[-1]
        self.assertEqual('SSF47113', last_hit.id)
        self.assertEqual(1, len(last_hit))
        self.assertEqual('IPR:IPR009072', last_hit.dbxrefs[0])
        self.assertEqual('GO:0046982', last_hit.dbxrefs[1])

        last_hsp = last_hit.hsps[0]
        self.assertEqual(11, last_hsp.query_start)
        self.assertEqual(141, last_hsp.query_end)
    def test_domtab_30_hmmscan_003(self):
        """Test parsing hmmscan-domtab, hmmscan 3.0, multiple queries (domtab_30_hmmscan_003).

        Reads the single query result from the file, checks its hit and the
        hit's first HSP, then verifies that the iterator is exhausted.
        """
        tab_file = get_file('domtab_30_hmmscan_003.out')
        qresults = parse(tab_file, self.fmt)
        counter = 0

        # next() builtin rather than the Python 2-only .next() method
        qresult = next(qresults)
        counter += 1
        self.assertEqual(1, len(qresult))
        self.assertEqual('gi|4885477|ref|NP_005359.1|', qresult.id)
        self.assertEqual('-', qresult.accession)
        self.assertEqual(154, qresult.seq_len)
        hit = qresult[0]
        self.assertEqual(1, len(hit))
        self.assertEqual('Globin', hit.id)
        self.assertEqual('gi|4885477|ref|NP_005359.1|', hit.query_id)
        self.assertEqual('PF00042.17', hit.accession)
        self.assertEqual(108, hit.seq_len)
        self.assertEqual(6e-21, hit.evalue)
        self.assertEqual(74.6, hit.bitscore)
        self.assertEqual(0.3, hit.bias)
        self.assertEqual('Globin', hit.description)
        hsp = hit.hsps[0]
        self.assertEqual('Globin', hsp.hit_id)
        self.assertEqual('gi|4885477|ref|NP_005359.1|', hsp.query_id)
        self.assertEqual(1, hsp.domain_index)
        self.assertEqual(6.7e-25, hsp.evalue_cond)
        self.assertEqual(9.2e-21, hsp.evalue)
        self.assertEqual(74.0, hsp.bitscore)
        self.assertEqual(0.2, hsp.bias)
        self.assertEqual(0, hsp.hit_start)
        self.assertEqual(107, hsp.hit_end)
        self.assertEqual(6, hsp.query_start)
        self.assertEqual(112, hsp.query_end)
        self.assertEqual(6, hsp.env_start)
        self.assertEqual(113, hsp.env_end)
        self.assertEqual(0.97, hsp.acc_avg)

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)
    def test_tab_2226_tblastn_006(self):
        """Check that tab_2226_tblastn_006 (commented TBLASTN 2.2.26+ output) yields one query without hits."""
        results_iter = parse(
            get_file("tab_2226_tblastn_006.txt"), FMT, comments=True
        )
        seen = 0

        only_query = next(results_iter)
        seen += 1

        for attr, expected in (
            ("program", "tblastn"),
            ("target", "db/minirefseq_mrna"),
            ("version", "2.2.26+"),
            ("id", "random_s00"),
        ):
            self.assertEqual(expected, getattr(only_query, attr))
        self.assertEqual(0, len(only_query))

        # nothing may be left in the iterator after the single query
        with self.assertRaises(StopIteration):
            next(results_iter)
        self.assertEqual(1, seen)
    def test_tab_2226_tblastn_006(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_006).

        The file is expected to contain exactly one query with no hits.
        """
        xml_file = get_file('tab_2226_tblastn_006.txt')
        qresults = parse(xml_file, FMT, comments=True)
        counter = 0

        # iterators expose .next() only on Python 2; the next() builtin
        # works on both major versions
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('random_s00', qresult.id)
        self.assertEqual(0, len(qresult))

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)
    def test_tab_2226_tblastn_012(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_012).

        All three queries share the same program, target, version and RID;
        only the query ID and the number of hits differ, so those two values
        drive the loop below.
        """
        xml_file = get_file('tab_2226_tblastn_012.txt')
        qresults = parse(xml_file, FMT, comments=True)
        counter = 0

        # (query id, number of hits) for the first, second and last qresult
        expected_queries = (
            ('random_s00', 0),
            ('gi|16080617|ref|NP_391444.1|', 3),
            ('gi|11464971:4-101', 5),
        )
        for query_id, hit_count in expected_queries:
            # next() builtin instead of the Python 2-only .next() method
            qresult = next(qresults)
            counter += 1

            self.assertEqual('tblastn', qresult.program)
            self.assertEqual('refseq_rna', qresult.target)
            self.assertEqual('2.2.26+', qresult.version)
            self.assertEqual(query_id, qresult.id)
            self.assertEqual('X76FDCG9016', qresult.rid)
            self.assertEqual(hit_count, len(qresult))

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(3, counter)
Beispiel #30
0
    def test_tab_2226_tblastn_006(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_006).

        Checks the header metadata of the single, hitless query and that the
        parser yields nothing after it.
        """
        xml_file = get_file('tab_2226_tblastn_006.txt')
        qresults = parse(xml_file, FMT, comments=True)
        counter = 0

        # the builtin next() replaces qresults.next(), which does not exist
        # on Python 3 iterators
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('random_s00', qresult.id)
        self.assertEqual(0, len(qresult))

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)
Beispiel #31
0
    def test_hmmpfam_22(self):
        """Check the query, hit and HSP parsed from text_22_hmmpfam_001.out (hmmpfam 2.2)."""
        hmm_path = path.join("Hmmer", "text_22_hmmpfam_001.out")
        qresult = next(parse(hmm_path, self.fmt))

        # query-level metadata
        for attr, expected in (
                ('id', 'gi|1522636|gb|AAC37060.1|'),
                ('description',
                 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]'),
                ('accession', '[none]'),
                ('program', 'hmmpfam'),
                ('version', '2.2g'),
                ('target', 'Pfam'),
        ):
            self.assertEqual(expected, getattr(qresult, attr))
        self.assertEqual(1, len(qresult))

        # the only hit
        first_hit = qresult[0]
        self.assertEqual('Methylase_M', first_hit.id)
        self.assertEqual('Type I restriction modification system, M',
                         first_hit.description)
        self.assertAlmostEqual(-105.2, first_hit.bitscore)
        self.assertAlmostEqual(0.0022, first_hit.evalue)
        self.assertEqual(1, first_hit.domain_obs_num)
        self.assertEqual(1, len(first_hit))

        # the only domain of that hit
        domain = first_hit[0]
        for attr, expected in (
                ('domain_index', 1),
                ('hit_start', 0),
                ('hit_end', 279),
                ('hit_endtype', '[]'),
                ('query_start', 279),
                ('query_end', 481),
                ('query_endtype', '..'),
        ):
            self.assertEqual(expected, getattr(domain, attr))
        self.assertAlmostEqual(-105.2, domain.bitscore)
        self.assertAlmostEqual(0.0022, domain.evalue)

        # alignment rows: model sequence, similarity line, query sequence
        expected_hit_row = (
            'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn'
        )
        expected_similarity_row = (
            ' ++EL+++  av+   R              L+F K++ dk      +i+         p +   + +++y   ++   ++ ++y ++      + lF++++   e ++  ++++ + +    ++      + +       Glf ++++  ++ +s+   +ne ++e+i+ +++ +++     G++ +el   D++G +YE L+   Ae   K+ G +YTP e++  ia+ + i+  ++                  +++ ++    k+n+i +    s+'
        )
        expected_query_row = (
            'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST'
        )
        self.assertEqual(expected_hit_row, str(domain.hit.seq))
        self.assertEqual(expected_similarity_row,
                         str(domain.aln_annotation['similarity']))
        self.assertEqual(expected_query_row, str(domain.query.seq))
    def test_domtab_30_hmmscan_003(self):
        """Check the single query, hit and HSP read from domtab_30_hmmscan_003.out (hmmscan 3.0 domtab)."""
        results_iter = parse(get_file('domtab_30_hmmscan_003.out'), self.fmt)
        seen = 0

        record = next(results_iter)
        seen += 1
        self.assertEqual(1, len(record))
        for attr, expected in (
                ('id', 'gi|4885477|ref|NP_005359.1|'),
                ('accession', '-'),
                ('seq_len', 154),
        ):
            self.assertEqual(expected, getattr(record, attr))

        # the only hit of the query
        globin = record[0]
        self.assertEqual(1, len(globin))
        for attr, expected in (
                ('id', 'Globin'),
                ('query_id', 'gi|4885477|ref|NP_005359.1|'),
                ('accession', 'PF00042.17'),
                ('seq_len', 108),
                ('evalue', 6e-21),
                ('bitscore', 74.6),
                ('bias', 0.3),
                ('description', 'Globin'),
        ):
            self.assertEqual(expected, getattr(globin, attr))

        # the first domain of that hit
        domain = globin.hsps[0]
        for attr, expected in (
                ('hit_id', 'Globin'),
                ('query_id', 'gi|4885477|ref|NP_005359.1|'),
                ('domain_index', 1),
                ('evalue_cond', 6.7e-25),
                ('evalue', 9.2e-21),
                ('bitscore', 74.0),
                ('bias', 0.2),
                ('hit_start', 0),
                ('hit_end', 107),
                ('query_start', 6),
                ('query_end', 112),
                ('env_start', 6),
                ('env_end', 113),
                ('acc_avg', 0.97),
        ):
            self.assertEqual(expected, getattr(domain, attr))

        # the iterator must be exhausted now
        with self.assertRaises(StopIteration):
            next(results_iter)
        self.assertEqual(1, seen)
    def test_tab_2226_tblastn_012(self):
        """Check the three queries of tab_2226_tblastn_012 (commented TBLASTN 2.2.26+ output)."""
        results_iter = parse(
            get_file("tab_2226_tblastn_012.txt"), FMT, comments=True
        )
        seen = 0

        # (query id, number of hits) in file order: first, second, last
        per_query = (
            ("random_s00", 0),
            ("gi|16080617|ref|NP_391444.1|", 3),
            ("gi|11464971:4-101", 5),
        )
        for query_id, hit_count in per_query:
            record = next(results_iter)
            seen += 1

            # header values shared by every query, then the per-query ones
            self.assertEqual("tblastn", record.program)
            self.assertEqual("refseq_rna", record.target)
            self.assertEqual("2.2.26+", record.version)
            self.assertEqual(query_id, record.id)
            self.assertEqual("X76FDCG9016", record.rid)
            self.assertEqual(hit_count, len(record))

        # no fourth query may follow
        with self.assertRaises(StopIteration):
            next(results_iter)
        self.assertEqual(3, seen)
    def test_hmmpfam_22(self):
        """Verify query, hit and domain values parsed from the hmmpfam 2.2 file text_22_hmmpfam_001.out."""
        record = next(parse(path.join("Hmmer", "text_22_hmmpfam_001.out"), self.fmt))

        # query-level values
        query_expectations = {
            "id": "gi|1522636|gb|AAC37060.1|",
            "description": "M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]",
            "accession": "[none]",
            "program": "hmmpfam",
            "version": "2.2g",
            "target": "Pfam",
        }
        for attr in ("id", "description", "accession", "program", "version", "target"):
            self.assertEqual(query_expectations[attr], getattr(record, attr))
        self.assertEqual(1, len(record))

        # hit-level values
        methylase = record[0]
        self.assertEqual("Methylase_M", methylase.id)
        self.assertEqual("Type I restriction modification system, M", methylase.description)
        self.assertAlmostEqual(-105.2, methylase.bitscore)
        self.assertAlmostEqual(0.0022, methylase.evalue)
        self.assertEqual(1, methylase.domain_obs_num)
        self.assertEqual(1, len(methylase))

        # domain-level values
        fragment = methylase[0]
        self.assertEqual(1, fragment.domain_index)
        self.assertEqual((0, 279), (fragment.hit_start, 279) if fragment.hit_end == 279 else (fragment.hit_start, fragment.hit_end))
        self.assertEqual("[]", fragment.hit_endtype)
        self.assertEqual(279, fragment.query_start)
        self.assertEqual(481, fragment.query_end)
        self.assertEqual("..", fragment.query_endtype)
        self.assertAlmostEqual(-105.2, fragment.bitscore)
        self.assertAlmostEqual(0.0022, fragment.evalue)

        # alignment rows, compared as plain strings
        hit_row = str(fragment.hit.seq)
        self.assertEqual(
            "lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn",
            hit_row,
        )
        similarity_row = str(fragment.aln_annotation["similarity"])
        self.assertEqual(
            " ++EL+++  av+   R              L+F K++ dk      +i+         p +   + +++y   ++   ++ ++y ++      + lF++++   e ++  ++++ + +    ++      + +       Glf ++++  ++ +s+   +ne ++e+i+ +++ +++     G++ +el   D++G +YE L+   Ae   K+ G +YTP e++  ia+ + i+  ++                  +++ ++    k+n+i +    s+",
            similarity_row,
        )
        query_row = str(fragment.query.seq)
        self.assertEqual(
            "NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST",
            query_row,
        )
Beispiel #35
0
    def test_domtab_30_hmmsearch_001(self):
        """Check the first query, hit and HSP of domtab_30_hmmsearch_001.out (hmmsearch 3.0 domtab)."""
        results_iter = parse(get_file('domtab_30_hmmsearch_001.out'),
                             self.fmt)

        # Only the first qresult is read: the point of interest is the
        # coordinate switch, and the first hsp of its first hit shows it.
        record = next(results_iter)
        self.assertEqual(7, len(record))
        for attr, expected in (
                ('id', 'Pkinase'),
                ('accession', 'PF00069.17'),
                ('seq_len', 260),
        ):
            self.assertEqual(expected, getattr(record, attr))

        first_hit = record[0]
        self.assertEqual(2, len(first_hit))
        for attr, expected in (
                ('id', 'sp|Q9WUT3|KS6A2_MOUSE'),
                ('query_id', 'Pkinase'),
                ('accession', '-'),
                ('seq_len', 733),
                ('evalue', 8.4e-147),
                ('bitscore', 492.3),
                ('bias', 0.0),
                ('description',
                 'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=2 SV=1'),
        ):
            self.assertEqual(expected, getattr(first_hit, attr))

        first_domain = first_hit.hsps[0]
        for attr, expected in (
                ('hit_id', 'sp|Q9WUT3|KS6A2_MOUSE'),
                ('query_id', 'Pkinase'),
                ('domain_index', 1),
                ('evalue_cond', 4.6e-75),
                ('evalue', 3.5e-70),
                ('bitscore', 241.2),
                ('bias', 0.0),
                ('hit_start', 58),
                ('hit_end', 318),
                ('query_start', 0),
                ('query_end', 260),
                ('env_start', 58),
                ('env_end', 318),
                ('acc_avg', 0.95),
        ):
            self.assertEqual(expected, getattr(first_domain, attr))
Beispiel #36
0
    def test_30_hmmscan_003(self):
        """Test parsing hmmer3-tab, hmmscan 3.0, single query, single hit, single hsp (tab_30_hmmscan_003).

        Reads the only query result, checks its hit and the hit's first HSP,
        then verifies that the iterator is exhausted.
        """
        tab_file = get_file('tab_30_hmmscan_003.out')
        qresults = parse(tab_file, FMT)
        counter = 0

        # next() builtin rather than the Python 2-only .next() method
        qresult = next(qresults)
        counter += 1
        self.assertEqual(1, len(qresult))
        self.assertEqual('gi|4885477|ref|NP_005359.1|', qresult.id)
        self.assertEqual('-', qresult.acc)
        hit = qresult[0]
        self.assertEqual(1, len(hit))
        self.assertEqual('Globin', hit.id)
        self.assertEqual('PF00042.17', hit.acc)
        self.assertEqual(6e-21, hit.evalue)
        self.assertEqual(74.6, hit.bitscore)
        self.assertEqual(0.3, hit.bias)
        self.assertEqual(1.3, hit.domain_exp_num)
        self.assertEqual(1, hit.region_num)
        self.assertEqual(0, hit.cluster_num)
        self.assertEqual(0, hit.overlap_num)
        self.assertEqual(1, hit.env_num)
        self.assertEqual(1, hit.domain_obs_num)
        self.assertEqual(1, hit.domain_reported_num)
        self.assertEqual(1, hit.domain_included_num)
        self.assertEqual('Globin', hit.description)
        hsp = hit.hsps[0]
        self.assertEqual(9.2e-21, hsp.evalue)
        self.assertEqual(74.0, hsp.bitscore)
        self.assertEqual(0.2, hsp.bias)

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)
Beispiel #37
0
    def test_domtab_31b1_hmmsearch_001(self):
        """Check the single query of domtab_31b1_hmmsearch_001.out (hmmsearch 3.1b1 domtab) with its first hit and HSP."""
        all_records = list(parse(get_file('domtab_31b1_hmmsearch_001.out'),
                                 self.fmt))

        self.assertEqual(1, len(all_records))

        record = all_records[0]
        for attr, expected in (
                ('id', 'Pkinase'),
                ('accession', 'PF00069.17'),
                ('seq_len', 260),
        ):
            self.assertEqual(expected, getattr(record, attr))

        first_hit = record[0]
        self.assertEqual(2, len(first_hit))
        for attr, expected in (
                ('id', 'sp|Q9WUT3|KS6A2_MOUSE'),
                ('query_id', 'Pkinase'),
                ('accession', '-'),
                ('seq_len', 733),
                ('evalue', 8.5e-147),
                ('bitscore', 492.3),
                ('bias', 0.0),
                ('description',
                 'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=1 SV=1'),
        ):
            self.assertEqual(expected, getattr(first_hit, attr))

        first_domain = first_hit.hsps[0]
        for attr, expected in (
                ('hit_id', 'sp|Q9WUT3|KS6A2_MOUSE'),
                ('query_id', 'Pkinase'),
                ('domain_index', 1),
                ('evalue_cond', 2.6e-75),
                ('evalue', 3.6e-70),
                ('bitscore', 241.2),
                ('bias', 0.0),
                ('hit_start', 58),
                ('hit_end', 318),
                ('query_start', 0),
                ('query_end', 260),
                ('env_start', 58),
                ('env_end', 318),
                ('acc_avg', 0.95),
        ):
            self.assertEqual(expected, getattr(first_domain, attr))
    def test_hmmpfam_23_missing_consensus(self):
        """Verify query, hit and domain values parsed from the hmmpfam 2.3 file text_23_hmmpfam_003.out."""
        record = next(parse(path.join("Hmmer", "text_23_hmmpfam_003.out"), self.fmt))

        # query-level values
        self.assertEqual('small_input', record.id)
        for attr in ('description', 'accession'):
            self.assertEqual('[none]', getattr(record, attr))
        self.assertEqual('hmmpfam', record.program)
        self.assertEqual('2.3.2', record.version)
        self.assertEqual('antismash/specific_modules/lantipeptides/ClassIVLanti.hmm', record.target)
        self.assertEqual(1, len(record))

        # hit-level values
        lanti_hit = record[0]
        self.assertEqual('ClassIVLanti', lanti_hit.id)
        self.assertEqual('Class-IV', lanti_hit.description)
        self.assertAlmostEqual(-79.3, lanti_hit.bitscore)
        self.assertAlmostEqual(1, lanti_hit.evalue)
        self.assertEqual(1, lanti_hit.domain_obs_num)
        self.assertEqual(1, len(lanti_hit))

        # domain-level values
        fragment = lanti_hit[0]
        self.assertEqual(1, fragment.domain_index)
        self.assertEqual(0, fragment.hit_start)
        self.assertEqual(66, fragment.hit_end)
        self.assertEqual('[]', fragment.hit_endtype)
        self.assertEqual(5, fragment.query_start)
        self.assertEqual(20, fragment.query_end)
        self.assertEqual('..', fragment.query_endtype)
        self.assertAlmostEqual(-79.3, fragment.bitscore)
        self.assertAlmostEqual(1, fragment.evalue)

        # the hit row and the similarity row must be as wide as the query row
        self.assertEqual(len(fragment.query.seq), len(fragment.hit.seq))
        self.assertEqual(len(fragment.query.seq), len(fragment.aln_annotation['similarity']))

        # alignment rows, compared as plain strings
        hit_row = str(fragment.hit.seq)
        self.assertEqual('msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLnahiqakeLsdeeLEgvaGg',
                         hit_row)
        similarity_row = str(fragment.aln_annotation['similarity'])
        self.assertEqual('        F+                           G  +t   Ln                   ',
                         similarity_row)
        query_row = str(fragment.query.seq)
        self.assertEqual('-------CFL---------------------------GCLVTNWVLNRS-----------------',
                         query_row)
    def test_domtab_30_hmmsearch_001(self):
        """Verify the first query of domtab_30_hmmsearch_001.out (hmmsearch 3.0 domtab), its first hit and first HSP."""
        source = get_file('domtab_30_hmmsearch_001.out')
        # The coordinate switch is what matters here, so only the first
        # qresult is pulled and only its first hit / first hsp inspected.
        record = next(parse(source, self.fmt))

        self.assertEqual(7, len(record))
        self.assertEqual(
            ('Pkinase', 'PF00069.17', 260),
            (record.id, record.accession, record.seq_len))

        kinase_hit = record[0]
        self.assertEqual(2, len(kinase_hit))
        self.assertEqual('sp|Q9WUT3|KS6A2_MOUSE', kinase_hit.id)
        self.assertEqual('Pkinase', kinase_hit.query_id)
        self.assertEqual('-', kinase_hit.accession)
        self.assertEqual(733, kinase_hit.seq_len)
        self.assertEqual(8.4e-147, kinase_hit.evalue)
        self.assertEqual(492.3, kinase_hit.bitscore)
        self.assertEqual(0.0, kinase_hit.bias)
        expected_description = (
            'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus '
            'GN=Rps6ka2 PE=2 SV=1'
        )
        self.assertEqual(expected_description, kinase_hit.description)

        fragment = kinase_hit.hsps[0]
        self.assertEqual('sp|Q9WUT3|KS6A2_MOUSE', fragment.hit_id)
        self.assertEqual('Pkinase', fragment.query_id)
        self.assertEqual(1, fragment.domain_index)
        # scores
        self.assertEqual(4.6e-75, fragment.evalue_cond)
        self.assertEqual(3.5e-70, fragment.evalue)
        self.assertEqual(241.2, fragment.bitscore)
        self.assertEqual(0.0, fragment.bias)
        # coordinates
        for attr, expected in (
                ('hit_start', 58),
                ('hit_end', 318),
                ('query_start', 0),
                ('query_end', 260),
                ('env_start', 58),
                ('env_end', 318),
        ):
            self.assertEqual(expected, getattr(fragment, attr))
        self.assertEqual(0.95, fragment.acc_avg)
    def test_tab_2228_tblastx_001(self):
        """Verify the 2.2.28+ specific fields parsed from the TBLASTX tabular file tab_2228_tblastx_001."""
        source = get_file('tab_2228_tblastx_001.txt')
        parsed = list(
            parse(source, FMT, fields=list(all_fields.values()), comments=True)
        )

        # this a single query, with 192 hits and 243 hsps
        self.assertEqual(1, len(parsed))
        only_query = parsed[0]
        self.assertEqual(192, len(only_query.hits))
        self.assertEqual(243, sum(len(each_hit) for each_hit in only_query))

        # only checking the new fields in 2.2.28+
        top_hit = only_query[0]
        self.assertEqual(['NM_001183135', 'EF059095'], top_hit.accession_all)
        self.assertEqual(['32630', '559292'], top_hit.tax_ids)
        # names: two entries for the per-sequence lists, one for the others
        for attr in ('sci_names', 'com_names'):
            self.assertEqual(['N/A', 'N/A'], getattr(top_hit, attr))
        for attr in ('blast_names', 'super_kingdoms'):
            self.assertEqual(['N/A'], getattr(top_hit, attr))

        primary_title = 'Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA'
        secondary_title = ('Synthetic construct Saccharomyces cerevisiae clone '
                           'FLH203015.01X MON2, complete sequence')
        self.assertEqual(primary_title, top_hit.title)
        self.assertEqual([primary_title, secondary_title], top_hit.title_all)
        self.assertEqual('N/A', top_hit.strand)
        self.assertEqual(100.0, top_hit.query_coverage)

        # the first four hsps happen to share a coverage of 99;
        # that is a property of this file, not a general rule
        for leading_hsp in top_hit[:4]:
            self.assertEqual(99.0, leading_hsp.query_coverage)
        self.assertEqual(73.0, top_hit[5].query_coverage)
        self.assertEqual(12.0, top_hit[6].query_coverage)
    def test_domtab_31b1_hmmsearch_001(self):
        """Test parsing hmmsearch-domtab, hmmsearch 3.1b1, single query (domtab_31b1_hmmsearch_001)

        Checks the single query, its first hit and that hit's first HSP.
        """
        domtab_path = get_file('domtab_31b1_hmmsearch_001.out')
        parsed = list(parse(domtab_path, self.fmt))

        # exactly one query is expected in this file
        self.assertEqual(1, len(parsed))

        query = parsed[0]
        self.assertEqual('Pkinase', query.id)
        self.assertEqual('PF00069.17', query.accession)
        self.assertEqual(260, query.seq_len)

        # first hit of the query
        first_hit = query[0]
        self.assertEqual(2, len(first_hit))
        self.assertEqual('sp|Q9WUT3|KS6A2_MOUSE', first_hit.id)
        self.assertEqual('Pkinase', first_hit.query_id)
        self.assertEqual('-', first_hit.accession)
        self.assertEqual(733, first_hit.seq_len)
        self.assertEqual(8.5e-147, first_hit.evalue)
        self.assertEqual(492.3, first_hit.bitscore)
        self.assertEqual(0.0, first_hit.bias)
        self.assertEqual(
            'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=1 SV=1',
            first_hit.description)

        # first HSP of that hit
        first_hsp = first_hit.hsps[0]
        self.assertEqual('sp|Q9WUT3|KS6A2_MOUSE', first_hsp.hit_id)
        self.assertEqual('Pkinase', first_hsp.query_id)
        self.assertEqual(1, first_hsp.domain_index)
        self.assertEqual(2.6e-75, first_hsp.evalue_cond)
        self.assertEqual(3.6e-70, first_hsp.evalue)
        self.assertEqual(241.2, first_hsp.bitscore)
        self.assertEqual(0.0, first_hsp.bias)
        self.assertEqual(58, first_hsp.hit_start)
        self.assertEqual(318, first_hsp.hit_end)
        self.assertEqual(0, first_hsp.query_start)
        self.assertEqual(260, first_hsp.query_end)
        self.assertEqual(58, first_hsp.env_start)
        self.assertEqual(318, first_hsp.env_end)
        self.assertEqual(0.95, first_hsp.acc_avg)
    def test_tab_2226_tblastn_011(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_011)

        Pulls the three query results from the parser one at a time,
        checks selected hits and HSPs of each, and finally confirms that
        the iterator is exhausted.
        """

        tab_file = get_file('tab_2226_tblastn_011.txt')
        qresults = parse(tab_file, FMT, comments=True)
        counter = 0

        # test first qresult
        # the next() builtin is used instead of qresults.next(), because
        # iterators have no .next() method on Python 3
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('random_s00', qresult.id)
        self.assertEqual(0, len(qresult))

        # test second qresult
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.accession)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.accession_version)
        self.assertEqual('0', qresult.gi)
        self.assertEqual(102, qresult.seq_len)
        self.assertEqual(3, len(qresult))

        # first hit of the second qresult
        hit = qresult[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id_all)
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.accession)
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.accession_version)
        self.assertEqual('0', hit.gi)
        self.assertEqual('0', hit.gi_all)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(4632, hit.seq_len)
        self.assertEqual(1, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
        self.assertEqual(34.88, hsp.ident_pct)
        self.assertEqual(43, hsp.aln_span)
        self.assertEqual(28, hsp.mismatch_num)
        self.assertEqual(0, hsp.gapopen_num)
        self.assertEqual(30, hsp.query_start)
        self.assertEqual(73, hsp.query_end)
        self.assertEqual(1743, hsp.hit_start)
        self.assertEqual(1872, hsp.hit_end)
        self.assertEqual(1e-05, hsp.evalue)
        self.assertEqual(34.7, hsp.bitscore)
        self.assertEqual('PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD', str(hsp.query.seq))
        self.assertEqual('PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID', str(hsp.hit.seq))
        self.assertEqual(78, hsp.bitscore_raw)
        self.assertEqual(15, hsp.ident_num)
        self.assertEqual(26, hsp.pos_num)
        self.assertEqual(0, hsp.gap_num)
        self.assertEqual(60.47, hsp.pos_pct)
        self.assertEqual(0, hsp.query_frame)
        self.assertEqual(1, hsp.hit_frame)

        # last hit of the second qresult
        hit = qresult[-1]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.accession)
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.accession_version)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
        self.assertEqual(33.90, hsp.ident_pct)
        self.assertEqual(59, hsp.aln_span)
        self.assertEqual(31, hsp.mismatch_num)
        self.assertEqual(1, hsp.gapopen_num)
        self.assertEqual(43, hsp.query_start)
        self.assertEqual(94, hsp.query_end)
        self.assertEqual(1056, hsp.hit_start)
        self.assertEqual(1233, hsp.hit_end)
        self.assertEqual(1e-04, hsp.evalue)
        self.assertEqual(31.6, hsp.bitscore)
        self.assertEqual('GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL', str(hsp.query.seq))
        self.assertEqual('GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL', str(hsp.hit.seq))
        self.assertEqual(70.0, hsp.bitscore_raw)
        self.assertEqual(20, hsp.ident_num)
        self.assertEqual(29, hsp.pos_num)
        self.assertEqual(8, hsp.gap_num)
        self.assertEqual(49.15, hsp.pos_pct)
        self.assertEqual(0, hsp.query_frame)
        self.assertEqual(1, hsp.hit_frame)

        # test last qresult
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('gi|11464971:4-101', qresult.id)
        self.assertEqual('gi|11464971:4-101', qresult.accession)
        self.assertEqual('gi|11464971:4-101', qresult.accession_version)
        self.assertEqual('0', qresult.gi)
        self.assertEqual(98, qresult.seq_len)
        self.assertEqual(5, len(qresult))

        # first hit of the last qresult; its first and last HSPs are checked
        hit = qresult[0]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id)
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id_all)
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.accession)
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.accession_version)
        self.assertEqual('0', hit.gi)
        self.assertEqual('0', hit.gi_all)
        self.assertEqual('gi|11464971:4-101', hit.query_id)
        self.assertEqual(772, hit.seq_len)
        self.assertEqual(2, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
        self.assertEqual('gi|11464971:4-101', hsp.query_id)
        self.assertEqual(95.92, hsp.ident_pct)
        self.assertEqual(98, hsp.aln_span)
        self.assertEqual(4, hsp.mismatch_num)
        self.assertEqual(0, hsp.gapopen_num)
        self.assertEqual(0, hsp.query_start)
        self.assertEqual(98, hsp.query_end)
        self.assertEqual(94, hsp.hit_start)
        self.assertEqual(388, hsp.hit_end)
        self.assertEqual(2e-67, hsp.evalue)
        self.assertEqual(199, hsp.bitscore)
        self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq))
        self.assertEqual('KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK', str(hsp.hit.seq))
        self.assertEqual(506.0, hsp.bitscore_raw)
        self.assertEqual(94, hsp.ident_num)
        self.assertEqual(96, hsp.pos_num)
        self.assertEqual(0, hsp.gap_num)
        self.assertEqual(97.96, hsp.pos_pct)
        self.assertEqual(0, hsp.query_frame)
        self.assertEqual(2, hsp.hit_frame)

        hsp = hit.hsps[-1]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
        self.assertEqual('gi|11464971:4-101', hsp.query_id)
        self.assertEqual(29.58, hsp.ident_pct)
        self.assertEqual(71, hsp.aln_span)
        self.assertEqual(46, hsp.mismatch_num)
        self.assertEqual(2, hsp.gapopen_num)
        self.assertEqual(29, hsp.query_start)
        self.assertEqual(96, hsp.query_end)
        self.assertEqual(541, hsp.hit_start)
        self.assertEqual(754, hsp.hit_end)
        self.assertEqual(4e-05, hsp.evalue)
        self.assertEqual(32.7, hsp.bitscore)
        self.assertEqual('IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq))
        self.assertEqual('LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA', str(hsp.hit.seq))
        self.assertEqual(73.0, hsp.bitscore_raw)
        self.assertEqual(21, hsp.ident_num)
        self.assertEqual(33, hsp.pos_num)
        self.assertEqual(4, hsp.gap_num)
        self.assertEqual(46.48, hsp.pos_pct)
        self.assertEqual(0, hsp.query_frame)
        self.assertEqual(2, hsp.hit_frame)

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(3, counter)
    def test_tab_2226_tblastn_010(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_010)

        Pulls the three query results from the parser one at a time,
        checks the ids, e-values and bit scores of selected hits and HSPs,
        and finally confirms that the iterator is exhausted.
        """

        tab_file = get_file('tab_2226_tblastn_010.txt')
        qresults = parse(tab_file, FMT, comments=True)
        counter = 0

        # test first qresult
        # the next() builtin is used instead of qresults.next(), because
        # iterators have no .next() method on Python 3
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('random_s00', qresult.id)
        self.assertEqual(0, len(qresult))

        # test second qresult
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
        self.assertEqual(3, len(qresult))

        # first hit of the second qresult
        hit = qresult[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
        self.assertEqual(1e-05, hsp.evalue)
        self.assertEqual(34.7, hsp.bitscore)

        # last hit of the second qresult
        hit = qresult[-1]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
        self.assertEqual(1e-04, hsp.evalue)
        self.assertEqual(31.6, hsp.bitscore)

        # test last qresult
        qresult = next(qresults)
        counter += 1

        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('db/minirefseq_mrna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual('gi|11464971:4-101', qresult.id)
        self.assertEqual(5, len(qresult))

        # first hit of the last qresult; its first and last HSPs are checked
        hit = qresult[0]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id)
        self.assertEqual('gi|11464971:4-101', hit.query_id)
        self.assertEqual(2, len(hit))

        hsp = hit.hsps[0]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
        self.assertEqual('gi|11464971:4-101', hsp.query_id)
        self.assertEqual(2e-67, hsp.evalue)
        self.assertEqual(199, hsp.bitscore)

        hsp = hit.hsps[-1]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
        self.assertEqual('gi|11464971:4-101', hsp.query_id)
        self.assertEqual(4e-05, hsp.evalue)
        self.assertEqual(32.7, hsp.bitscore)

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(3, counter)
    def test_tab_2226_tblastn_009(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_009)

        The file is parsed with only the ``qseqid`` and ``sseqid`` fields,
        so just the query/hit identifiers and the hit/HSP counts are
        checked for the two query results, followed by a check that the
        iterator is exhausted.
        """

        tab_file = get_file('tab_2226_tblastn_009.txt')
        qresults = parse(tab_file, FMT, fields=('qseqid', 'sseqid'))
        counter = 0

        # test first qresult
        # the next() builtin is used instead of qresults.next(), because
        # iterators have no .next() method on Python 3
        qresult = next(qresults)
        counter += 1

        self.assertEqual('<unknown program>', qresult.program)
        self.assertEqual('<unknown target>', qresult.target)
        self.assertEqual('<unknown version>', qresult.version)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
        self.assertEqual(3, len(qresult))

        # first hit of the first qresult
        hit = qresult[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit[0]
        self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)

        # last hit of the first qresult
        hit = qresult[-1]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
        self.assertEqual(1, len(hit))

        hsp = hit[0]
        self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
        self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)

        # test last qresult
        qresult = next(qresults)
        counter += 1

        self.assertEqual('<unknown program>', qresult.program)
        self.assertEqual('<unknown target>', qresult.target)
        self.assertEqual('<unknown version>', qresult.version)
        self.assertEqual('gi|11464971:4-101', qresult.id)
        self.assertEqual(5, len(qresult))

        # first hit of the last qresult; its first and last HSPs are checked
        hit = qresult[0]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id)
        self.assertEqual('gi|11464971:4-101', hit.query_id)
        self.assertEqual(2, len(hit))

        hsp = hit[0]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
        self.assertEqual('gi|11464971:4-101', hsp.query_id)

        hsp = hit[-1]
        self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
        self.assertEqual('gi|11464971:4-101', hsp.query_id)

        # check if we've finished iteration over qresults
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(2, counter)
    def test_4p79(self):
        """Test parsing 4p79_hhsearch_server_NOssm.hhr

        Checks the only query result, its first hit and HSP, its last hit,
        and the last HSP as reached through ``qresult.hsps``.
        """
        hhr_path = get_file('4p79_hhsearch_server_NOssm.hhr')
        results = parse(hhr_path, FMT)

        # first (and only) query result in the file
        query = next(results)

        expected_hits = 8
        self.assertEqual('HHSUITE', query.program)
        self.assertEqual('4P79:A|PDBID|CHAIN|SEQUENCE', query.id)
        self.assertEqual(198, query.seq_len)
        self.assertEqual(expected_hits, len(query))

        # first hit
        first_hit = query[0]
        first_description = ('cell adhesion protein; cell adhesion, tight junction, membrane; HET: OLC'
                             ', MSE; 2.4A {Mus musculus}')
        self.assertEqual('4P79_A', first_hit.id)
        self.assertEqual(first_description, first_hit.description)
        self.assertTrue(first_hit.is_included)
        self.assertEqual(6.8e-32, first_hit.evalue)
        self.assertEqual(194.63, first_hit.score)
        self.assertEqual(1, len(first_hit))

        first_hsp = first_hit.hsps[0]
        self.assertTrue(first_hsp.is_included)
        self.assertEqual(0, first_hsp.output_index)
        self.assertEqual(6.8e-32, first_hsp.evalue)
        self.assertEqual(194.63, first_hsp.score)
        self.assertEqual(99.94, first_hsp.prob)
        self.assertEqual(0, first_hsp.hit_start)
        self.assertEqual(198, first_hsp.hit_end)
        self.assertEqual(0, first_hsp.query_start)
        self.assertEqual(198, first_hsp.query_end)
        # query and hit are expected to carry the very same sequence here
        full_sequence = ('GSEFMSVAVETFGFFMSALGLLMLGLTLSNSYWRVSTVHGNVITTNTIFENLWYSCATDSLGVSNCWDFPSMLALSGYVQ'
                         'GCRALMITAILLGFLGLFLGMVGLRATNVGNMDLSKKAKLLAIAGTLHILAGACGMVAISWYAVNITTDFFNPLYAGTKY'
                         'ELGPALYLGWSASLLSILGGICVFSTAAASSKEEPATR')
        self.assertEqual(full_sequence, str(first_hsp.query.seq))
        self.assertEqual(full_sequence, str(first_hsp.hit.seq))

        # last hit
        last_hit = query[expected_hits - 1]
        last_description = ('Beta subunit of light-harvesting 1; Photosynthetic core complex, PHOTOSYNTHESIS; '
                            'HET: MQE, BCL, HEM, KGD, BPH;{Roseiflexus castenholzii}; Related PDB entries: 5YQ7_V'
                            ' 5YQ7_3 5YQ7_T 5YQ7_J 5YQ7_9 5YQ7_N 5YQ7_A 5YQ7_P 5YQ7_H 5YQ7_D 5YQ7_5 5YQ7_7 5YQ7_1 '
                            '5YQ7_R')
        self.assertEqual('5YQ7_F', last_hit.id)
        self.assertEqual(last_description, last_hit.description)
        self.assertTrue(last_hit.is_included)
        self.assertEqual(6.7, last_hit.evalue)
        self.assertEqual(20.51, last_hit.score)
        self.assertEqual(1, len(last_hit))

        # the last HSP must also be reachable via the flat HSP list; this
        # test expects as many HSPs as hits
        expected_hsps = expected_hits
        self.assertEqual(expected_hsps, len(query.hsps))

        last_hsp = query.hsps[-1]
        self.assertTrue(last_hsp.is_included)
        self.assertEqual(expected_hsps - 1, last_hsp.output_index)
        self.assertEqual(6.7, last_hsp.evalue)
        self.assertEqual(20.51, last_hsp.score)
        self.assertEqual(52.07, last_hsp.prob)
        self.assertEqual(8, last_hsp.hit_start)
        self.assertEqual(42, last_hsp.hit_end)
        self.assertEqual(5, last_hsp.query_start)
        self.assertEqual(37, last_hsp.query_end)
        self.assertEqual('RTSVVVSTLLGLVMALLIHFVVLSSGAFNWLRAP', str(last_hsp.hit.seq))
        self.assertEqual('SVAVETFGFFMSALGLLMLGLTLSNS--YWRVST', str(last_hsp.query.seq))
    def test_4y9h_nossm(self):
        """Test parsing 4y9h_hhsearch_server_NOssm.hhr

        Checks the only query result, its first hit and HSP, its last hit,
        and the last HSP as reached through ``qresult.hsps``.
        """
        hhr_path = get_file('4y9h_hhsearch_server_NOssm.hhr')
        results = parse(hhr_path, FMT)

        # first (and only) query result in the file
        query = next(results)

        expected_hits = 29
        self.assertEqual('HHSUITE', query.program)
        self.assertEqual('4Y9H:A|PDBID|CHAIN|SEQUENCE', query.id)
        self.assertEqual(226, query.seq_len)
        self.assertEqual(expected_hits, len(query))

        # first hit
        first_hit = query[0]
        first_description = (
            'Bacteriorhodopsin; proton pump, membrane protein, PROTON; HET: L2P, RET; 1.25A {Halobacterium'
            ' salinarum}; Related PDB entries: 1R84_A 1KG8_A 1KME_B 1KGB_A 1KG9_A 1KME_A 4X31_A 5ZIL_A 1E0P_A '
            '4X32_A 5ZIN_A 1S53_B 1S51_B 1S53_A 1S54_A 1F50_A 1S54_B 1S51_A 1F4Z_A 5J7A_A 1S52_B 1S52_A 4Y9H_A '
            '3T45_A 3T45_C 3T45_B 1C3W_A 1L0M_A')
        self.assertEqual('5ZIM_A', first_hit.id)
        self.assertEqual(first_description, first_hit.description)
        self.assertTrue(first_hit.is_included)
        self.assertEqual(2.1e-48, first_hit.evalue)
        self.assertEqual(320.44, first_hit.score)
        self.assertEqual(1, len(first_hit))

        first_hsp = first_hit.hsps[0]
        self.assertTrue(first_hsp.is_included)
        self.assertEqual(0, first_hsp.output_index)
        self.assertEqual(2.1e-48, first_hsp.evalue)
        self.assertEqual(320.44, first_hsp.score)
        self.assertEqual(100.00, first_hsp.prob)
        self.assertEqual(1, first_hsp.hit_start)
        self.assertEqual(227, first_hsp.hit_end)
        self.assertEqual(0, first_hsp.query_start)
        self.assertEqual(226, first_hsp.query_end)
        # hit and query are expected to carry the very same sequence here
        full_sequence = ('GRPEWIWLALGTALMGLGTLYFLVKGMGVSDPDAKKFYAITTLVPAIAFTMYLSMLLGYGLTMVPFGGEQNPIYWARYAD'
                         'WLFTTPLLLLDLALLVDADQGTILALVGADGIMIGTGLVGALTKVYSYRFVWWAISTAAMLYILYVLFFGFTSKAESMRP'
                         'EVASTFKVLRNVTVVLWSAYPVVWLIGSEGAGIVPLNIETLLFMVLDVSAKVGFGLILLRSRAIFG')
        self.assertEqual(full_sequence, str(first_hsp.hit.seq))
        self.assertEqual(full_sequence, str(first_hsp.query.seq))

        # last hit
        last_hit = query[expected_hits - 1]
        last_description = ('PROTEIN TRANSLOCASE SUBUNIT SECY, PROTEIN; TRANSLATION, RIBOSOME, MEMBRANE PROTEIN, '
                            'TRANSLOCON; 8.0A {ESCHERICHIA COLI}')
        self.assertEqual('5ABB_Z', last_hit.id)
        self.assertEqual(last_description, last_hit.description)
        self.assertTrue(last_hit.is_included)
        self.assertEqual(3.3e-05, last_hit.evalue)
        self.assertEqual(51.24, last_hit.score)
        self.assertEqual(1, len(last_hit))

        # the last HSP must also be reachable via the flat HSP list; this
        # test expects as many HSPs as hits
        expected_hsps = expected_hits
        self.assertEqual(expected_hsps, len(query.hsps))
        last_hsp = query.hsps[-1]

        self.assertTrue(last_hsp.is_included)
        self.assertEqual(expected_hsps - 1, last_hsp.output_index)
        self.assertEqual(3.3e-05, last_hsp.evalue)
        self.assertEqual(51.24, last_hsp.score)
        self.assertEqual(96.55, last_hsp.prob)
        self.assertEqual(14, last_hsp.hit_start)
        self.assertEqual(65, last_hsp.hit_end)
        self.assertEqual(7, last_hsp.query_start)
        self.assertEqual(59, last_hsp.query_end)
        self.assertEqual('FWLVTAALLASTVFFFVERDRVS-AKWKTSLTVSGLVTGIAFWHYMYMRGVW', str(last_hsp.hit.seq))
        self.assertEqual('LALGTALMGLGTLYFLVKGMGVSDPDAKKFYAITTLVPAIAFTMYLSMLLGY', str(last_hsp.query.seq))
    def test_tab_2226_tblastn_011(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_011).

        Pulls the three query results from the parser one at a time,
        checks selected hits and HSPs of each, and finally confirms that
        the iterator is exhausted.
        """
        tab_path = get_file("tab_2226_tblastn_011.txt")
        results = parse(tab_path, FMT, comments=True)
        n_seen = 0

        # ---- first query result: no hits ----
        query = next(results)
        n_seen += 1

        self.assertEqual("tblastn", query.program)
        self.assertEqual("db/minirefseq_mrna", query.target)
        self.assertEqual("2.2.26+", query.version)
        self.assertEqual("random_s00", query.id)
        self.assertEqual(0, len(query))

        # ---- second query result ----
        query = next(results)
        n_seen += 1

        query_id = "gi|16080617|ref|NP_391444.1|"
        self.assertEqual("tblastn", query.program)
        self.assertEqual("db/minirefseq_mrna", query.target)
        self.assertEqual("2.2.26+", query.version)
        self.assertEqual(query_id, query.id)
        self.assertEqual(query_id, query.accession)
        self.assertEqual(query_id, query.accession_version)
        self.assertEqual("0", query.gi)
        self.assertEqual(102, query.seq_len)
        self.assertEqual(3, len(query))

        # first hit of the second query
        head_id = "gi|145479850|ref|XM_001425911.1|"
        head_hit = query[0]
        self.assertEqual(head_id, head_hit.id)
        self.assertEqual([head_id], head_hit.id_all)
        self.assertEqual(head_id, head_hit.accession)
        self.assertEqual(head_id, head_hit.accession_version)
        self.assertEqual("0", head_hit.gi)
        self.assertEqual("0", head_hit.gi_all)
        self.assertEqual(query_id, head_hit.query_id)
        self.assertEqual(4632, head_hit.seq_len)
        self.assertEqual(1, len(head_hit))

        head_hsp = head_hit.hsps[0]
        self.assertEqual(head_id, head_hsp.hit_id)
        self.assertEqual(query_id, head_hsp.query_id)
        self.assertEqual(34.88, head_hsp.ident_pct)
        self.assertEqual(43, head_hsp.aln_span)
        self.assertEqual(28, head_hsp.mismatch_num)
        self.assertEqual(0, head_hsp.gapopen_num)
        self.assertEqual(30, head_hsp.query_start)
        self.assertEqual(73, head_hsp.query_end)
        self.assertEqual(1743, head_hsp.hit_start)
        self.assertEqual(1872, head_hsp.hit_end)
        self.assertEqual(1e-05, head_hsp.evalue)
        self.assertEqual(34.7, head_hsp.bitscore)
        self.assertEqual(
            "PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD", head_hsp.query.seq)
        self.assertEqual(
            "PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID", head_hsp.hit.seq)
        self.assertEqual(78, head_hsp.bitscore_raw)
        self.assertEqual(15, head_hsp.ident_num)
        self.assertEqual(26, head_hsp.pos_num)
        self.assertEqual(0, head_hsp.gap_num)
        self.assertEqual(60.47, head_hsp.pos_pct)
        self.assertEqual(0, head_hsp.query_frame)
        self.assertEqual(1, head_hsp.hit_frame)

        # last hit of the second query
        tail_id = "gi|115975252|ref|XM_001180111.1|"
        tail_hit = query[-1]
        self.assertEqual(tail_id, tail_hit.id)
        self.assertEqual(tail_id, tail_hit.accession)
        self.assertEqual(tail_id, tail_hit.accession_version)
        self.assertEqual(query_id, tail_hit.query_id)
        self.assertEqual(1, len(tail_hit))

        tail_hsp = tail_hit.hsps[0]
        self.assertEqual(tail_id, tail_hsp.hit_id)
        self.assertEqual(query_id, tail_hsp.query_id)
        self.assertEqual(33.90, tail_hsp.ident_pct)
        self.assertEqual(59, tail_hsp.aln_span)
        self.assertEqual(31, tail_hsp.mismatch_num)
        self.assertEqual(1, tail_hsp.gapopen_num)
        self.assertEqual(43, tail_hsp.query_start)
        self.assertEqual(94, tail_hsp.query_end)
        self.assertEqual(1056, tail_hsp.hit_start)
        self.assertEqual(1233, tail_hsp.hit_end)
        self.assertEqual(1e-04, tail_hsp.evalue)
        self.assertEqual(31.6, tail_hsp.bitscore)
        self.assertEqual(
            "GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL",
            tail_hsp.query.seq,
        )
        self.assertEqual(
            "GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL",
            tail_hsp.hit.seq,
        )
        self.assertEqual(70.0, tail_hsp.bitscore_raw)
        self.assertEqual(20, tail_hsp.ident_num)
        self.assertEqual(29, tail_hsp.pos_num)
        self.assertEqual(8, tail_hsp.gap_num)
        self.assertEqual(49.15, tail_hsp.pos_pct)
        self.assertEqual(0, tail_hsp.query_frame)
        self.assertEqual(1, tail_hsp.hit_frame)

        # ---- last query result ----
        query = next(results)
        n_seen += 1

        query_id = "gi|11464971:4-101"
        self.assertEqual("tblastn", query.program)
        self.assertEqual("db/minirefseq_mrna", query.target)
        self.assertEqual("2.2.26+", query.version)
        self.assertEqual(query_id, query.id)
        self.assertEqual(query_id, query.accession)
        self.assertEqual(query_id, query.accession_version)
        self.assertEqual("0", query.gi)
        self.assertEqual(98, query.seq_len)
        self.assertEqual(5, len(query))

        # first hit of the last query; its first and last HSPs are checked
        head_id = "gi|350596019|ref|XM_003360601.2|"
        head_hit = query[0]
        self.assertEqual(head_id, head_hit.id)
        self.assertEqual([head_id], head_hit.id_all)
        self.assertEqual(head_id, head_hit.accession)
        self.assertEqual(head_id, head_hit.accession_version)
        self.assertEqual("0", head_hit.gi)
        self.assertEqual("0", head_hit.gi_all)
        self.assertEqual(query_id, head_hit.query_id)
        self.assertEqual(772, head_hit.seq_len)
        self.assertEqual(2, len(head_hit))

        head_hsp = head_hit.hsps[0]
        self.assertEqual(head_id, head_hsp.hit_id)
        self.assertEqual(query_id, head_hsp.query_id)
        self.assertEqual(95.92, head_hsp.ident_pct)
        self.assertEqual(98, head_hsp.aln_span)
        self.assertEqual(4, head_hsp.mismatch_num)
        self.assertEqual(0, head_hsp.gapopen_num)
        self.assertEqual(0, head_hsp.query_start)
        self.assertEqual(98, head_hsp.query_end)
        self.assertEqual(94, head_hsp.hit_start)
        self.assertEqual(388, head_hsp.hit_end)
        self.assertEqual(2e-67, head_hsp.evalue)
        self.assertEqual(199, head_hsp.bitscore)
        self.assertEqual(
            "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
            head_hsp.query.seq,
        )
        self.assertEqual(
            "KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK",
            head_hsp.hit.seq,
        )
        self.assertEqual(506.0, head_hsp.bitscore_raw)
        self.assertEqual(94, head_hsp.ident_num)
        self.assertEqual(96, head_hsp.pos_num)
        self.assertEqual(0, head_hsp.gap_num)
        self.assertEqual(97.96, head_hsp.pos_pct)
        self.assertEqual(0, head_hsp.query_frame)
        self.assertEqual(2, head_hsp.hit_frame)

        tail_hsp = head_hit.hsps[-1]
        self.assertEqual(head_id, tail_hsp.hit_id)
        self.assertEqual(query_id, tail_hsp.query_id)
        self.assertEqual(29.58, tail_hsp.ident_pct)
        self.assertEqual(71, tail_hsp.aln_span)
        self.assertEqual(46, tail_hsp.mismatch_num)
        self.assertEqual(2, tail_hsp.gapopen_num)
        self.assertEqual(29, tail_hsp.query_start)
        self.assertEqual(96, tail_hsp.query_end)
        self.assertEqual(541, tail_hsp.hit_start)
        self.assertEqual(754, tail_hsp.hit_end)
        self.assertEqual(4e-05, tail_hsp.evalue)
        self.assertEqual(32.7, tail_hsp.bitscore)
        self.assertEqual(
            "IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA",
            tail_hsp.query.seq,
        )
        self.assertEqual(
            "LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA",
            tail_hsp.hit.seq,
        )
        self.assertEqual(73.0, tail_hsp.bitscore_raw)
        self.assertEqual(21, tail_hsp.ident_num)
        self.assertEqual(33, tail_hsp.pos_num)
        self.assertEqual(4, tail_hsp.gap_num)
        self.assertEqual(46.48, tail_hsp.pos_pct)
        self.assertEqual(0, tail_hsp.query_frame)
        self.assertEqual(2, tail_hsp.hit_frame)

        # nothing may be left in the iterator after the third query result
        self.assertRaises(StopIteration, next, results)
        self.assertEqual(3, n_seen)
    def test_q9bsu1(self):
        """Test parsing hhsearch_q9bsu1_uniclust_w_ss_pfamA_30.hhr.

        Checks the query-level fields, the first and last hits, and the
        first and last HSPs of the first query result yielded by the parser.
        """
        txt_file = get_file('hhsearch_q9bsu1_uniclust_w_ss_pfamA_30.hhr')
        qresults = parse(txt_file, FMT)

        # test first and only qresult
        qresult = next(qresults)

        num_hits = 12
        self.assertEqual('HHSUITE', qresult.program)
        # The expected ID is the complete query description string; the
        # organism field must read "Homo sapiens".
        self.assertEqual('sp|Q9BSU1|CP070_HUMAN UPF0183 protein C16orf70 OS=Homo sapiens OX=9606 GN=C16orf70'
                         ' PE=1 SV=1', qresult.id)
        self.assertEqual(422, qresult.seq_len)
        self.assertEqual(num_hits, len(qresult))

        # Check first hit
        hit = qresult[0]
        self.assertEqual('PF03676.13', hit.id)
        self.assertEqual('UPF0183 ; Uncharacterised protein family (UPF0183)', hit.description)
        self.assertTrue(hit.is_included)
        self.assertEqual(2e-106, hit.evalue)
        self.assertEqual(822.75, hit.score)
        self.assertEqual(1, len(hit))

        # Check the only HSP of the first hit, including both aligned sequences
        hsp = hit.hsps[0]
        self.assertTrue(hsp.is_included)
        self.assertEqual(0, hsp.output_index)
        self.assertEqual(2e-106, hsp.evalue)
        self.assertEqual(822.75, hsp.score)
        self.assertEqual(100.00, hsp.prob)
        self.assertEqual(0, hsp.hit_start)
        self.assertEqual(395, hsp.hit_end)
        self.assertEqual(10, hsp.query_start)
        self.assertEqual(407, hsp.query_end)
        self.assertEqual('SLGNEQWEFTLGMPLAQAVAILQKHCRIIKNVQVLYSEQSPLSHDLILNLTQDGIKLMFDAFNQRLKVIEVCDLTKVKLK'
                         'YCGVHFNSQAIAPTIEQIDQSFGATHPGVYNSAEQLFHLNFRGLSFSFQLDSWTEAPKYEPNFAHGLASLQIPHGATVKR'
                         'MYIYSGNSLQDTKAPMMPLSCFLGNVYAESVDVLRDGTGPAGLRLRLLAAGCGPGLLADAKMRVFERSVYFGDSCQDVLS'
                         'MLGSPHKVFYKSEDKMKIHSPSPHKQVPSKCNDYFFNYFTLGVDILFDANTHKVKKFVLHTNYPGHYNFNIYHRCEFKIP'
                         'LAIKKENADGQTE--TCTTYSKWDNIQELLGHPVEKPVVLHRSSSPNNTNPFGSTFCFGLQRMIFEVMQNNHIASVTLY',
                         str(hsp.query.seq))
        self.assertEqual('EQWE----FALGMPLAQAISILQKHCRIIKNVQVLYSEQMPLSHDLILNLTQDGIKLLFDACNQRLKVIEVYDLTKVKLK'
                         'YCGVHFNSQAIAPTIEQIDQSFGATHPGVYNAAEQLFHLNFRGLSFSFQLDSWSEAPKYEPNFAHGLASLQIPHGATVKR'
                         'MYIYSGNNLQETKAPAMPLACFLGNVYAECVEVLRDGAGPLGLKLRLLTAGCGPGVLADTKVRAVERSIYFGDSCQDVLS'
                         'ALGSPHKVFYKSEDKMKIHSPSPHKQVPSKCNDYFFNYYILGVDILFDSTTHLVKKFVLHTNFPGHYNFNIYHRCDFKIP'
                         'LIIKKDGADAHSEDCILTTYSKWDQIQELLGHPMEKPVVLHRSSSANNTNPFGSTFCFGLQRMIFEVMQNNHIASVTLY',
                         str(hsp.hit.seq))

        # Check last hit
        hit = qresult[num_hits - 1]
        self.assertEqual('PF10049.8', hit.id)
        self.assertEqual('DUF2283 ; Protein of unknown function (DUF2283)', hit.description)
        self.assertTrue(hit.is_included)
        self.assertEqual(78, hit.evalue)
        self.assertEqual(19.81, hit.score)
        self.assertEqual(1, len(hit))

        # Check we can get the original last HSP from the file.
        # There are more HSPs (16) than hits (12) in this query result.
        num_hsps = 16
        self.assertEqual(num_hsps, len(qresult.hsps))

        hsp = qresult.hsps[-1]
        self.assertTrue(hsp.is_included)
        self.assertEqual(num_hsps - 1, hsp.output_index)
        self.assertEqual(78, hsp.evalue)
        self.assertEqual(19.81, hsp.score)
        self.assertEqual(20.88, hsp.prob)
        self.assertEqual(25, hsp.hit_start)
        self.assertEqual(48, hsp.hit_end)
        self.assertEqual(61, hsp.query_start)
        self.assertEqual(85, hsp.query_end)
        self.assertEqual('APNVIFDYDA-EGRIVGIELLDAR', str(hsp.hit.seq))
        self.assertEqual('QDGIKLMFDAFNQRLKVIEVCDLT', str(hsp.query.seq))
    def test_hmmpfam_24(self):
        """Test parsing hmmpfam 2.4 file (text_24_hmmpfam_001.out).

        Checks the number of query results, the first (hit-less) and fourth
        query results, and selected hits and HSPs of the fourth one.
        """
        results = list(
            parse(path.join("Hmmer", "text_24_hmmpfam_001.out"), self.fmt))
        self.assertEqual(5, len(results))

        # first qresult
        res = results[0]
        self.assertEqual("random_s00", res.id)
        self.assertEqual("[none]", res.accession)
        self.assertEqual("[none]", res.description)
        self.assertEqual("hmmpfam", res.program)
        self.assertEqual("2.4i", res.version)
        self.assertEqual("/home/bow/db/hmmer/Pfam_fs", res.target)
        self.assertEqual(0, len(res))

        # fourth qresult
        res = results[3]
        self.assertEqual("gi|22748937|ref|NP_065801.1|", res.id)
        self.assertEqual("[none]", res.accession)
        # The organism name in the description must read "Homo sapiens".
        self.assertEqual("exportin-5 [Homo sapiens]", res.description)
        self.assertEqual("hmmpfam", res.program)
        self.assertEqual("2.4i", res.version)
        self.assertEqual("/home/bow/db/hmmer/Pfam_fs", res.target)
        self.assertEqual(33, len(res))

        # fourth qresult, first hit
        hit = res[0]
        self.assertEqual("Xpo1", hit.id)
        self.assertEqual("Exportin 1-like protein", hit.description)
        self.assertAlmostEqual(170.1, hit.bitscore)
        self.assertAlmostEqual(5.1e-48, hit.evalue)
        self.assertEqual(1, hit.domain_obs_num)
        self.assertEqual(1, len(hit))

        # fourth qresult, first hit, first hsp
        hsp = hit[0]
        self.assertEqual(1, hsp.domain_index)
        self.assertAlmostEqual(170.1, hsp.bitscore)
        # This is the hit's only HSP and has the same bit score as the hit,
        # so the expected E-value mirrors the hit-level 5.1e-48.
        # NOTE(review): assertAlmostEqual rounds the difference to 7 decimal
        # places by default, so it cannot tell E-values this small apart --
        # confirm the value against the data file and consider a stricter check.
        self.assertAlmostEqual(5.1e-48, hsp.evalue)
        self.assertEqual(108, hsp.query_start)
        self.assertEqual(271, hsp.query_end)
        self.assertEqual("..", hsp.query_endtype)
        # Long alignments are only compared on their first and last 40 columns.
        self.assertEqual("ENHIKDALSRIVVEMIKREWPQHWPDMLIELDTLSKQG--",
                         str(hsp.query.seq)[:40])
        self.assertEqual(
            "+++++  L+++++e++k+ewP++Wp+ + +l  l++++  ",
            str(hsp.aln_annotation["similarity"])[:40],
        )
        self.assertEqual("WVSMSHITA-ENCkLLEILCLLL----NEQELQLGAAECL",
                         str(hsp.query.seq)[-40:])
        self.assertEqual(0, hsp.hit_start)
        self.assertEqual(178, hsp.hit_end)
        self.assertEqual("[]", hsp.hit_endtype)
        self.assertEqual("pkflrnKLalalaelakqewPsnWpsffpdlvsllsssss",
                         str(hsp.hit.seq)[:40])
        self.assertEqual(
            "W+++++i + ++++ll++l+ lL    +  +l++ A+eCL",
            str(hsp.aln_annotation["similarity"])[-40:],
        )
        self.assertEqual("Wipiglianvnpi.llnllfslLsgpesdpdlreaAveCL",
                         str(hsp.hit.seq)[-40:])

        # fourth qresult, second from last hit
        hit = res[-2]
        self.assertEqual("Rad50_zn_hook", hit.id)
        self.assertEqual("Rad50 zinc hook motif", hit.description)
        self.assertAlmostEqual(2.2, hit.bitscore)
        self.assertAlmostEqual(9.2, hit.evalue)
        self.assertEqual(2, hit.domain_obs_num)
        self.assertEqual(2, len(hit))

        # fourth qresult, second from last hit, first hsp
        hsp = hit[0]
        self.assertEqual(1, hsp.domain_index)
        self.assertAlmostEqual(0.8, hsp.bitscore)
        self.assertAlmostEqual(22, hsp.evalue)
        self.assertEqual(20, hsp.query_start)
        self.assertEqual(47, hsp.query_end)
        self.assertEqual("..", hsp.query_endtype)
        self.assertEqual("MDPNSTQRYRLEALKFCEEFKE-KCPIC", str(hsp.query.seq))
        self.assertEqual(0, hsp.hit_start)
        self.assertEqual(28, hsp.hit_end)
        self.assertEqual("[.", hsp.hit_endtype)
        self.assertEqual("galesekaelkkaieeleeeesscCPvC", str(hsp.hit.seq))

        # fourth qresult, second from last hit, last hsp
        hsp = hit[-1]
        self.assertEqual(2, hsp.domain_index)
        self.assertAlmostEqual(1.3, hsp.bitscore)
        self.assertAlmostEqual(16, hsp.evalue)
        self.assertEqual(789, hsp.query_start)
        self.assertEqual(811, hsp.query_end)
        self.assertEqual("..", hsp.query_endtype)
        self.assertEqual("EMLAKMAEPFTKALDMLDAEKS", str(hsp.query.seq))
        self.assertEqual(0, hsp.hit_start)
        self.assertEqual(22, hsp.hit_end)
        self.assertEqual("[.", hsp.hit_endtype)
        self.assertEqual("galesekaelkkaieeleeees", str(hsp.hit.seq))
    def test_hmmpfam_21(self):
        """Check the first query result parsed from an hmmpfam 2.1 report.

        The input is Hmmer/text_21_hmmpfam_001.out. The query-level fields,
        the first hit and both HSPs of that hit are compared with literals.
        """
        report = path.join("Hmmer", "text_21_hmmpfam_001.out")
        records = parse(report, self.fmt)
        res = next(records)

        # Query-level attributes, checked in this order.
        for attr, wanted in (
            ("id", "roa1_drome"),
            ("description", "<unknown description>"),
            ("program", "hmmpfam"),
            ("version", "2.1.1"),
            ("target", "pfam"),
        ):
            self.assertEqual(wanted, getattr(res, attr))
        self.assertEqual(1, len(res))

        hit = res[0]
        for attr, wanted in (("id", "SEED"), ("description", "<unknown description>")):
            self.assertEqual(wanted, getattr(hit, attr))
        for attr, wanted in (("bitscore", 146.1), ("evalue", 6.3e-40)):
            self.assertAlmostEqual(wanted, getattr(hit, attr))
        self.assertEqual(2, hit.domain_obs_num)
        self.assertEqual(2, len(hit))

        # Attributes compared exactly / approximately for every HSP.
        exact_attrs = (
            "domain_index",
            "hit_start",
            "hit_end",
            "hit_endtype",
            "query_start",
            "query_end",
            "query_endtype",
        )
        approx_attrs = ("bitscore", "evalue")

        # One entry per HSP, in hit order: exact values, approximate values,
        # then the hit sequence, similarity line and query sequence.
        hsp_expectations = (
            (
                (1, 0, 77, "[]", 32, 103, ".."),
                (71.2, 2.2e-17),
                "lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv",
                "lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf+F+++  ++  + A +    +++++gr+++ ",
                "LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP",
            ),
            (
                (2, 0, 77, "[]", 123, 194, ".."),
                (75.5, 1.1e-18),
                "lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv",
                "lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +GfaFVeF++++ ++k +     ++l+g+ + v",
                "LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV",
            ),
        )
        for position, expectation in enumerate(hsp_expectations):
            exact, approx, hit_seq, similarity, query_seq = expectation
            hsp = hit[position]
            for attr, wanted in zip(exact_attrs, exact):
                self.assertEqual(wanted, getattr(hsp, attr))
            for attr, wanted in zip(approx_attrs, approx):
                self.assertAlmostEqual(wanted, getattr(hsp, attr))
            self.assertEqual(hit_seq, str(hsp.hit.seq))
            self.assertEqual(similarity, str(hsp.aln_annotation["similarity"]))
            self.assertEqual(query_seq, str(hsp.query.seq))
    def test_9590198(self):
        """Test parsing hhpred_9590198.hhr.

        Checks the query-level fields, the first and last hits, and the
        first and last HSPs of the first query result yielded by the parser.
        """
        txt_file = get_file('hhpred_9590198.hhr')
        qresults = parse(txt_file, FMT)

        # test first and only qresult
        qresult = next(qresults)

        num_hits = 22
        self.assertEqual('HHSUITE', qresult.program)
        # The expected ID is the complete query description string; the
        # organism field must read "Homo sapiens".
        self.assertEqual('sp|Q9BSU1|CP070_HUMAN UPF0183 protein C16orf70 OS=Homo sapiens OX=9606 GN=C16orf70'
                         ' PE=1 SV=1',
                         qresult.id)
        self.assertEqual(422, qresult.seq_len)
        self.assertEqual(num_hits, len(qresult))

        # Check first hit
        hit = qresult[0]
        self.assertEqual('PF03676.14', hit.id)
        self.assertEqual('UPF0183 ; Uncharacterised protein family (UPF0183)', hit.description)
        self.assertTrue(hit.is_included)
        self.assertEqual(9.9e-102, hit.evalue)
        self.assertEqual(792.76, hit.score)
        self.assertEqual(1, len(hit))

        # Check the only HSP of the first hit, including both aligned sequences
        hsp = hit.hsps[0]
        self.assertTrue(hsp.is_included)
        self.assertEqual(0, hsp.output_index)
        self.assertEqual(9.9e-102, hsp.evalue)
        self.assertEqual(792.76, hsp.score)
        self.assertEqual(100.00, hsp.prob)
        self.assertEqual(0, hsp.hit_start)
        self.assertEqual(394, hsp.hit_end)
        self.assertEqual(21, hsp.query_start)
        self.assertEqual(407, hsp.query_end)
        self.assertEqual('GMHFSQSVAIIQSQVGTIRGVQVLYSDQNPLSVDLVINMPQDGMRLIFDPVAQRLKIIEIYNMKLVKLRYSGMCFNSPEI'
                         'TPSIEQVEHCFGATHPGLYDSQRHLFALNFRGLSFYFPVDS-----KFEPGYAHGLGSLQFPNGGSPVVSRTTIYYGSQH'
                         'QLSSNTSSRVSGVPLPDLPLSCYRQQLHLRRCDVLRNTTSTMGLRLHMFTEGT--SRALEPSQVALVRVVRFGDSCQGVA'
                         'RALGAPARLYYKADDKMRIHRPTARRR-PPPASDYLFNYFTLGLDVLFDARTNQVKKFVLHTNYPGHYNFNMYHRCEFEL'
                         'TVQPD-KSEAHSLVESGGGVAVTAYSKWEVVSRAL-RVCERPVVLNRASSTNTTNPFGSTFCYGYQDIIFEVMSNNYIAS'
                         'ITLY', str(hsp.hit.seq))
        self.assertEqual('GMPLAQAVAILQKHCRIIKNVQVLYSEQSPLSHDLILNLTQDGIKLMFDAFNQRLKVIEVCDLTKVKLKYCGVHFNSQAI'
                         'APTIEQIDQSFGATHPGVYNSAEQLFHLNFRGLSFSFQLDSWTEAPKYEPNFAHGLASLQIPHGA--TVKRMYIYSGNSL'
                         'Q---------DTKA-PMMPLSCFLGNVYAESVDVLRDGTGPAGLRLRLLAAGCGPGLLADAKMRVFERSVYFGDSCQDVL'
                         'SMLGSPHKVFYKSEDKMKIHSPSPHKQVPSKCNDYFFNYFTLGVDILFDANTHKVKKFVLHTNYPGHYNFNIYHRCEFKI'
                         'PLAIKKENADG------QTETCTTYSKWDNIQELLGHPVEKPVVLHRSSSPNNTNPFGSTFCFGLQRMIFEVMQNNHIAS'
                         'VTLY', str(hsp.query.seq))

        # Check last hit
        hit = qresult[num_hits - 1]
        self.assertEqual('4IL7_A', hit.id)
        self.assertEqual('Putative uncharacterized protein; partial jelly roll fold, hypothetical; 1.4A '
                         '{Sulfolobus turreted icosahedral virus}', hit.description)
        self.assertTrue(hit.is_included)
        self.assertEqual(6.8e+02, hit.evalue)
        self.assertEqual(22.72, hit.score)
        self.assertEqual(1, len(hit))

        # Check we can get the original last HSP from the file.
        # There are more HSPs (34) than hits (22) in this query result.
        num_hsps = 34
        self.assertEqual(num_hsps, len(qresult.hsps))

        hsp = qresult.hsps[-1]
        self.assertTrue(hsp.is_included)
        self.assertEqual(num_hsps - 1, hsp.output_index)
        self.assertEqual(3.9e+02, hsp.evalue)
        self.assertEqual(22.84, hsp.score)
        self.assertEqual(21.56, hsp.prob)
        self.assertEqual(7, hsp.hit_start)
        self.assertEqual(96, hsp.hit_end)
        self.assertEqual(18, hsp.query_start)
        self.assertEqual(114, hsp.query_end)
        self.assertEqual('FTLGMPLAQAVAILQKHCRIIKNVQVLYSEQSPLSHDLILNLTQDGIKLMFDAFNQRLKVIEVCDLTKVKLKYCGVH-FN'
                         'SQAIAPTIEQIDQSFGA', str(hsp.query.seq))
        self.assertEqual('IQFGMDRTLVWQLAGADQSCSDQVERIICYNNPDH-------YGPQGHFFFNA-ADKLIHKRQMELFPAPKPTMRLATYN'
                         'KTQTGMTEAQFWAAVPS', str(hsp.hit.seq))
Beispiel #52
0
# Read the merged presence/absence table; column 0 is used as the index.
df = pd.read_table('../presence_absence-merged.tab', index_col=0)

#
# evaluate hmmer outputs
#
# hmm_positives maps a group name to summary statistics of the first hit in
# that group's HMMER report, for groups whose first hit has evalue <= 1e-10.
hmm_positives = {}
for group in groups.keys():

    # Report file names use '-' where the group name has '&'.
    group = group.replace('&', '-')

    result = group + '.hmm.hmmout'
    result_path = '%s/hmm/%s' % (vfdb_folder, result)
    if not isfile(result_path):
        # print() with a single pre-formatted argument produces the same
        # output on Python 2 and Python 3.
        print('%s not found!' % result)
        continue

    # Only the first query result of the report is used. The next() builtin
    # replaces the Python 2-only iterator method .next().
    result = next(parse(result_path, 'hmmer3-text'))

    # Skip reports without any hit.
    if not result.hits:
        continue

    # The first hit is treated as the best one.
    best_hit = result.hits[0]
    if best_hit.evalue <= 1e-10:
        hsp = best_hit.hsps[0]
        hmm_positives[group] = {
            'bitscore'  : best_hit.bitscore,
            'bias'      : best_hit.bias,
            'evalue'    : best_hit.evalue,
            'acc'       : hsp.acc_avg,
            # Query span of the first HSP divided by result.seq_len.
            'coverage'  : float(hsp.query_end - hsp.query_start) / result.seq_len,
            'desc'      : control_ids[best_hit.id]
        }
    def test_30_hmmscan_002(self):
        """Parse hmmer3-tab output of hmmscan 3.0 for one query without hits (tab_30_hmmscan_002)."""
        no_hit_path = get_file('tab_30_hmmscan_002.out')
        parsed = parse(no_hit_path, FMT)

        # The very first request for a query result must already exhaust the iterator.
        with self.assertRaises(StopIteration):
            next(parsed)
    def test_allx(self):
        """Parse allx.hhr and verify its first and last hits and HSPs."""

        def expect_fields(record, pairs):
            """Assert, in order, that each named attribute equals its expected value."""
            for field, wanted in pairs:
                self.assertEqual(wanted, getattr(record, field))

        source = get_file('allx.hhr')
        parsed = parse(source, FMT)

        # Only the first query result is inspected.
        qresult = next(parsed)

        hit_total = 10
        expect_fields(qresult, (
            ('program', 'HHSUITE'),
            ('id', 'Only X amino acids'),
            ('seq_len', 39),
        ))
        self.assertEqual(hit_total, len(qresult))

        # First hit.
        first_hit = qresult[0]
        expect_fields(first_hit, (
            ('id', '1klr_A'),
            ('description',
             'Zinc finger Y-chromosomal protein; transcription; NMR {Synthetic} SCOP: g.37.1.1 PDB: '
             '5znf_A 1kls_A 1xrz_A* 7znf_A'),
        ))
        self.assertTrue(first_hit.is_included)
        expect_fields(first_hit, (('evalue', 3.4E+04), ('score', -0.01)))
        self.assertEqual(1, len(first_hit))

        # First HSP of the first hit.
        first_hsp = first_hit.hsps[0]
        self.assertTrue(first_hsp.is_included)
        expect_fields(first_hsp, (
            ('output_index', 0),
            ('evalue', 3.4E+04),
            ('score', -0.01),
            ('prob', 0.04),
            ('hit_start', 23),
            ('hit_end', 24),
            ('query_start', 38),
            ('query_end', 39),
        ))
        self.assertEqual('T', str(first_hsp.hit.seq))
        self.assertEqual('X', str(first_hsp.query.seq))

        # Last hit.
        last_hit = qresult[hit_total - 1]
        expect_fields(last_hit, (
            ('id', '1zfd_A'),
            ('description',
             'SWI5; DNA binding motif, zinc finger DNA binding domain; NMR {Saccharomyces cerevisiae}'
             ' SCOP: g.37.1.1'),
        ))
        self.assertTrue(last_hit.is_included)
        expect_fields(last_hit, (('evalue', 3.6e+04), ('score', 0.03)))
        self.assertEqual(1, len(last_hit))

        # Last HSP of the whole query result; the HSP count equals the hit count here.
        hsp_total = hit_total
        self.assertEqual(hsp_total, len(qresult.hsps))
        last_hsp = qresult.hsps[-1]

        self.assertTrue(last_hsp.is_included)
        expect_fields(last_hsp, (
            ('output_index', hsp_total - 1),
            ('evalue', 3.6e+04),
            ('score', 0.03),
            ('prob', 0.03),
            ('hit_start', 0),
            ('hit_end', 1),
            ('query_start', 3),
            ('query_end', 4),
        ))
        self.assertEqual('D', str(last_hsp.hit.seq))
        self.assertEqual('X', str(last_hsp.query.seq))
    def test_30_hmmscan_001(self):
        """Parse hmmer3-tab output of hmmscan 3.0 with multiple queries (tab_30_hmmscan_001).

        Every query result, every hit and the first HSP of every hit are
        compared attribute by attribute with the expected values below.
        """
        # Attribute names, in the order in which they are asserted.
        hit_attrs = (
            'id', 'acc', 'evalue', 'bitscore', 'bias', 'domain_exp_num',
            'region_num', 'cluster_num', 'overlap_num', 'env_num',
            'domain_obs_num', 'domain_reported_num', 'domain_included_num',
            'description',
        )
        hsp_attrs = ('evalue', 'bitscore', 'bias')

        # (query id, ((hit values, first-HSP values), ...)) per query result,
        # in file order; value tuples line up with hit_attrs / hsp_attrs.
        expected_queries = (
            # first qresult
            ('gi|4885477|ref|NP_005359.1|', (
                (('Globin', 'PF00042.17', 6e-21, 74.6, 0.3, 1.3,
                  1, 0, 0, 1, 1, 1, 1, 'Globin'),
                 (9.2e-21, 74.0, 0.2)),
            )),
            # second qresult
            ('gi|126362951:116-221', (
                (('Ig_3', 'PF13927.1', 1.4e-09, 38.2, 0.4, 1.3,
                  1, 0, 0, 1, 1, 1, 1, 'Immunoglobulin domain'),
                 (2.1e-09, 37.6, 0.3)),
                (('Ig_2', 'PF13895.1', 3.5e-05, 23.7, 0.1, 1.1,
                  1, 0, 0, 1, 1, 1, 1, 'Immunoglobulin domain'),
                 (4.3e-05, 23.4, 0.1)),
            )),
            # third qresult
            ('gi|22748937|ref|NP_065801.1|', (
                (('Xpo1', 'PF08389.7', 7.8e-34, 116.6, 7.8, 2.8,
                  2, 0, 0, 2, 2, 2, 1, 'Exportin 1-like protein'),
                 (1.1e-33, 116.1, 3.4)),
                (('IBN_N', 'PF03810.14', 0.0039, 16.9, 0.0, 2.7,
                  2, 0, 0, 2, 2, 2, 1, 'Importin-beta N-terminal domain'),
                 (0.033, 14.0, 0.0)),
            )),
            # last qresult
            ('gi|125490392|ref|NP_038661.2|', (
                (('Pou', 'PF00157.12', 7e-37, 124.8, 0.5, 1.5,
                  1, 0, 0, 1, 1, 1, 1, 'Pou domain - N-terminal to homeobox domain'),
                 (1.4e-36, 123.9, 0.3)),
                (('Homeobox', 'PF00046.24', 2.1e-18, 65.5, 1.1, 1.5,
                  1, 0, 0, 1, 1, 1, 1, 'Homeobox domain'),
                 (4.1e-18, 64.6, 0.7)),
                (('HTH_31', 'PF13560.1', 0.012, 15.6, 0.0, 2.2,
                  2, 0, 0, 2, 2, 2, 0, 'Helix-turn-helix domain'),
                 (0.16, 12.0, 0.0)),
                (('Homeobox_KN', 'PF05920.6', 0.039, 13.5, 0.0, 1.6,
                  1, 0, 0, 1, 1, 1, 0, 'Homeobox KN domain'),
                 (0.095, 12.3, 0.0)),
                (('DUF521', 'PF04412.8', 0.14, 10.5, 0.1, 1.4,
                  1, 0, 0, 1, 1, 1, 0, 'Protein of unknown function (DUF521)'),
                 (0.26, 9.6, 0.1)),
            )),
        )

        tab_file = get_file('tab_30_hmmscan_001.out')
        qresults = parse(tab_file, FMT)
        counter = 0

        for query_id, expected_hits in expected_queries:
            qresult = next(qresults)
            counter += 1
            # Every hit of each query result is listed in the table above.
            self.assertEqual(len(expected_hits), len(qresult))
            self.assertEqual(query_id, qresult.id)
            self.assertEqual('-', qresult.acc)

            for position, (hit_values, hsp_values) in enumerate(expected_hits):
                hit = qresult[position]
                # Each expected hit carries exactly one HSP.
                self.assertEqual(1, len(hit))
                for attr, wanted in zip(hit_attrs, hit_values):
                    self.assertEqual(wanted, getattr(hit, attr))
                hsp = hit.hsps[0]
                for attr, wanted in zip(hsp_attrs, hsp_values):
                    self.assertEqual(wanted, getattr(hsp, attr))

        # test if we've properly finished iteration
        with self.assertRaises(StopIteration):
            next(qresults)
        self.assertEqual(4, counter)
    def test_2uvo(self):
        """Parse 2uvo_hhblits.hhr and verify its first and last hits and HSPs."""

        def expect_fields(record, pairs):
            """Assert, in order, that each named attribute equals its expected value."""
            for field, wanted in pairs:
                self.assertEqual(wanted, getattr(record, field))

        source = get_file('2uvo_hhblits.hhr')
        parsed = parse(source, FMT)

        # Only the first query result is inspected.
        qresult = next(parsed)

        hit_total = 16
        expect_fields(qresult, (
            ('program', 'HHSUITE'),
            ('id', '2UVO:A|PDBID|CHAIN|SEQUENCE'),
            ('seq_len', 171),
        ))
        self.assertEqual(hit_total, len(qresult))

        # First hit.
        first_hit = qresult[0]
        expect_fields(first_hit, (
            ('id', '2uvo_A'),
            ('description',
             'Agglutinin isolectin 1; carbohydrate-binding protein, hevein domain, chitin-binding,'
             ' GERM agglutinin, chitin-binding protein; HET: NDG NAG GOL; 1.40A {Triticum aestivum}'
             ' PDB: 1wgc_A* 2cwg_A* 2x3t_A* 4aml_A* 7wga_A 9wga_A 2wgc_A 1wgt_A 1k7t_A* 1k7v_A* 1k7u_A'
             ' 2x52_A* 1t0w_A*'),
        ))
        self.assertTrue(first_hit.is_included)
        expect_fields(first_hit, (('evalue', 3.7e-34), ('score', 210.31)))
        self.assertEqual(2, len(first_hit))

        # First HSP of the first hit: hit and query sequences are expected to
        # be the same string.
        full_length_seq = (
            'ERCGEQGSNMECPNNLCCSQYGYCGMGGDYCGKGCQNGACWTSKRCGSQAGGATCTNNQCCSQYGYCGFGAEYC'
            'GAGCQGGPCRADIKCGSQAGGKLCPNNLCCSQWGFCGLGSEFCGGGCQSGACSTDKPCGKDAGGRVCTNNYCCS'
            'KWGSCGIGPGYCGAGCQSGGCDG'
        )
        first_hsp = first_hit.hsps[0]
        self.assertTrue(first_hsp.is_included)
        expect_fields(first_hsp, (
            ('output_index', 0),
            ('prob', 99.95),
            ('score', 210.31),
            ('evalue', 3.7e-34),
            ('hit_start', 0),
            ('hit_end', 171),
            ('query_start', 0),
            ('query_end', 171),
        ))
        self.assertEqual(full_length_seq, str(first_hsp.hit.seq))
        self.assertEqual(full_length_seq, str(first_hsp.query.seq))

        # Last hit.
        last_hit = qresult[hit_total - 1]
        expect_fields(last_hit, (
            ('id', '4z8i_A'),
            ('description',
             'BBTPGRP3, peptidoglycan recognition protein 3; chitin-binding domain, '
             'AM hydrolase; 2.70A {Branchiostoma belcheri tsingtauense}'),
        ))
        self.assertTrue(last_hit.is_included)
        expect_fields(last_hit, (('evalue', 0.11), ('score', 36.29)))
        self.assertEqual(2, len(last_hit))

        # Last HSP of the whole query result.
        hsp_total = 32
        self.assertEqual(hsp_total, len(qresult.hsps))

        last_hsp = qresult.hsps[-1]
        self.assertTrue(last_hsp.is_included)
        expect_fields(last_hsp, (
            ('output_index', hsp_total - 1),
            ('evalue', 2.6),
            ('score', 25.90),
            ('prob', 40.43),
            ('hit_start', 10),
            ('hit_end', 116),
            ('query_start', 53),
            ('query_end', 163),
        ))
        self.assertEqual(
            'XCXXXXCCXXXXXCXXXXXXCXXXCXXXXCXXXXXCXXX--XXXCXXXXCCXXXXXCXXXXXXCXXXCXXXXCXXXXXCX'
            'XX--XXXCXXXXCCXXXXXCXXXXXXCXXX',
            str(last_hsp.hit.seq),
        )
        self.assertEqual(
            'TCTNNQCCSQYGYCGFGAEYCGAGCQGGPCRADIKCGSQAGGKLCPNNLCCSQWGFCGLGSEFCGGGCQSGACSTDKPCG'
            'KDAGGRVCTNNYCCSKWGSCGIGPGYCGAG',
            str(last_hsp.query.seq),
        )
    def test_tab_2226_tblastn_001(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_001).

        Exactly two query results are expected. Selected hits of each one,
        and selected HSPs of those hits, are compared field by field against
        known values.
        """
        # HSP attributes, in the order their expected values are passed to
        # check_hsp() below.
        hsp_attrs = (
            "hit_id", "query_id", "ident_pct", "aln_span", "mismatch_num",
            "gapopen_num", "query_start", "query_end", "hit_start", "hit_end",
            "evalue", "bitscore",
        )

        def check_hit(hit, hit_id, query_id, n_hsps):
            # Identity of the hit and the number of HSPs it holds.
            self.assertEqual(hit_id, hit.id)
            self.assertEqual(query_id, hit.query_id)
            self.assertEqual(n_hsps, len(hit))

        def check_hsp(hsp, *expected):
            # One assertion per attribute, in hsp_attrs order.
            for attr, value in zip(hsp_attrs, expected):
                self.assertEqual(value, getattr(hsp, attr))

        tab_file = get_file("tab_2226_tblastn_001.txt")
        records = parse(tab_file, FMT)
        n_seen = 0

        # ---- first query result ----
        record = next(records)
        n_seen += 1
        query_id = "gi|16080617|ref|NP_391444.1|"

        self.assertEqual(query_id, record.id)
        self.assertEqual(3, len(record))

        hit_id = "gi|145479850|ref|XM_001425911.1|"
        hit = record[0]
        check_hit(hit, hit_id, query_id, 1)
        check_hsp(hit.hsps[0], hit_id, query_id,
                  34.88, 43, 28, 0, 30, 73, 1743, 1872, 1e-05, 34.7)

        hit_id = "gi|115975252|ref|XM_001180111.1|"
        hit = record[-1]
        check_hit(hit, hit_id, query_id, 1)
        check_hsp(hit.hsps[0], hit_id, query_id,
                  33.90, 59, 31, 1, 43, 94, 1056, 1233, 1e-04, 31.6)

        # ---- last query result ----
        record = next(records)
        n_seen += 1
        query_id = "gi|11464971:4-101"

        self.assertEqual(query_id, record.id)
        self.assertEqual(5, len(record))

        hit_id = "gi|350596019|ref|XM_003360601.2|"
        hit = record[0]
        check_hit(hit, hit_id, query_id, 2)
        check_hsp(hit.hsps[0], hit_id, query_id,
                  95.92, 98, 4, 0, 0, 98, 94, 388, 2e-67, 199)
        check_hsp(hit.hsps[-1], hit_id, query_id,
                  29.58, 71, 46, 2, 29, 96, 541, 754, 4e-05, 32.7)

        # The iterator must be exhausted after exactly two query results.
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(2, n_seen)
    def test_tab_2226_tblastn_010(self):
        """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_010).

        The file is parsed with ``comments=True``. Three query results are
        expected, the first of which holds no hits; each must report the same
        program, target and version.
        """
        def check_qresult(qresult, query_id, n_hits):
            # Header values are identical for every query in this file.
            self.assertEqual("tblastn", qresult.program)
            self.assertEqual("db/minirefseq_mrna", qresult.target)
            self.assertEqual("2.2.26+", qresult.version)
            self.assertEqual(query_id, qresult.id)
            self.assertEqual(n_hits, len(qresult))

        def check_hit(hit, hit_id, query_id, n_hsps):
            self.assertEqual(hit_id, hit.id)
            self.assertEqual(query_id, hit.query_id)
            self.assertEqual(n_hsps, len(hit))

        def check_hsp(hsp, hit_id, query_id, evalue, bitscore):
            self.assertEqual(hit_id, hsp.hit_id)
            self.assertEqual(query_id, hsp.query_id)
            self.assertEqual(evalue, hsp.evalue)
            self.assertEqual(bitscore, hsp.bitscore)

        tab_file = get_file("tab_2226_tblastn_010.txt")
        records = parse(tab_file, FMT, comments=True)
        n_seen = 0

        # ---- first query result: no hits ----
        record = next(records)
        n_seen += 1
        check_qresult(record, "random_s00", 0)

        # ---- second query result ----
        record = next(records)
        n_seen += 1
        query_id = "gi|16080617|ref|NP_391444.1|"
        check_qresult(record, query_id, 3)

        hit_id = "gi|145479850|ref|XM_001425911.1|"
        hit = record[0]
        check_hit(hit, hit_id, query_id, 1)
        check_hsp(hit.hsps[0], hit_id, query_id, 1e-05, 34.7)

        hit_id = "gi|115975252|ref|XM_001180111.1|"
        hit = record[-1]
        check_hit(hit, hit_id, query_id, 1)
        check_hsp(hit.hsps[0], hit_id, query_id, 1e-04, 31.6)

        # ---- last query result ----
        record = next(records)
        n_seen += 1
        query_id = "gi|11464971:4-101"
        check_qresult(record, query_id, 5)

        hit_id = "gi|350596019|ref|XM_003360601.2|"
        hit = record[0]
        check_hit(hit, hit_id, query_id, 2)
        check_hsp(hit.hsps[0], hit_id, query_id, 2e-67, 199)
        check_hsp(hit.hsps[-1], hit_id, query_id, 4e-05, 32.7)

        # The iterator must be exhausted after exactly three query results.
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(3, n_seen)
    # Example #59
    # 0
    def test_30_hmmscan_001(self):
        """Test parsing hmmer3-tab, hmmscan 3.0, multiple queries (tab_30_hmmscan_001)

        Four query results are expected, holding 1, 2, 2 and 5 hits. Every
        hit must contain a single HSP; all hit-level values and the HSP's
        evalue, bitscore and bias are compared against the table below.
        """
        # Hit attributes, in the order their expected values appear in each
        # row of the table.
        hit_attrs = (
            "id", "accession", "evalue", "bitscore", "bias",
            "domain_exp_num", "region_num", "cluster_num", "overlap_num",
            "env_num", "domain_obs_num", "domain_reported_num",
            "domain_included_num", "description",
        )
        # Attributes checked on the first HSP of every hit.
        hsp_attrs = ("evalue", "bitscore", "bias")

        # One entry per query result, in file order:
        #   (query id, ((hit values, hsp values), ...))
        expected = (
            # first qresult
            ("gi|4885477|ref|NP_005359.1|", (
                (("Globin", "PF00042.17", 6e-21, 74.6, 0.3, 1.3,
                  1, 0, 0, 1, 1, 1, 1, "Globin"),
                 (9.2e-21, 74.0, 0.2)),
            )),
            # second qresult
            ("gi|126362951:116-221", (
                (("Ig_3", "PF13927.1", 1.4e-09, 38.2, 0.4, 1.3,
                  1, 0, 0, 1, 1, 1, 1, "Immunoglobulin domain"),
                 (2.1e-09, 37.6, 0.3)),
                (("Ig_2", "PF13895.1", 3.5e-05, 23.7, 0.1, 1.1,
                  1, 0, 0, 1, 1, 1, 1, "Immunoglobulin domain"),
                 (4.3e-05, 23.4, 0.1)),
            )),
            # third qresult
            ("gi|22748937|ref|NP_065801.1|", (
                (("Xpo1", "PF08389.7", 7.8e-34, 116.6, 7.8, 2.8,
                  2, 0, 0, 2, 2, 2, 1, "Exportin 1-like protein"),
                 (1.1e-33, 116.1, 3.4)),
                (("IBN_N", "PF03810.14", 0.0039, 16.9, 0.0, 2.7,
                  2, 0, 0, 2, 2, 2, 1, "Importin-beta N-terminal domain"),
                 (0.033, 14.0, 0.0)),
            )),
            # last qresult
            ("gi|125490392|ref|NP_038661.2|", (
                (("Pou", "PF00157.12", 7e-37, 124.8, 0.5, 1.5,
                  1, 0, 0, 1, 1, 1, 1,
                  "Pou domain - N-terminal to homeobox domain"),
                 (1.4e-36, 123.9, 0.3)),
                (("Homeobox", "PF00046.24", 2.1e-18, 65.5, 1.1, 1.5,
                  1, 0, 0, 1, 1, 1, 1, "Homeobox domain"),
                 (4.1e-18, 64.6, 0.7)),
                (("HTH_31", "PF13560.1", 0.012, 15.6, 0.0, 2.2,
                  2, 0, 0, 2, 2, 2, 0, "Helix-turn-helix domain"),
                 (0.16, 12.0, 0.0)),
                (("Homeobox_KN", "PF05920.6", 0.039, 13.5, 0.0, 1.6,
                  1, 0, 0, 1, 1, 1, 0, "Homeobox KN domain"),
                 (0.095, 12.3, 0.0)),
                (("DUF521", "PF04412.8", 0.14, 10.5, 0.1, 1.4,
                  1, 0, 0, 1, 1, 1, 0,
                  "Protein of unknown function (DUF521)"),
                 (0.26, 9.6, 0.1)),
            )),
        )

        hmmer_file = get_file("tab_30_hmmscan_001.out")
        records = parse(hmmer_file, FMT)
        n_seen = 0

        for query_id, hit_rows in expected:
            record = next(records)
            n_seen += 1
            self.assertEqual(len(hit_rows), len(record))
            self.assertEqual(query_id, record.id)
            self.assertEqual("-", record.accession)

            for index, (hit_values, hsp_values) in enumerate(hit_rows):
                hit = record[index]
                self.assertEqual(1, len(hit))
                for attr, value in zip(hit_attrs, hit_values):
                    self.assertEqual(value, getattr(hit, attr))

                hsp = hit.hsps[0]
                for attr, value in zip(hsp_attrs, hsp_values):
                    self.assertEqual(value, getattr(hsp, attr))

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(4, n_seen)
    def test_tab_2226_tblastn_013(self):
        """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_013).

        The file is parsed with ``fields="qseq std sseq"``. A single query
        result with three hits is expected; for its first and last hit the
        first HSP's column values and its query and hit sequences are checked.
        """
        # HSP attributes, in the order their expected values are passed to
        # check_hsp() below.
        hsp_attrs = (
            "hit_id", "query_id", "ident_pct", "aln_span", "mismatch_num",
            "gapopen_num", "query_start", "query_end", "hit_start", "hit_end",
            "evalue", "bitscore",
        )

        def check_hit(hit, hit_id, query_id, n_hsps):
            self.assertEqual(hit_id, hit.id)
            self.assertEqual(query_id, hit.query_id)
            self.assertEqual(n_hsps, len(hit))

        def check_hsp(hsp, values, query_seq, hit_seq):
            # Column values first, then the two aligned sequences.
            for attr, value in zip(hsp_attrs, values):
                self.assertEqual(value, getattr(hsp, attr))
            self.assertEqual(query_seq, hsp.query.seq)
            self.assertEqual(hit_seq, hsp.hit.seq)

        tab_file = get_file("tab_2226_tblastn_013.txt")
        records = parse(tab_file, FMT, fields="qseq std sseq")
        n_seen = 0

        record = next(records)
        n_seen += 1

        self.assertEqual("<unknown program>", record.program)
        self.assertEqual("<unknown target>", record.target)
        self.assertEqual("<unknown version>", record.version)
        self.assertEqual(3, len(record))

        query_id = "gi|16080617|ref|NP_391444.1|"

        hit_id = "gi|145479850|ref|XM_001425911.1|"
        hit = record[0]
        check_hit(hit, hit_id, query_id, 1)
        check_hsp(
            hit.hsps[0],
            (hit_id, query_id,
             34.88, 43, 28, 0, 30, 73, 1743, 1872, 1e-05, 34.7),
            "PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD",
            "PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID",
        )

        hit_id = "gi|115975252|ref|XM_001180111.1|"
        hit = record[-1]
        check_hit(hit, hit_id, query_id, 1)
        check_hsp(
            hit.hsps[0],
            (hit_id, query_id,
             33.90, 59, 31, 1, 43, 94, 1056, 1233, 1e-04, 31.6),
            "GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL",
            "GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL",
        )

        # The iterator must be exhausted after exactly one query result.
        self.assertRaises(StopIteration, next, records)
        self.assertEqual(1, n_seen)