def test_30_hmmscan_002(self):
    """Check that a hit-less hmmscan 3.0 table yields no query results (tab_30_hmmscan_002)."""
    parsed = parse(get_file('tab_30_hmmscan_002.out'), FMT)
    # the very first request for a result must already exhaust the iterator
    with self.assertRaises(StopIteration):
        next(parsed)
def test_tab_2226_tblastn_013(self):
    """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_013).

    The file is parsed with the custom column layout ``qseq std sseq``.
    The first and last hits of the only query result are checked,
    including the aligned query and hit sequences.
    """
    tab_file = get_file('tab_2226_tblastn_013.txt')
    qresults = parse(tab_file, FMT, fields="qseq std sseq")
    counter = 0

    # Use the builtin next(); iterators have no .next() method on Python 3.
    qresult = next(qresults)
    counter += 1
    # program, target and version are expected to hold placeholder values
    self.assertEqual('<unknown program>', qresult.program)
    self.assertEqual('<unknown target>', qresult.target)
    self.assertEqual('<unknown version>', qresult.version)
    self.assertEqual(3, len(qresult))

    # first hit, first hsp
    hit = qresult[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))
    hsp = hit.hsps[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
    self.assertEqual(34.88, hsp.ident_pct)
    self.assertEqual(43, hsp.aln_span)
    self.assertEqual(28, hsp.mismatch_num)
    self.assertEqual(0, hsp.gapopen_num)
    self.assertEqual(30, hsp.query_start)
    self.assertEqual(73, hsp.query_end)
    self.assertEqual(1743, hsp.hit_start)
    self.assertEqual(1872, hsp.hit_end)
    self.assertEqual(1e-05, hsp.evalue)
    self.assertEqual(34.7, hsp.bitscore)
    self.assertEqual('PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD', str(hsp.query.seq))
    self.assertEqual('PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID', str(hsp.hit.seq))

    # last hit, first hsp
    hit = qresult[-1]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))
    hsp = hit.hsps[0]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
    self.assertEqual(33.90, hsp.ident_pct)
    self.assertEqual(59, hsp.aln_span)
    self.assertEqual(31, hsp.mismatch_num)
    self.assertEqual(1, hsp.gapopen_num)
    self.assertEqual(43, hsp.query_start)
    self.assertEqual(94, hsp.query_end)
    self.assertEqual(1056, hsp.hit_start)
    self.assertEqual(1233, hsp.hit_end)
    self.assertEqual(1e-04, hsp.evalue)
    self.assertEqual(31.6, hsp.bitscore)
    self.assertEqual('GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL', str(hsp.query.seq))
    self.assertEqual('GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL', str(hsp.hit.seq))

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
def test_30_hmmscan_003(self):
    """Check hmmscan 3.0 hmmer3-tab parsing: one query, one hit, one hsp (tab_30_hmmscan_003)."""
    results = parse(get_file('tab_30_hmmscan_003.out'), FMT)
    n_seen = 0

    query = next(results)
    n_seen += 1
    self.assertEqual(1, len(query))
    self.assertEqual('gi|4885477|ref|NP_005359.1|', query.id)
    self.assertEqual('-', query.accession)

    globin = query[0]
    self.assertEqual(1, len(globin))
    # (attribute name, expected value) pairs for the only hit
    hit_expectations = (
        ('id', 'Globin'),
        ('accession', 'PF00042.17'),
        ('evalue', 6e-21),
        ('bitscore', 74.6),
        ('bias', 0.3),
        ('domain_exp_num', 1.3),
        ('region_num', 1),
        ('cluster_num', 0),
        ('overlap_num', 0),
        ('env_num', 1),
        ('domain_obs_num', 1),
        ('domain_reported_num', 1),
        ('domain_included_num', 1),
        ('description', 'Globin'),
    )
    for attr_name, wanted in hit_expectations:
        self.assertEqual(wanted, getattr(globin, attr_name))

    domain = globin.hsps[0]
    for attr_name, wanted in (('evalue', 9.2e-21), ('bitscore', 74.0), ('bias', 0.2)):
        self.assertEqual(wanted, getattr(domain, attr_name))

    # the iterator must be exhausted after the single query
    with self.assertRaises(StopIteration):
        next(results)
    self.assertEqual(1, n_seen)
def test_hmmpfam_21(self):
    """Check the query, its hit and both domain alignments of an hmmpfam 2.1 file (text_21_hmmpfam_001.out)."""
    parsed = parse(path.join("Hmmer", "text_21_hmmpfam_001.out"), self.fmt)
    record = next(parsed)
    self.assertEqual("roa1_drome", record.id)
    self.assertEqual("<unknown description>", record.description)
    self.assertEqual("hmmpfam", record.program)
    self.assertEqual("2.1.1", record.version)
    self.assertEqual("pfam", record.target)
    self.assertEqual(1, len(record))

    seed_hit = record[0]
    self.assertEqual("SEED", seed_hit.id)
    self.assertEqual("<unknown description>", seed_hit.description)
    self.assertAlmostEqual(146.1, seed_hit.bitscore)
    self.assertAlmostEqual(6.3e-40, seed_hit.evalue)
    self.assertEqual(2, seed_hit.domain_obs_num)
    self.assertEqual(2, len(seed_hit))

    # Per domain: query start, query end, bitscore, evalue, then the hit,
    # similarity and query rows of the alignment.  Both domains are expected
    # to span hit positions 0-77 with "[]" / ".." end types.
    domains = (
        (
            32, 103, 71.2, 2.2e-17,
            "lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv",
            "lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ",
            "LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP",
        ),
        (
            123, 194, 75.5, 1.1e-18,
            "lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv",
            "lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v",
            "LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV",
        ),
    )
    for position, expected in enumerate(domains):
        q_start, q_end, bits, evalue, hit_row, similarity_row, query_row = expected
        domain = seed_hit[position]
        self.assertEqual(position + 1, domain.domain_index)
        self.assertEqual(0, domain.hit_start)
        self.assertEqual(77, domain.hit_end)
        self.assertEqual("[]", domain.hit_endtype)
        self.assertEqual(q_start, domain.query_start)
        self.assertEqual(q_end, domain.query_end)
        self.assertEqual("..", domain.query_endtype)
        self.assertAlmostEqual(bits, domain.bitscore)
        self.assertAlmostEqual(evalue, domain.evalue)
        self.assertEqual(hit_row, str(domain.hit.seq))
        self.assertEqual(similarity_row, str(domain.aln_annotation["similarity"]))
        self.assertEqual(query_row, str(domain.query.seq))
def test_tab_2228_tblastx_001(self):
    """Check the fields that are new in TBLASTX 2.2.28+ tabular output (tab_2228_tblastx_001)."""
    tab_file = get_file('tab_2228_tblastx_001.txt')
    requested_fields = list(all_fields.values())
    parsed = list(parse(tab_file, FMT, fields=requested_fields, comments=True))

    # this a single query, with 192 hits and 243 hsps
    self.assertEqual(1, len(parsed))
    only_query = parsed[0]
    self.assertEqual(192, len(only_query.hits))
    self.assertEqual(243, sum(len(each_hit) for each_hit in only_query))

    # only checking the new fields in 2.2.28+
    top_hit = only_query[0]
    new_fields = (
        ('accession_all', ['NM_001183135', 'EF059095']),
        ('tax_ids', ['32630', '559292']),
        ('sci_names', ['N/A', 'N/A']),
        ('com_names', ['N/A', 'N/A']),
        ('blast_names', ['N/A']),
        ('super_kingdoms', ['N/A']),
        ('title', 'Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA'),
        ('title_all', ['Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA',
                       'Synthetic construct Saccharomyces cerevisiae clone '
                       'FLH203015.01X MON2, complete sequence']),
        ('strand', 'N/A'),
        ('query_coverage', 100.0),
    )
    for field_name, wanted in new_fields:
        self.assertEqual(wanted, getattr(top_hit, field_name))

    # shorthand ~ the values just happen to all be 99
    # in other cases, they may be different
    for leading_hsp in top_hit[:4]:
        self.assertEqual(99.0, leading_hsp.query_coverage)
    self.assertEqual(73.0, top_hit[5].query_coverage)
    self.assertEqual(12.0, top_hit[6].query_coverage)
def test_domtab_30_hmmscan_002(self):
    """Test parsing hmmscan-domtab, hmmscan 3.0, single query, no hits (domtab_30_hmmscan_002)."""
    tab_file = get_file('domtab_30_hmmscan_002.out')
    qresults = parse(tab_file, self.fmt)

    # With no hits the iterator must be exhausted at once.  The builtin
    # next() is used because iterators have no .next() method on Python 3.
    self.assertRaises(StopIteration, next, qresults)
def test_tab_2226_tblastn_002(self):
    """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_002)."""
    tab_file = get_file('tab_2226_tblastn_002.txt')
    qresults = parse(tab_file, FMT)

    # check if we've finished iteration over qresults; the builtin next()
    # is used because iterators have no .next() method on Python 3
    self.assertRaises(StopIteration, next, qresults)
def test_31b1_hmmscan_001(self):
    """Check the first and last query results of a multi-query hmmscan 3.1b1 table (tab_31b1_hmmscan_001)."""
    all_queries = list(parse(get_file('tab_31b1_hmmscan_001.out'), FMT))
    self.assertEqual(4, len(all_queries))

    hit_fields = (
        'id', 'accession', 'evalue', 'bitscore', 'bias', 'domain_exp_num',
        'region_num', 'cluster_num', 'overlap_num', 'env_num',
        'domain_obs_num', 'domain_reported_num', 'domain_included_num',
        'description',
    )
    hsp_fields = ('evalue', 'bitscore', 'bias')

    # Each case: query position, hit count, query id, hit position,
    # expected hit values, expected values of the hit's first hsp.
    cases = (
        # first qresult, first hit, first hsp
        (0, 1, 'gi|4885477|ref|NP_005359.1|', 0,
         ('Globin', 'PF00042.17', 1e-22, 80.5, 0.3, 1.3,
          1, 0, 0, 1, 1, 1, 1, 'Globin'),
         (1.6e-22, 79.8, 0.3)),
        # last qresult, last hit, last hsp
        (-1, 5, 'gi|125490392|ref|NP_038661.2|', -1,
         ('DUF521', 'PF04412.8', 0.15, 10.5, 0.1, 1.4,
          1, 0, 0, 1, 1, 1, 0, 'Protein of unknown function (DUF521)'),
         (0.28, 9.6, 0.1)),
    )
    for query_pos, hit_count, query_id, hit_pos, hit_values, hsp_values in cases:
        query = all_queries[query_pos]
        self.assertEqual(hit_count, len(query))
        self.assertEqual(query_id, query.id)
        self.assertEqual('-', query.accession)

        picked_hit = query[hit_pos]
        self.assertEqual(1, len(picked_hit))
        for field, wanted in zip(hit_fields, hit_values):
            self.assertEqual(wanted, getattr(picked_hit, field))

        domain = picked_hit.hsps[0]
        for field, wanted in zip(hsp_fields, hsp_values):
            self.assertEqual(wanted, getattr(domain, field))
def test_hmmpfam_23_break_in_end_of_seq(self):
    """Check an hmmpfam 2.3 file with a line break in the end-of-seq marker.

    Input file: text_23_hmmpfam_004.out
    """
    source = path.join("Hmmer", "text_23_hmmpfam_004.out")
    first_record = next(parse(source, self.fmt))
    # the first two hits are expected in this order
    for position, hit_id in enumerate(('PKSI-KS', 'PKSI-FK')):
        self.assertEqual(hit_id, first_record[position].id)
def test_tab_2226_tblastn_007(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_007).

    Checks the metadata of the only query result and the first hsp of its
    first and last hits.
    """
    tab_file = get_file('tab_2226_tblastn_007.txt')
    qresults = parse(tab_file, FMT, comments=True)
    counter = 0

    # Use the builtin next(); iterators have no .next() method on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
    self.assertEqual(3, len(qresult))

    # first hit, first hsp
    hit = qresult[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))
    hsp = hit.hsps[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
    self.assertEqual(34.88, hsp.ident_pct)
    self.assertEqual(43, hsp.aln_span)
    self.assertEqual(28, hsp.mismatch_num)
    self.assertEqual(0, hsp.gapopen_num)
    self.assertEqual(30, hsp.query_start)
    self.assertEqual(73, hsp.query_end)
    self.assertEqual(1743, hsp.hit_start)
    self.assertEqual(1872, hsp.hit_end)
    self.assertEqual(1e-05, hsp.evalue)
    self.assertEqual(34.7, hsp.bitscore)

    # last hit, first hsp
    hit = qresult[-1]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))
    hsp = hit.hsps[0]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
    self.assertEqual(33.90, hsp.ident_pct)
    self.assertEqual(59, hsp.aln_span)
    self.assertEqual(31, hsp.mismatch_num)
    self.assertEqual(1, hsp.gapopen_num)
    self.assertEqual(43, hsp.query_start)
    self.assertEqual(94, hsp.query_end)
    self.assertEqual(1056, hsp.hit_start)
    self.assertEqual(1233, hsp.hit_end)
    self.assertEqual(1e-04, hsp.evalue)
    self.assertEqual(31.6, hsp.bitscore)

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
def test_30_hmmscan_004(self):
    """Test parsing hmmer3-tab, hmmscan 3.0, single query, multiple hits (tab_30_hmmscan_004).

    Both hits of the only query result are checked together with their
    first hsp.
    """
    tab_file = get_file('tab_30_hmmscan_004.out')
    qresults = parse(tab_file, FMT)
    counter = 0

    qresult = next(qresults)
    counter += 1
    self.assertEqual(2, len(qresult))
    self.assertEqual('gi|126362951:116-221', qresult.id)
    # The accession is read through ``accession`` (not ``acc``), matching
    # the attribute name used by the sibling hmmer3-tab tests.
    self.assertEqual('-', qresult.accession)

    # first hit
    hit = qresult[0]
    self.assertEqual(1, len(hit))
    self.assertEqual('Ig_3', hit.id)
    self.assertEqual('PF13927.1', hit.accession)
    self.assertEqual(1.4e-09, hit.evalue)
    self.assertEqual(38.2, hit.bitscore)
    self.assertEqual(0.4, hit.bias)
    self.assertEqual(1.3, hit.domain_exp_num)
    self.assertEqual(1, hit.region_num)
    self.assertEqual(0, hit.cluster_num)
    self.assertEqual(0, hit.overlap_num)
    self.assertEqual(1, hit.env_num)
    self.assertEqual(1, hit.domain_obs_num)
    self.assertEqual(1, hit.domain_reported_num)
    self.assertEqual(1, hit.domain_included_num)
    self.assertEqual('Immunoglobulin domain', hit.description)
    hsp = hit.hsps[0]
    self.assertEqual(2.1e-09, hsp.evalue)
    self.assertEqual(37.6, hsp.bitscore)
    self.assertEqual(0.3, hsp.bias)

    # second hit
    hit = qresult[1]
    self.assertEqual(1, len(hit))
    self.assertEqual('Ig_2', hit.id)
    self.assertEqual('PF13895.1', hit.accession)
    self.assertEqual(3.5e-05, hit.evalue)
    self.assertEqual(23.7, hit.bitscore)
    self.assertEqual(0.1, hit.bias)
    self.assertEqual(1.1, hit.domain_exp_num)
    self.assertEqual(1, hit.region_num)
    self.assertEqual(0, hit.cluster_num)
    self.assertEqual(0, hit.overlap_num)
    self.assertEqual(1, hit.env_num)
    self.assertEqual(1, hit.domain_obs_num)
    self.assertEqual(1, hit.domain_reported_num)
    self.assertEqual(1, hit.domain_included_num)
    self.assertEqual('Immunoglobulin domain', hit.description)
    hsp = hit.hsps[0]
    self.assertEqual(4.3e-05, hsp.evalue)
    self.assertEqual(23.4, hsp.bitscore)
    self.assertEqual(0.1, hsp.bias)

    # test if we've properly finished iteration
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
def test_31b1_hmmsearch_001(self):
    """Check the first and last hits of the only query in an hmmsearch 3.1b1 table (tab_31b1_hmmsearch_001)."""
    all_queries = list(parse(get_file('tab_31b1_hmmsearch_001.out'), FMT))
    self.assertEqual(1, len(all_queries))

    # first qresult
    pkinase = all_queries[0]
    self.assertEqual(4, len(pkinase))
    self.assertEqual('Pkinase', pkinase.id)
    self.assertEqual('PF00069.17', pkinase.accession)

    hit_fields = (
        'id', 'accession', 'evalue', 'bitscore', 'bias', 'domain_exp_num',
        'region_num', 'cluster_num', 'overlap_num', 'env_num',
        'domain_obs_num', 'domain_reported_num', 'domain_included_num',
        'description',
    )
    hsp_fields = ('evalue', 'bitscore', 'bias')

    # Each case: hit position, expected hit values, hsp position within
    # the hit, expected hsp values.
    cases = (
        # first hit, first hsp
        (0,
         ('sp|Q9WUT3|KS6A2_MOUSE', '-', 8.5e-147, 492.3, 0.0, 2.1,
          2, 0, 0, 2, 2, 2, 2,
          'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=1 SV=1'),
         0, (1.2e-72, 249.3, 0.0)),
        # last hit, last hsp
        (-1,
         ('sp|P18652|KS6AA_CHICK', '-', 2.6e-145, 487.5, 0.0, 2.1,
          2, 0, 0, 2, 2, 2, 2,
          'Ribosomal protein S6 kinase 2 alpha OS=Gallus gallus GN=RPS6KA PE=2 SV=1'),
         -1, (7.6e-72, 246.7, 0.0)),
    )
    for hit_pos, hit_values, hsp_pos, hsp_values in cases:
        picked_hit = pkinase[hit_pos]
        self.assertEqual(1, len(picked_hit))
        for field, wanted in zip(hit_fields, hit_values):
            self.assertEqual(wanted, getattr(picked_hit, field))

        domain = picked_hit.hsps[hsp_pos]
        for field, wanted in zip(hsp_fields, hsp_values):
            self.assertEqual(wanted, getattr(domain, field))
def test_tab_2226_tblastn_005_comments_false(self):
    """Check that parsing tab_2226_tblastn_005 without ``comments=True`` raises ValueError."""
    commented_file = get_file('tab_2226_tblastn_005.txt')
    failure_note = ("Encountered unexpected character '#' at the beginning of"
                    " a line. Set comments=True if the file is a commented"
                    " file.")
    parsed = parse(commented_file, FMT)
    # NOTE(review): per the unittest docs, msg only sets the text reported
    # when no ValueError is raised; the text of the raised exception itself
    # is not compared against it.
    with self.assertRaises(ValueError, msg=failure_note):
        next(parsed)
def test_hmmpfam_23_no_match(self):
    """Check an hmmpfam 2.3 output whose first two queries have no hits (text_23_hmmpfam_002.out)."""
    parsed = parse(path.join("Hmmer", "text_23_hmmpfam_002.out"), self.fmt)
    # the query results are expected in this order, each without hits
    for query_id in ('SEQ0001', 'SEQ0002'):
        record = next(parsed)
        self.assertEqual(query_id, record.id)
        self.assertEqual(0, len(record.hits))
def test_tab_2226_tblastn_008(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_008).

    Checks the metadata of the only query result and the first and last
    hsps of its first hit.
    """
    tab_file = get_file('tab_2226_tblastn_008.txt')
    qresults = parse(tab_file, FMT, comments=True)
    counter = 0

    # Use the builtin next(); iterators have no .next() method on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('gi|11464971:4-101', qresult.id)
    self.assertEqual(5, len(qresult))

    # first hit, first hsp
    hit = qresult[0]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id)
    self.assertEqual('gi|11464971:4-101', hit.query_id)
    self.assertEqual(2, len(hit))
    hsp = hit.hsps[0]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
    self.assertEqual('gi|11464971:4-101', hsp.query_id)
    self.assertEqual(95.92, hsp.ident_pct)
    self.assertEqual(98, hsp.aln_span)
    self.assertEqual(4, hsp.mismatch_num)
    self.assertEqual(0, hsp.gapopen_num)
    self.assertEqual(0, hsp.query_start)
    self.assertEqual(98, hsp.query_end)
    self.assertEqual(94, hsp.hit_start)
    self.assertEqual(388, hsp.hit_end)
    self.assertEqual(2e-67, hsp.evalue)
    self.assertEqual(199, hsp.bitscore)

    # first hit, last hsp
    hsp = hit.hsps[-1]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
    self.assertEqual('gi|11464971:4-101', hsp.query_id)
    self.assertEqual(29.58, hsp.ident_pct)
    self.assertEqual(71, hsp.aln_span)
    self.assertEqual(46, hsp.mismatch_num)
    self.assertEqual(2, hsp.gapopen_num)
    self.assertEqual(29, hsp.query_start)
    self.assertEqual(96, hsp.query_end)
    self.assertEqual(541, hsp.hit_start)
    self.assertEqual(754, hsp.hit_end)
    self.assertEqual(4e-05, hsp.evalue)
    self.assertEqual(32.7, hsp.bitscore)

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
def test_hmmpfam_23(self):
    """Check the first query, its first hit and that hit's two domains in an hmmpfam 2.3 file (text_23_hmmpfam_001.out)."""
    parsed = parse(path.join("Hmmer", "text_23_hmmpfam_001.out"), self.fmt)
    record = next(parsed)
    record_expectations = (
        ('id', 'gi|90819130|dbj|BAE92499.1|'),
        ('description', 'glutamate synthase [Porphyra yezoensis]'),
        ('accession', '[none]'),
        ('program', 'hmmpfam'),
        ('version', '2.3.2'),
        ('target', '../Shared/Pfam_fs'),
    )
    for attr_name, wanted in record_expectations:
        self.assertEqual(wanted, getattr(record, attr_name))
    self.assertEqual(54, len(record))

    top_hit = record[0]
    self.assertEqual('Glu_synthase', top_hit.id)
    self.assertEqual('Conserved region in glutamate synthas', top_hit.description)
    self.assertAlmostEqual(858.6, top_hit.bitscore)
    self.assertAlmostEqual(3.6e-255, top_hit.evalue)
    self.assertEqual(2, top_hit.domain_obs_num)
    self.assertEqual(2, len(top_hit))

    # Per domain: exactly compared attributes first, then the approximately
    # compared bitscore and evalue.
    domain_expectations = (
        ((('domain_index', 1), ('hit_start', 296), ('hit_end', 323),
          ('hit_endtype', '..'), ('query_start', 649), ('query_end', 676),
          ('query_endtype', '..')),
         1.3, 3),
        ((('domain_index', 2), ('hit_start', 0), ('hit_end', 412),
          ('hit_endtype', '[]'), ('query_start', 829), ('query_end', 1216),
          ('query_endtype', '..')),
         857.3, 9e-255),
    )
    for position, (exact_pairs, bits, evalue) in enumerate(domain_expectations):
        domain = top_hit[position]
        for attr_name, wanted in exact_pairs:
            self.assertEqual(wanted, getattr(domain, attr_name))
        self.assertAlmostEqual(bits, domain.bitscore)
        self.assertAlmostEqual(evalue, domain.evalue)
        if position == 0:
            # alignment rows are only checked for the first domain
            self.assertEqual('lPwelgLaevhqtLvengLRdrVsLia', str(domain.hit.seq))
            self.assertEqual('+P l++ +vh L++ gLR + s+ +', str(domain.aln_annotation['similarity']))
            self.assertEqual('IPPLLAVGAVHHHLINKGLRQEASILV', str(domain.query.seq))
def test_tab_2228_tblastn_001(self):
    """Check TBLASTN 2.2.28+ tabular parsing with an ``sallseqid`` column (tab_2228_tblastn_001)."""
    tab_file = get_file('tab_2228_tblastn_001.txt')
    columns = ['evalue', 'sallseqid', 'qseqid']
    parsed = list(parse(tab_file, FMT, fields=columns, comments=True))
    self.assertEqual(1, len(parsed))

    query = parsed[0]
    self.assertEqual(10, len(query.hits))

    # there is one hit with an alternative ID
    by_first_id = query['gi|148227873|ref|NM_001095167.1|']
    by_second_id = query['gi|55250552|gb|BC086280.1|']
    self.assertEqual(by_first_id, by_second_id)

    # check some of the HSPs
    for hit_position, wanted_evalue in ((0, 0.0), (-1, 8e-173)):
        self.assertEqual(wanted_evalue, query[hit_position][0].evalue)
def parse_hmmout(file_name):
    """Parse an hmmsearch tabular output into an array of scores and ids.

    Every hit of every query result in the ``hmmer3-tab`` file contributes
    one row ``[int(bitscore), id_field]``, where ``id_field`` is the second
    ``|``-separated field of the hit id.

    INPUT: filename of the hmmsearch output
    OUTPUT: np.array; empty when there are no hits, a flat ``[score, id]``
    array for a single hit, otherwise one row per hit.
    NOTE(review): a row mixes an int with a str, so numpy is expected to
    store the scores as strings - confirm callers rely on that.
    """
    # Collect the rows in a list and convert once; stacking inside the loop
    # re-copies the whole array for every hit.
    rows = [
        [int(item.bitscore), item.id.split('|')[1]]
        for qresult in parse(file_name, 'hmmer3-tab')
        for item in qresult.hits
    ]
    if not rows:
        return np.array([])
    if len(rows) == 1:
        # a lone hit has always been returned as a one-dimensional array
        return np.array(rows[0])
    return np.array(rows)
def test_tab_2228_tblastn_001(self):
    """Verify hits reachable by an alternative ID in TBLASTN 2.2.28+ tabular output (tab_2228_tblastn_001)."""
    source = get_file('tab_2228_tblastn_001.txt')
    results_iter = parse(source, FMT,
                         fields=['evalue', 'sallseqid', 'qseqid'],
                         comments=True)
    records = list(results_iter)
    self.assertEqual(1, len(records))
    record = records[0]
    self.assertEqual(10, len(record.hits))

    # there is one hit with an alternative ID
    self.assertEqual(
        record['gi|148227873|ref|NM_001095167.1|'],
        record['gi|55250552|gb|BC086280.1|'],
    )

    # check some of the HSPs
    first_hit, last_hit = record[0], record[-1]
    self.assertEqual(0.0, first_hit[0].evalue)
    self.assertEqual(8e-173, last_hit[0].evalue)
def test_hmmpfam_23(self):
    """Verify query metadata, the top hit and its two domains for hmmpfam 2.3 (text_23_hmmpfam_001.out)."""
    source = path.join("Hmmer", "text_23_hmmpfam_001.out")
    query = next(parse(source, self.fmt))

    # query-level metadata
    self.assertEqual('gi|90819130|dbj|BAE92499.1|', query.id)
    self.assertEqual('glutamate synthase [Porphyra yezoensis]', query.description)
    self.assertEqual('[none]', query.accession)
    self.assertEqual('hmmpfam', query.program)
    self.assertEqual('2.3.2', query.version)
    self.assertEqual('../Shared/Pfam_fs', query.target)
    self.assertEqual(54, len(query))

    # best hit
    glu = query[0]
    self.assertEqual('Glu_synthase', glu.id)
    self.assertEqual('Conserved region in glutamate synthas', glu.description)
    self.assertAlmostEqual(858.6, glu.bitscore)
    self.assertAlmostEqual(3.6e-255, glu.evalue)
    self.assertEqual(2, glu.domain_obs_num)
    self.assertEqual(2, len(glu))

    # first domain, including its alignment rows
    dom_one = glu[0]
    self.assertEqual(1, dom_one.domain_index)
    self.assertEqual((296, 323, '..'),
                     (dom_one.hit_start, dom_one.hit_end, dom_one.hit_endtype))
    self.assertEqual((649, 676, '..'),
                     (dom_one.query_start, dom_one.query_end, dom_one.query_endtype))
    self.assertAlmostEqual(1.3, dom_one.bitscore)
    self.assertAlmostEqual(3, dom_one.evalue)
    self.assertEqual('lPwelgLaevhqtLvengLRdrVsLia', str(dom_one.hit.seq))
    self.assertEqual('+P l++ +vh L++ gLR + s+ +', str(dom_one.aln_annotation['similarity']))
    self.assertEqual('IPPLLAVGAVHHHLINKGLRQEASILV', str(dom_one.query.seq))

    # second domain, coordinates and scores only
    dom_two = glu[1]
    self.assertEqual(2, dom_two.domain_index)
    self.assertEqual((0, 412, '[]'),
                     (dom_two.hit_start, dom_two.hit_end, dom_two.hit_endtype))
    self.assertEqual((829, 1216, '..'),
                     (dom_two.query_start, dom_two.query_end, dom_two.query_endtype))
    self.assertAlmostEqual(857.3, dom_two.bitscore)
    self.assertAlmostEqual(9e-255, dom_two.evalue)
def parse_labelled_hmmout(file_name):
    """Parse an hmmsearch tabular output into an array of score, group and family.

    Every hit of every query result in the ``hmmer3-tab`` file contributes
    one row ``[int(bitscore), group, family]``, where ``group`` and
    ``family`` are the second and third whitespace-separated words of the
    hit description.

    INPUT: filename of the hmmsearch output
    OUTPUT: np.array; empty when there are no hits, a flat
    ``[score, group, family]`` array for a single hit, otherwise one row
    per hit.
    NOTE(review): a row mixes an int with strs, so numpy is expected to
    store the scores as strings - confirm callers rely on that.
    """
    # Collect the rows in a list and convert once; stacking inside the loop
    # re-copies the whole array for every hit.
    rows = []
    for qresult in parse(file_name, 'hmmer3-tab'):
        for item in qresult.hits:
            words = item.description.split()  # split once, reuse for both labels
            group = words[1]
            family = words[2]
            rows.append([int(item.bitscore), group, family])
    if not rows:
        return np.array([])
    if len(rows) == 1:
        # a lone hit has always been returned as a one-dimensional array
        return np.array(rows[0])
    return np.array(rows)
def test_tab_2226_tblastn_012(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_012).

    The file holds three query results that share program, target, version
    and RID; each one is checked for its id and number of hits.
    """
    tab_file = get_file('tab_2226_tblastn_012.txt')
    qresults = parse(tab_file, FMT, comments=True)
    counter = 0

    # (query id, number of hits) in file order
    expected_queries = (
        ('random_s00', 0),
        ('gi|16080617|ref|NP_391444.1|', 3),
        ('gi|11464971:4-101', 5),
    )
    for query_id, hit_count in expected_queries:
        # Use the builtin next(); iterators have no .next() method on Python 3.
        qresult = next(qresults)
        counter += 1
        self.assertEqual('tblastn', qresult.program)
        self.assertEqual('refseq_rna', qresult.target)
        self.assertEqual('2.2.26+', qresult.version)
        self.assertEqual(query_id, qresult.id)
        self.assertEqual('X76FDCG9016', qresult.rid)
        self.assertEqual(hit_count, len(qresult))

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(3, counter)
def test_hmmpfam_23_missing_consensus(self):
    """Check the single hit and its alignment rows in an hmmpfam 2.3 file (text_23_hmmpfam_003.out)."""
    parsed = parse(path.join("Hmmer", "text_23_hmmpfam_003.out"), self.fmt)
    record = next(parsed)
    record_expectations = (
        ('id', 'small_input'),
        ('description', '[none]'),
        ('accession', '[none]'),
        ('program', 'hmmpfam'),
        ('version', '2.3.2'),
        ('target', 'antismash/specific_modules/lantipeptides/ClassIVLanti.hmm'),
    )
    for attr_name, wanted in record_expectations:
        self.assertEqual(wanted, getattr(record, attr_name))
    self.assertEqual(1, len(record))

    lanti_hit = record[0]
    self.assertEqual('ClassIVLanti', lanti_hit.id)
    self.assertEqual('Class-IV', lanti_hit.description)
    self.assertAlmostEqual(-79.3, lanti_hit.bitscore)
    self.assertAlmostEqual(1, lanti_hit.evalue)
    self.assertEqual(1, lanti_hit.domain_obs_num)
    self.assertEqual(1, len(lanti_hit))

    domain = lanti_hit[0]
    domain_expectations = (
        ('domain_index', 1),
        ('hit_start', 0),
        ('hit_end', 66),
        ('hit_endtype', '[]'),
        ('query_start', 5),
        ('query_end', 20),
        ('query_endtype', '..'),
    )
    for attr_name, wanted in domain_expectations:
        self.assertEqual(wanted, getattr(domain, attr_name))
    self.assertAlmostEqual(-79.3, domain.bitscore)
    self.assertAlmostEqual(1, domain.evalue)

    # all three alignment rows must have the same length
    query_length = len(domain.query.seq)
    self.assertEqual(query_length, len(domain.hit.seq))
    self.assertEqual(len(domain.query.seq), len(domain.aln_annotation['similarity']))

    self.assertEqual(
        'msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLnahiqakeLsdeeLEgvaGg',
        str(domain.hit.seq))
    self.assertEqual(
        ' F+ G +t Ln ',
        str(domain.aln_annotation['similarity']))
    self.assertEqual(
        '-------CFL---------------------------GCLVTNWVLNRS-----------------',
        str(domain.query.seq))
def test_tab_2228_tblastx_001(self):
    """Verify counts, an alternative hit ID and the 2.2.28+ fields of TBLASTX output (tab_2228_tblastx_001)."""
    source = get_file("tab_2228_tblastx_001.txt")
    every_field = list(all_fields.values())
    records = list(parse(source, FMT, fields=every_field, comments=True))

    # this a single query, with 192 hits and 243 hsps
    self.assertEqual(1, len(records))
    record = records[0]
    self.assertEqual(192, len(record.hits))
    hsp_total = sum(len(entry) for entry in record)
    self.assertEqual(243, hsp_total)

    # there is one hit with an alternative ID
    self.assertEqual(
        record["gi|31126987|gb|AY255526.2|"],
        record["gi|31342050|ref|NM_181083.2|"],
    )

    # only checking the new fields in 2.2.28+
    mon2 = record[0]
    self.assertEqual(["NM_001183135", "EF059095"], mon2.accession_all)
    self.assertEqual(["32630", "559292"], mon2.tax_ids)
    # name-like fields hold "N/A" placeholders for this hit
    for placeholder_pair in (mon2.sci_names, mon2.com_names):
        self.assertEqual(["N/A", "N/A"], placeholder_pair)
    for placeholder_single in (mon2.blast_names, mon2.super_kingdoms):
        self.assertEqual(["N/A"], placeholder_single)
    primary_title = "Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA"
    self.assertEqual(primary_title, mon2.title)
    self.assertEqual(
        [
            "Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA",
            "Synthetic construct Saccharomyces cerevisiae clone "
            "FLH203015.01X MON2, complete sequence",
        ],
        mon2.title_all,
    )
    self.assertEqual("N/A", mon2.strand)
    self.assertEqual(100.0, mon2.query_coverage)

    # shorthand ~ the values just happen to all be 99
    # in other cases, they may be different
    for early_hsp in mon2[:4]:
        self.assertEqual(99.0, early_hsp.query_coverage)
    for hsp_position, coverage in ((5, 73.0), (6, 12.0)):
        self.assertEqual(coverage, mon2[hsp_position].query_coverage)
def test_xml_001(self):
    """Test parsing the first query result of test_001.xml.

    Checks the query metadata, the first hit with its first hsp, and the
    last hit with its cross references and first hsp.
    """
    xml_file = get_file('test_001.xml')
    qresults = parse(xml_file, FMT)

    # test each qresult's attributes
    qresult = next(qresults)
    self.assertEqual('5.26-65.0', qresult.version)

    # test parsed values of qresult
    self.assertEqual('AT5G23090.4', qresult.id)
    self.assertEqual('pacid=19665592 transcript=AT5G23090.4 locus=AT5G23090 ID=AT5G23090.4.TAIR10 annot-version=TAIR10', qresult.description)
    self.assertEqual(4, len(qresult))

    # first hit and its first hsp
    hit = qresult[0]
    self.assertEqual('PF00808', hit.id)
    self.assertEqual('Histone-like transcription factor (CBF/NF-Y) and archaeal histone', hit.description)
    self.assertEqual('PFAM', hit.attributes['Target'])
    self.assertEqual('31.0', hit.attributes['Target version'])
    self.assertEqual('hmmer3', hit.attributes['Hit type'])
    self.assertEqual(2, len(hit))

    hsp = hit.hsps[0]
    self.assertEqual(76.7, hsp.bitscore)
    self.assertEqual(1.1e-21, hsp.evalue)
    self.assertEqual(13, hsp.query_start)
    self.assertEqual(79, hsp.query_end)
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(65, hsp.hit_end)
    self.assertEqual(66, hsp.aln_span)
    self.assertEqual('MDPMDIVGKSKEDASLPKATMTKIIKEMLPPDVRVARDAQDLLIECCVEFINLVSSESNDVCNKEDKRTIAPEHVLKALQVLGFGEYIEEVYAAYEQHKYETMDTQRSVKWNPGAQMTEEEAAAEQQRMFAEARARMNGGVSVPQPEHPETDQRSPQS', str(hsp.query.seq))

    # parse last hit
    hit = qresult[-1]
    self.assertEqual('SSF47113', hit.id)
    self.assertEqual(1, len(hit))
    self.assertEqual('IPR:IPR009072', hit.dbxrefs[0])
    self.assertEqual('GO:0046982', hit.dbxrefs[1])

    hsp = hit.hsps[0]
    self.assertEqual(11, hsp.query_start)
    self.assertEqual(141, hsp.query_end)
def test_domtab_30_hmmscan_003(self):
    """Test parsing hmmscan-domtab, hmmscan 3.0 (domtab_30_hmmscan_003).

    Exactly one query result is expected; its only hit and that hit's
    first hsp are checked, including the domain coordinates.
    """
    tab_file = get_file('domtab_30_hmmscan_003.out')
    qresults = parse(tab_file, self.fmt)
    counter = 0

    # Use the builtin next(); iterators have no .next() method on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual(1, len(qresult))
    self.assertEqual('gi|4885477|ref|NP_005359.1|', qresult.id)
    self.assertEqual('-', qresult.accession)
    self.assertEqual(154, qresult.seq_len)

    hit = qresult[0]
    self.assertEqual(1, len(hit))
    self.assertEqual('Globin', hit.id)
    self.assertEqual('gi|4885477|ref|NP_005359.1|', hit.query_id)
    self.assertEqual('PF00042.17', hit.accession)
    self.assertEqual(108, hit.seq_len)
    self.assertEqual(6e-21, hit.evalue)
    self.assertEqual(74.6, hit.bitscore)
    self.assertEqual(0.3, hit.bias)
    self.assertEqual('Globin', hit.description)

    hsp = hit.hsps[0]
    self.assertEqual('Globin', hsp.hit_id)
    self.assertEqual('gi|4885477|ref|NP_005359.1|', hsp.query_id)
    self.assertEqual(1, hsp.domain_index)
    self.assertEqual(6.7e-25, hsp.evalue_cond)
    self.assertEqual(9.2e-21, hsp.evalue)
    self.assertEqual(74.0, hsp.bitscore)
    self.assertEqual(0.2, hsp.bias)
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(107, hsp.hit_end)
    self.assertEqual(6, hsp.query_start)
    self.assertEqual(112, hsp.query_end)
    self.assertEqual(6, hsp.env_start)
    self.assertEqual(113, hsp.env_end)
    self.assertEqual(0.97, hsp.acc_avg)

    # test if we've properly finished iteration
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
def test_tab_2226_tblastn_006(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_006).

    The fixture is expected to yield exactly one query result, and that
    result is expected to contain no hits.
    """
    tab_path = get_file("tab_2226_tblastn_006.txt")
    result_iter = parse(tab_path, FMT, comments=True)
    seen = 0

    only_result = next(result_iter)
    seen += 1

    # program / target / version metadata and the query id
    self.assertEqual("tblastn", only_result.program)
    self.assertEqual("db/minirefseq_mrna", only_result.target)
    self.assertEqual("2.2.26+", only_result.version)
    self.assertEqual("random_s00", only_result.id)
    self.assertEqual(0, len(only_result))

    # the iterator must now be exhausted
    self.assertRaises(StopIteration, next, result_iter)
    self.assertEqual(1, seen)
def test_tab_2226_tblastn_006(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_006)

    Expects a single query result without hits, followed by exhaustion of
    the result iterator.
    """
    xml_file = get_file('tab_2226_tblastn_006.txt')
    qresults = parse(xml_file, FMT, comments=True)
    counter = 0

    # Use the builtin next(): iterator objects only expose a ``.next()``
    # method on Python 2, so the method form breaks on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('random_s00', qresult.id)
    self.assertEqual(0, len(qresult))

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
def test_tab_2226_tblastn_012(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_012)

    Walks the three query results yielded for the fixture, checking the
    metadata, id, rid and hit count of each, then verifies the iterator is
    exhausted.
    """
    xml_file = get_file('tab_2226_tblastn_012.txt')
    qresults = parse(xml_file, FMT, comments=True)
    counter = 0

    # test first qresult
    # Use the builtin next(): iterator objects only expose a ``.next()``
    # method on Python 2, so the method form breaks on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('refseq_rna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('random_s00', qresult.id)
    self.assertEqual('X76FDCG9016', qresult.rid)
    self.assertEqual(0, len(qresult))

    # test second qresult
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('refseq_rna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
    self.assertEqual('X76FDCG9016', qresult.rid)
    self.assertEqual(3, len(qresult))

    # test last qresult
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('refseq_rna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('gi|11464971:4-101', qresult.id)
    self.assertEqual('X76FDCG9016', qresult.rid)
    self.assertEqual(5, len(qresult))

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(3, counter)
def test_hmmpfam_22(self):
    """Test parsing hmmpfam 2.2 file (text_22_hmmpfam_001.out)"""
    # Expected alignment rows, named up front so the assertions stay short.
    expected_hit_seq = 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn'
    expected_similarity = ' ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+'
    expected_query_seq = 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST'

    fixture = path.join("Hmmer", "text_22_hmmpfam_001.out")
    result_iter = parse(fixture, self.fmt)
    first = next(result_iter)

    # query-level values
    self.assertEqual('gi|1522636|gb|AAC37060.1|', first.id)
    self.assertEqual(
        'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]',
        first.description,
    )
    self.assertEqual('[none]', first.accession)
    self.assertEqual('hmmpfam', first.program)
    self.assertEqual('2.2g', first.version)
    self.assertEqual('Pfam', first.target)
    self.assertEqual(1, len(first))

    # the only hit
    only_hit = first[0]
    self.assertEqual('Methylase_M', only_hit.id)
    self.assertEqual('Type I restriction modification system, M', only_hit.description)
    self.assertAlmostEqual(-105.2, only_hit.bitscore)
    self.assertAlmostEqual(0.0022, only_hit.evalue)
    self.assertEqual(1, only_hit.domain_obs_num)
    self.assertEqual(1, len(only_hit))

    # the only HSP of that hit
    only_hsp = only_hit[0]
    self.assertEqual(1, only_hsp.domain_index)
    self.assertEqual(0, only_hsp.hit_start)
    self.assertEqual(279, only_hsp.hit_end)
    self.assertEqual('[]', only_hsp.hit_endtype)
    self.assertEqual(279, only_hsp.query_start)
    self.assertEqual(481, only_hsp.query_end)
    self.assertEqual('..', only_hsp.query_endtype)
    self.assertAlmostEqual(-105.2, only_hsp.bitscore)
    self.assertAlmostEqual(0.0022, only_hsp.evalue)
    self.assertEqual(expected_hit_seq, str(only_hsp.hit.seq))
    self.assertEqual(expected_similarity, str(only_hsp.aln_annotation['similarity']))
    self.assertEqual(expected_query_seq, str(only_hsp.query.seq))
def test_domtab_30_hmmscan_003(self):
    """Test parsing hmmscan-domtab, hmmscan 3.0, multiple queries (domtab_30_hmmscan_003)

    Checks the single query result yielded for the fixture, its only hit and
    that hit's first HSP, then verifies the iterator is exhausted.
    """
    query_id = 'gi|4885477|ref|NP_005359.1|'

    domtab_path = get_file('domtab_30_hmmscan_003.out')
    result_iter = parse(domtab_path, self.fmt)
    seen = 0

    first_result = next(result_iter)
    seen += 1
    self.assertEqual(1, len(first_result))
    self.assertEqual(query_id, first_result.id)
    self.assertEqual('-', first_result.accession)
    self.assertEqual(154, first_result.seq_len)

    # hit-level values
    only_hit = first_result[0]
    self.assertEqual(1, len(only_hit))
    self.assertEqual('Globin', only_hit.id)
    self.assertEqual(query_id, only_hit.query_id)
    self.assertEqual('PF00042.17', only_hit.accession)
    self.assertEqual(108, only_hit.seq_len)
    self.assertEqual(6e-21, only_hit.evalue)
    self.assertEqual(74.6, only_hit.bitscore)
    self.assertEqual(0.3, only_hit.bias)
    self.assertEqual('Globin', only_hit.description)

    # HSP-level values
    first_hsp = only_hit.hsps[0]
    self.assertEqual('Globin', first_hsp.hit_id)
    self.assertEqual(query_id, first_hsp.query_id)
    self.assertEqual(1, first_hsp.domain_index)
    self.assertEqual(6.7e-25, first_hsp.evalue_cond)
    self.assertEqual(9.2e-21, first_hsp.evalue)
    self.assertEqual(74.0, first_hsp.bitscore)
    self.assertEqual(0.2, first_hsp.bias)
    self.assertEqual(0, first_hsp.hit_start)
    self.assertEqual(107, first_hsp.hit_end)
    self.assertEqual(6, first_hsp.query_start)
    self.assertEqual(112, first_hsp.query_end)
    self.assertEqual(6, first_hsp.env_start)
    self.assertEqual(113, first_hsp.env_end)
    self.assertEqual(0.97, first_hsp.acc_avg)

    # nothing further should be yielded
    self.assertRaises(StopIteration, next, result_iter)
    self.assertEqual(1, seen)
def test_tab_2226_tblastn_012(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_012).

    Walks the three query results yielded for the fixture, checking the
    metadata, id, rid and hit count of each, then verifies the iterator is
    exhausted.
    """
    tab_path = get_file("tab_2226_tblastn_012.txt")
    result_iter = parse(tab_path, FMT, comments=True)
    seen = 0

    # first query result: no hits
    first = next(result_iter)
    seen += 1
    self.assertEqual("tblastn", first.program)
    self.assertEqual("refseq_rna", first.target)
    self.assertEqual("2.2.26+", first.version)
    self.assertEqual("random_s00", first.id)
    self.assertEqual("X76FDCG9016", first.rid)
    self.assertEqual(0, len(first))

    # second query result: three hits
    second = next(result_iter)
    seen += 1
    self.assertEqual("tblastn", second.program)
    self.assertEqual("refseq_rna", second.target)
    self.assertEqual("2.2.26+", second.version)
    self.assertEqual("gi|16080617|ref|NP_391444.1|", second.id)
    self.assertEqual("X76FDCG9016", second.rid)
    self.assertEqual(3, len(second))

    # last query result: five hits
    third = next(result_iter)
    seen += 1
    self.assertEqual("tblastn", third.program)
    self.assertEqual("refseq_rna", third.target)
    self.assertEqual("2.2.26+", third.version)
    self.assertEqual("gi|11464971:4-101", third.id)
    self.assertEqual("X76FDCG9016", third.rid)
    self.assertEqual(5, len(third))

    # the iterator must now be exhausted
    self.assertRaises(StopIteration, next, result_iter)
    self.assertEqual(3, seen)
def test_hmmpfam_22(self):
    """Test parsing hmmpfam 2.2 file (text_22_hmmpfam_001.out)"""
    # Expected alignment rows, named up front so the assertions stay short.
    expected_hit_seq = "lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn"
    expected_similarity = " ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+"
    expected_query_seq = "NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST"

    fixture = path.join("Hmmer", "text_22_hmmpfam_001.out")
    result_iter = parse(fixture, self.fmt)
    query_result = next(result_iter)

    # query-level values
    self.assertEqual("gi|1522636|gb|AAC37060.1|", query_result.id)
    self.assertEqual(
        "M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]",
        query_result.description,
    )
    self.assertEqual("[none]", query_result.accession)
    self.assertEqual("hmmpfam", query_result.program)
    self.assertEqual("2.2g", query_result.version)
    self.assertEqual("Pfam", query_result.target)
    self.assertEqual(1, len(query_result))

    # the only hit
    methylase_hit = query_result[0]
    self.assertEqual("Methylase_M", methylase_hit.id)
    self.assertEqual("Type I restriction modification system, M", methylase_hit.description)
    self.assertAlmostEqual(-105.2, methylase_hit.bitscore)
    self.assertAlmostEqual(0.0022, methylase_hit.evalue)
    self.assertEqual(1, methylase_hit.domain_obs_num)
    self.assertEqual(1, len(methylase_hit))

    # the only HSP of that hit
    domain = methylase_hit[0]
    self.assertEqual(1, domain.domain_index)
    self.assertEqual(0, domain.hit_start)
    self.assertEqual(279, domain.hit_end)
    self.assertEqual("[]", domain.hit_endtype)
    self.assertEqual(279, domain.query_start)
    self.assertEqual(481, domain.query_end)
    self.assertEqual("..", domain.query_endtype)
    self.assertAlmostEqual(-105.2, domain.bitscore)
    self.assertAlmostEqual(0.0022, domain.evalue)
    self.assertEqual(expected_hit_seq, str(domain.hit.seq))
    self.assertEqual(expected_similarity, str(domain.aln_annotation["similarity"]))
    self.assertEqual(expected_query_seq, str(domain.query.seq))
def test_domtab_30_hmmsearch_001(self):
    """Test parsing hmmsearch-domtab, hmmsearch 3.0, multiple queries (domtab_30_hmmsearch_001)

    Only the first query result is pulled from the iterator; the assertions
    cover it, its first hit and that hit's first HSP.
    """
    domtab_path = get_file('domtab_30_hmmsearch_001.out')
    result_iter = parse(domtab_path, self.fmt)

    # first qresult
    # we only want to check the coordinate switch actually
    # so checking the first hsp of the first hit of the qresult is enough
    first_result = next(result_iter)
    self.assertEqual(7, len(first_result))
    self.assertEqual('Pkinase', first_result.id)
    self.assertEqual('PF00069.17', first_result.accession)
    self.assertEqual(260, first_result.seq_len)

    # hit-level values
    first_hit = first_result[0]
    self.assertEqual(2, len(first_hit))
    self.assertEqual('sp|Q9WUT3|KS6A2_MOUSE', first_hit.id)
    self.assertEqual('Pkinase', first_hit.query_id)
    self.assertEqual('-', first_hit.accession)
    self.assertEqual(733, first_hit.seq_len)
    self.assertEqual(8.4e-147, first_hit.evalue)
    self.assertEqual(492.3, first_hit.bitscore)
    self.assertEqual(0.0, first_hit.bias)
    expected_description = 'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=2 SV=1'
    self.assertEqual(expected_description, first_hit.description)

    # HSP-level values, including the coordinates
    first_hsp = first_hit.hsps[0]
    self.assertEqual('sp|Q9WUT3|KS6A2_MOUSE', first_hsp.hit_id)
    self.assertEqual('Pkinase', first_hsp.query_id)
    self.assertEqual(1, first_hsp.domain_index)
    self.assertEqual(4.6e-75, first_hsp.evalue_cond)
    self.assertEqual(3.5e-70, first_hsp.evalue)
    self.assertEqual(241.2, first_hsp.bitscore)
    self.assertEqual(0.0, first_hsp.bias)
    self.assertEqual(58, first_hsp.hit_start)
    self.assertEqual(318, first_hsp.hit_end)
    self.assertEqual(0, first_hsp.query_start)
    self.assertEqual(260, first_hsp.query_end)
    self.assertEqual(58, first_hsp.env_start)
    self.assertEqual(318, first_hsp.env_end)
    self.assertEqual(0.95, first_hsp.acc_avg)
def test_30_hmmscan_003(self):
    """Test parsing hmmer3-tab, hmmscan 3.0, single query, single hit, single hsp (tab_30_hmmscan_003)

    Checks the only query result, its only hit and that hit's first HSP,
    then verifies the iterator is exhausted.
    """
    tab_file = get_file('tab_30_hmmscan_003.out')
    qresults = parse(tab_file, FMT)
    counter = 0

    # Use the builtin next(): iterator objects only expose a ``.next()``
    # method on Python 2, so the method form breaks on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual(1, len(qresult))
    self.assertEqual('gi|4885477|ref|NP_005359.1|', qresult.id)
    self.assertEqual('-', qresult.acc)

    hit = qresult[0]
    self.assertEqual(1, len(hit))
    self.assertEqual('Globin', hit.id)
    self.assertEqual('PF00042.17', hit.acc)
    self.assertEqual(6e-21, hit.evalue)
    self.assertEqual(74.6, hit.bitscore)
    self.assertEqual(0.3, hit.bias)
    self.assertEqual(1.3, hit.domain_exp_num)
    self.assertEqual(1, hit.region_num)
    self.assertEqual(0, hit.cluster_num)
    self.assertEqual(0, hit.overlap_num)
    self.assertEqual(1, hit.env_num)
    self.assertEqual(1, hit.domain_obs_num)
    self.assertEqual(1, hit.domain_reported_num)
    self.assertEqual(1, hit.domain_included_num)
    self.assertEqual('Globin', hit.description)

    hsp = hit.hsps[0]
    self.assertEqual(9.2e-21, hsp.evalue)
    self.assertEqual(74.0, hsp.bitscore)
    self.assertEqual(0.2, hsp.bias)

    # test if we've properly finished iteration
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(1, counter)
def test_domtab_31b1_hmmsearch_001(self):
    """Test parsing hmmsearch-domtab, hmmsearch 3.1b1, single query (domtab_31b1_hmmsearch_001)

    Materialises all query results, asserts there is exactly one, and checks
    it, its first hit and that hit's first HSP.
    """
    domtab_path = get_file('domtab_31b1_hmmsearch_001.out')
    all_results = list(parse(domtab_path, self.fmt))
    self.assertEqual(1, len(all_results))

    only_result = all_results[0]
    self.assertEqual('Pkinase', only_result.id)
    self.assertEqual('PF00069.17', only_result.accession)
    self.assertEqual(260, only_result.seq_len)

    # hit-level values
    first_hit = only_result[0]
    self.assertEqual(2, len(first_hit))
    self.assertEqual('sp|Q9WUT3|KS6A2_MOUSE', first_hit.id)
    self.assertEqual('Pkinase', first_hit.query_id)
    self.assertEqual('-', first_hit.accession)
    self.assertEqual(733, first_hit.seq_len)
    self.assertEqual(8.5e-147, first_hit.evalue)
    self.assertEqual(492.3, first_hit.bitscore)
    self.assertEqual(0.0, first_hit.bias)
    expected_description = 'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=1 SV=1'
    self.assertEqual(expected_description, first_hit.description)

    # HSP-level values
    first_hsp = first_hit.hsps[0]
    self.assertEqual('sp|Q9WUT3|KS6A2_MOUSE', first_hsp.hit_id)
    self.assertEqual('Pkinase', first_hsp.query_id)
    self.assertEqual(1, first_hsp.domain_index)
    self.assertEqual(2.6e-75, first_hsp.evalue_cond)
    self.assertEqual(3.6e-70, first_hsp.evalue)
    self.assertEqual(241.2, first_hsp.bitscore)
    self.assertEqual(0.0, first_hsp.bias)
    self.assertEqual(58, first_hsp.hit_start)
    self.assertEqual(318, first_hsp.hit_end)
    self.assertEqual(0, first_hsp.query_start)
    self.assertEqual(260, first_hsp.query_end)
    self.assertEqual(58, first_hsp.env_start)
    self.assertEqual(318, first_hsp.env_end)
    self.assertEqual(0.95, first_hsp.acc_avg)
def test_hmmpfam_23_missing_consensus(self):
    """Test parsing hmmpfam 2.3 file (text_23_hmmpfam_003.out)"""
    fixture = path.join("Hmmer", "text_23_hmmpfam_003.out")
    result_iter = parse(fixture, self.fmt)
    query_result = next(result_iter)

    # query-level values
    self.assertEqual('small_input', query_result.id)
    self.assertEqual('[none]', query_result.description)
    self.assertEqual('[none]', query_result.accession)
    self.assertEqual('hmmpfam', query_result.program)
    self.assertEqual('2.3.2', query_result.version)
    self.assertEqual('antismash/specific_modules/lantipeptides/ClassIVLanti.hmm', query_result.target)
    self.assertEqual(1, len(query_result))

    # the only hit
    lanti_hit = query_result[0]
    self.assertEqual('ClassIVLanti', lanti_hit.id)
    self.assertEqual('Class-IV', lanti_hit.description)
    self.assertAlmostEqual(-79.3, lanti_hit.bitscore)
    self.assertAlmostEqual(1, lanti_hit.evalue)
    self.assertEqual(1, lanti_hit.domain_obs_num)
    self.assertEqual(1, len(lanti_hit))

    # the only HSP of that hit
    domain = lanti_hit[0]
    self.assertEqual(1, domain.domain_index)
    self.assertEqual(0, domain.hit_start)
    self.assertEqual(66, domain.hit_end)
    self.assertEqual('[]', domain.hit_endtype)
    self.assertEqual(5, domain.query_start)
    self.assertEqual(20, domain.query_end)
    self.assertEqual('..', domain.query_endtype)
    self.assertAlmostEqual(-79.3, domain.bitscore)
    self.assertAlmostEqual(1, domain.evalue)

    # all three alignment rows are asserted to have the same length
    self.assertEqual(len(domain.query.seq), len(domain.hit.seq))
    self.assertEqual(len(domain.query.seq), len(domain.aln_annotation['similarity']))

    # and to carry the expected content
    self.assertEqual('msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLnahiqakeLsdeeLEgvaGg', str(domain.hit.seq))
    self.assertEqual(' F+ G +t Ln ', str(domain.aln_annotation['similarity']))
    self.assertEqual('-------CFL---------------------------GCLVTNWVLNRS-----------------', str(domain.query.seq))
def test_domtab_30_hmmsearch_001(self):
    """Test parsing hmmsearch-domtab, hmmsearch 3.0, multiple queries (domtab_30_hmmsearch_001)

    Only the first query result is pulled from the iterator; the assertions
    cover it, its first hit and that hit's first HSP.
    """
    target_id = 'sp|Q9WUT3|KS6A2_MOUSE'

    source_path = get_file('domtab_30_hmmsearch_001.out')
    parsed = parse(source_path, self.fmt)

    # first qresult
    # we only want to check the coordinate switch actually
    # so checking the first hsp of the first hit of the qresult is enough
    leading_result = next(parsed)
    self.assertEqual(7, len(leading_result))
    self.assertEqual('Pkinase', leading_result.id)
    self.assertEqual('PF00069.17', leading_result.accession)
    self.assertEqual(260, leading_result.seq_len)

    # hit-level values
    leading_hit = leading_result[0]
    self.assertEqual(2, len(leading_hit))
    self.assertEqual(target_id, leading_hit.id)
    self.assertEqual('Pkinase', leading_hit.query_id)
    self.assertEqual('-', leading_hit.accession)
    self.assertEqual(733, leading_hit.seq_len)
    self.assertEqual(8.4e-147, leading_hit.evalue)
    self.assertEqual(492.3, leading_hit.bitscore)
    self.assertEqual(0.0, leading_hit.bias)
    self.assertEqual(
        'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=2 SV=1',
        leading_hit.description,
    )

    # HSP-level values, including the coordinates
    leading_hsp = leading_hit.hsps[0]
    self.assertEqual(target_id, leading_hsp.hit_id)
    self.assertEqual('Pkinase', leading_hsp.query_id)
    self.assertEqual(1, leading_hsp.domain_index)
    self.assertEqual(4.6e-75, leading_hsp.evalue_cond)
    self.assertEqual(3.5e-70, leading_hsp.evalue)
    self.assertEqual(241.2, leading_hsp.bitscore)
    self.assertEqual(0.0, leading_hsp.bias)
    self.assertEqual(58, leading_hsp.hit_start)
    self.assertEqual(318, leading_hsp.hit_end)
    self.assertEqual(0, leading_hsp.query_start)
    self.assertEqual(260, leading_hsp.query_end)
    self.assertEqual(58, leading_hsp.env_start)
    self.assertEqual(318, leading_hsp.env_end)
    self.assertEqual(0.95, leading_hsp.acc_avg)
def test_tab_2228_tblastx_001(self):
    """Test parsing TBLASTX 2.2.28+ tabular output (tab_2228_tblastx_001)

    The file is parsed with every field listed in ``all_fields`` and with
    comments enabled; only the fields new in 2.2.28+ are checked, on the
    first hit of the single query result.
    """
    source_path = get_file('tab_2228_tblastx_001.txt')
    every_field = list(all_fields.values())
    all_results = list(parse(source_path, FMT, fields=every_field, comments=True))

    # this a single query, with 192 hits and 243 hsps
    self.assertEqual(1, len(all_results))
    only_result = all_results[0]
    self.assertEqual(192, len(only_result.hits))
    self.assertEqual(243, sum(len(each_hit) for each_hit in only_result))

    # only checking the new fields in 2.2.28+
    first_hit = only_result[0]
    self.assertEqual(['NM_001183135', 'EF059095'], first_hit.accession_all)
    self.assertEqual(['32630', '559292'], first_hit.tax_ids)
    self.assertEqual(['N/A', 'N/A'], first_hit.sci_names)
    self.assertEqual(['N/A', 'N/A'], first_hit.com_names)
    self.assertEqual(['N/A'], first_hit.blast_names)
    self.assertEqual(['N/A'], first_hit.super_kingdoms)
    self.assertEqual('Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA', first_hit.title)
    expected_titles = [
        'Saccharomyces cerevisiae S288c Mon2p (MON2), mRNA',
        'Synthetic construct Saccharomyces cerevisiae clone '
        'FLH203015.01X MON2, complete sequence',
    ]
    self.assertEqual(expected_titles, first_hit.title_all)
    self.assertEqual('N/A', first_hit.strand)
    self.assertEqual(100.0, first_hit.query_coverage)

    for leading_hsp in first_hit[:4]:
        # shorthand ~ the values just happen to all be 99
        # in other cases, they may be different
        self.assertEqual(99.0, leading_hsp.query_coverage)
    self.assertEqual(73.0, first_hit[5].query_coverage)
    self.assertEqual(12.0, first_hit[6].query_coverage)
def test_domtab_31b1_hmmsearch_001(self):
    """Test parsing hmmsearch-domtab, hmmsearch 3.1b1, single query (domtab_31b1_hmmsearch_001)

    Materialises all query results, asserts there is exactly one, and checks
    it, its first hit and that hit's first HSP.
    """
    target_id = 'sp|Q9WUT3|KS6A2_MOUSE'

    source_path = get_file('domtab_31b1_hmmsearch_001.out')
    parsed = list(parse(source_path, self.fmt))
    self.assertEqual(1, len(parsed))

    sole_result = parsed[0]
    self.assertEqual('Pkinase', sole_result.id)
    self.assertEqual('PF00069.17', sole_result.accession)
    self.assertEqual(260, sole_result.seq_len)

    # hit-level values
    leading_hit = sole_result[0]
    self.assertEqual(2, len(leading_hit))
    self.assertEqual(target_id, leading_hit.id)
    self.assertEqual('Pkinase', leading_hit.query_id)
    self.assertEqual('-', leading_hit.accession)
    self.assertEqual(733, leading_hit.seq_len)
    self.assertEqual(8.5e-147, leading_hit.evalue)
    self.assertEqual(492.3, leading_hit.bitscore)
    self.assertEqual(0.0, leading_hit.bias)
    self.assertEqual(
        'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=1 SV=1',
        leading_hit.description,
    )

    # HSP-level values
    leading_hsp = leading_hit.hsps[0]
    self.assertEqual(target_id, leading_hsp.hit_id)
    self.assertEqual('Pkinase', leading_hsp.query_id)
    self.assertEqual(1, leading_hsp.domain_index)
    self.assertEqual(2.6e-75, leading_hsp.evalue_cond)
    self.assertEqual(3.6e-70, leading_hsp.evalue)
    self.assertEqual(241.2, leading_hsp.bitscore)
    self.assertEqual(0.0, leading_hsp.bias)
    self.assertEqual(58, leading_hsp.hit_start)
    self.assertEqual(318, leading_hsp.hit_end)
    self.assertEqual(0, leading_hsp.query_start)
    self.assertEqual(260, leading_hsp.query_end)
    self.assertEqual(58, leading_hsp.env_start)
    self.assertEqual(318, leading_hsp.env_end)
    self.assertEqual(0.95, leading_hsp.acc_avg)
def test_tab_2226_tblastn_011(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_011)

    Walks the three query results yielded for the fixture. The first is
    expected to have no hits; for the second and third, the first and/or
    last hits and selected HSPs are checked field by field. Finally the
    iterator is verified to be exhausted.
    """
    xml_file = get_file('tab_2226_tblastn_011.txt')
    qresults = parse(xml_file, FMT, comments=True)
    counter = 0

    # test first qresult
    # Use the builtin next(): iterator objects only expose a ``.next()``
    # method on Python 2, so the method form breaks on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('random_s00', qresult.id)
    self.assertEqual(0, len(qresult))

    # test second qresult
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.accession)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.accession_version)
    self.assertEqual('0', qresult.gi)
    self.assertEqual(102, qresult.seq_len)
    self.assertEqual(3, len(qresult))

    # second qresult, first hit
    hit = qresult[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id_all)
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.accession)
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.accession_version)
    self.assertEqual('0', hit.gi)
    self.assertEqual('0', hit.gi_all)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(4632, hit.seq_len)
    self.assertEqual(1, len(hit))

    hsp = hit.hsps[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
    self.assertEqual(34.88, hsp.ident_pct)
    self.assertEqual(43, hsp.aln_span)
    self.assertEqual(28, hsp.mismatch_num)
    self.assertEqual(0, hsp.gapopen_num)
    self.assertEqual(30, hsp.query_start)
    self.assertEqual(73, hsp.query_end)
    self.assertEqual(1743, hsp.hit_start)
    self.assertEqual(1872, hsp.hit_end)
    self.assertEqual(1e-05, hsp.evalue)
    self.assertEqual(34.7, hsp.bitscore)
    self.assertEqual('PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD', str(hsp.query.seq))
    self.assertEqual('PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID', str(hsp.hit.seq))
    self.assertEqual(78, hsp.bitscore_raw)
    self.assertEqual(15, hsp.ident_num)
    self.assertEqual(26, hsp.pos_num)
    self.assertEqual(0, hsp.gap_num)
    self.assertEqual(60.47, hsp.pos_pct)
    self.assertEqual(0, hsp.query_frame)
    self.assertEqual(1, hsp.hit_frame)

    # second qresult, last hit
    hit = qresult[-1]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.accession)
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.accession_version)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))

    hsp = hit.hsps[0]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
    self.assertEqual(33.90, hsp.ident_pct)
    self.assertEqual(59, hsp.aln_span)
    self.assertEqual(31, hsp.mismatch_num)
    self.assertEqual(1, hsp.gapopen_num)
    self.assertEqual(43, hsp.query_start)
    self.assertEqual(94, hsp.query_end)
    self.assertEqual(1056, hsp.hit_start)
    self.assertEqual(1233, hsp.hit_end)
    self.assertEqual(1e-04, hsp.evalue)
    self.assertEqual(31.6, hsp.bitscore)
    self.assertEqual('GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL', str(hsp.query.seq))
    self.assertEqual('GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL', str(hsp.hit.seq))
    self.assertEqual(70.0, hsp.bitscore_raw)
    self.assertEqual(20, hsp.ident_num)
    self.assertEqual(29, hsp.pos_num)
    self.assertEqual(8, hsp.gap_num)
    self.assertEqual(49.15, hsp.pos_pct)
    self.assertEqual(0, hsp.query_frame)
    self.assertEqual(1, hsp.hit_frame)

    # test last qresult
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('gi|11464971:4-101', qresult.id)
    self.assertEqual('gi|11464971:4-101', qresult.accession)
    self.assertEqual('gi|11464971:4-101', qresult.accession_version)
    self.assertEqual('0', qresult.gi)
    self.assertEqual(98, qresult.seq_len)
    self.assertEqual(5, len(qresult))

    # last qresult, first hit
    hit = qresult[0]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id)
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id_all)
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.accession)
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.accession_version)
    self.assertEqual('0', hit.gi)
    self.assertEqual('0', hit.gi_all)
    self.assertEqual('gi|11464971:4-101', hit.query_id)
    self.assertEqual(772, hit.seq_len)
    self.assertEqual(2, len(hit))

    # first HSP of that hit
    hsp = hit.hsps[0]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
    self.assertEqual('gi|11464971:4-101', hsp.query_id)
    self.assertEqual(95.92, hsp.ident_pct)
    self.assertEqual(98, hsp.aln_span)
    self.assertEqual(4, hsp.mismatch_num)
    self.assertEqual(0, hsp.gapopen_num)
    self.assertEqual(0, hsp.query_start)
    self.assertEqual(98, hsp.query_end)
    self.assertEqual(94, hsp.hit_start)
    self.assertEqual(388, hsp.hit_end)
    self.assertEqual(2e-67, hsp.evalue)
    self.assertEqual(199, hsp.bitscore)
    self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq))
    self.assertEqual('KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK', str(hsp.hit.seq))
    self.assertEqual(506.0, hsp.bitscore_raw)
    self.assertEqual(94, hsp.ident_num)
    self.assertEqual(96, hsp.pos_num)
    self.assertEqual(0, hsp.gap_num)
    self.assertEqual(97.96, hsp.pos_pct)
    self.assertEqual(0, hsp.query_frame)
    self.assertEqual(2, hsp.hit_frame)

    # last HSP of that hit
    hsp = hit.hsps[-1]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
    self.assertEqual('gi|11464971:4-101', hsp.query_id)
    self.assertEqual(29.58, hsp.ident_pct)
    self.assertEqual(71, hsp.aln_span)
    self.assertEqual(46, hsp.mismatch_num)
    self.assertEqual(2, hsp.gapopen_num)
    self.assertEqual(29, hsp.query_start)
    self.assertEqual(96, hsp.query_end)
    self.assertEqual(541, hsp.hit_start)
    self.assertEqual(754, hsp.hit_end)
    self.assertEqual(4e-05, hsp.evalue)
    self.assertEqual(32.7, hsp.bitscore)
    self.assertEqual('IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq))
    self.assertEqual('LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA', str(hsp.hit.seq))
    self.assertEqual(73.0, hsp.bitscore_raw)
    self.assertEqual(21, hsp.ident_num)
    self.assertEqual(33, hsp.pos_num)
    self.assertEqual(4, hsp.gap_num)
    self.assertEqual(46.48, hsp.pos_pct)
    self.assertEqual(0, hsp.query_frame)
    self.assertEqual(2, hsp.hit_frame)

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(3, counter)
def test_tab_2226_tblastn_010(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_010)

    Walks the three query results yielded for the fixture. The first is
    expected to have no hits; for the others, ids, e-values and bit scores
    of selected hits/HSPs are checked. Finally the iterator is verified to
    be exhausted.
    """
    xml_file = get_file('tab_2226_tblastn_010.txt')
    qresults = parse(xml_file, FMT, comments=True)
    counter = 0

    # test first qresult
    # Use the builtin next(): iterator objects only expose a ``.next()``
    # method on Python 2, so the method form breaks on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('random_s00', qresult.id)
    self.assertEqual(0, len(qresult))

    # test second qresult
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
    self.assertEqual(3, len(qresult))

    # second qresult, first hit
    hit = qresult[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))
    hsp = hit.hsps[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
    self.assertEqual(1e-05, hsp.evalue)
    self.assertEqual(34.7, hsp.bitscore)

    # second qresult, last hit
    hit = qresult[-1]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))
    hsp = hit.hsps[0]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)
    self.assertEqual(1e-04, hsp.evalue)
    self.assertEqual(31.6, hsp.bitscore)

    # test last qresult
    qresult = next(qresults)
    counter += 1
    self.assertEqual('tblastn', qresult.program)
    self.assertEqual('db/minirefseq_mrna', qresult.target)
    self.assertEqual('2.2.26+', qresult.version)
    self.assertEqual('gi|11464971:4-101', qresult.id)
    self.assertEqual(5, len(qresult))

    # last qresult, first hit: first and last HSPs
    hit = qresult[0]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id)
    self.assertEqual('gi|11464971:4-101', hit.query_id)
    self.assertEqual(2, len(hit))
    hsp = hit.hsps[0]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
    self.assertEqual('gi|11464971:4-101', hsp.query_id)
    self.assertEqual(2e-67, hsp.evalue)
    self.assertEqual(199, hsp.bitscore)
    hsp = hit.hsps[-1]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
    self.assertEqual('gi|11464971:4-101', hsp.query_id)
    self.assertEqual(4e-05, hsp.evalue)
    self.assertEqual(32.7, hsp.bitscore)

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(3, counter)
def test_tab_2226_tblastn_009(self):
    """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_009)

    The file is parsed with only the ``qseqid`` and ``sseqid`` fields, so the
    assertions are limited to ids, hit/HSP counts and the ``<unknown ...>``
    placeholders for program, target and version. Two query results are
    expected before the iterator is exhausted.
    """
    xml_file = get_file('tab_2226_tblastn_009.txt')
    qresults = parse(xml_file, FMT, fields=('qseqid', 'sseqid'))
    counter = 0

    # test first qresult
    # Use the builtin next(): iterator objects only expose a ``.next()``
    # method on Python 2, so the method form breaks on Python 3.
    qresult = next(qresults)
    counter += 1
    self.assertEqual('<unknown program>', qresult.program)
    self.assertEqual('<unknown target>', qresult.target)
    self.assertEqual('<unknown version>', qresult.version)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id)
    self.assertEqual(3, len(qresult))

    # first qresult, first hit
    hit = qresult[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))
    hsp = hit[0]
    self.assertEqual('gi|145479850|ref|XM_001425911.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)

    # first qresult, last hit
    hit = qresult[-1]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hit.id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hit.query_id)
    self.assertEqual(1, len(hit))
    hsp = hit[0]
    self.assertEqual('gi|115975252|ref|XM_001180111.1|', hsp.hit_id)
    self.assertEqual('gi|16080617|ref|NP_391444.1|', hsp.query_id)

    # test last qresult
    qresult = next(qresults)
    counter += 1
    self.assertEqual('<unknown program>', qresult.program)
    self.assertEqual('<unknown target>', qresult.target)
    self.assertEqual('<unknown version>', qresult.version)
    self.assertEqual('gi|11464971:4-101', qresult.id)
    self.assertEqual(5, len(qresult))

    # last qresult, first hit: first and last HSPs
    hit = qresult[0]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id)
    self.assertEqual('gi|11464971:4-101', hit.query_id)
    self.assertEqual(2, len(hit))
    hsp = hit[0]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
    self.assertEqual('gi|11464971:4-101', hsp.query_id)
    hsp = hit[-1]
    self.assertEqual('gi|350596019|ref|XM_003360601.2|', hsp.hit_id)
    self.assertEqual('gi|11464971:4-101', hsp.query_id)

    # check if we've finished iteration over qresults
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(2, counter)
def test_4p79(self):
    """Test parsing 4p79_hhsearch_server_NOssm.hhr"""

    def check(obj, pairs):
        # Compare each named attribute of obj with its expected value, in order.
        for attr, wanted in pairs:
            self.assertEqual(wanted, getattr(obj, attr))

    records = parse(get_file('4p79_hhsearch_server_NOssm.hhr'), FMT)

    # Only the first query result is read and inspected.
    record = next(records)
    hit_total = 8
    check(record, (
        ('program', 'HHSUITE'),
        ('id', '4P79:A|PDBID|CHAIN|SEQUENCE'),
        ('seq_len', 198),
    ))
    self.assertEqual(hit_total, len(record))

    # First hit and its first HSP.
    lead_hit = record[0]
    check(lead_hit, (
        ('id', '4P79_A'),
        ('description',
         'cell adhesion protein; cell adhesion, tight junction, membrane; HET: OLC'
         ', MSE; 2.4A {Mus musculus}'),
    ))
    self.assertTrue(lead_hit.is_included)
    check(lead_hit, (('evalue', 6.8e-32), ('score', 194.63)))
    self.assertEqual(1, len(lead_hit))

    lead_hsp = lead_hit.hsps[0]
    self.assertTrue(lead_hsp.is_included)
    check(lead_hsp, (
        ('output_index', 0),
        ('evalue', 6.8e-32),
        ('score', 194.63),
        ('prob', 99.94),
        ('hit_start', 0),
        ('hit_end', 198),
        ('query_start', 0),
        ('query_end', 198),
    ))
    self.assertEqual(
        'GSEFMSVAVETFGFFMSALGLLMLGLTLSNSYWRVSTVHGNVITTNTIFENLWYSCATDSLGVSNCWDFPSMLALSGYVQ'
        'GCRALMITAILLGFLGLFLGMVGLRATNVGNMDLSKKAKLLAIAGTLHILAGACGMVAISWYAVNITTDFFNPLYAGTKY'
        'ELGPALYLGWSASLLSILGGICVFSTAAASSKEEPATR',
        str(lead_hsp.query.seq),
    )
    self.assertEqual(
        'GSEFMSVAVETFGFFMSALGLLMLGLTLSNSYWRVSTVHGNVITTNTIFENLWYSCATDSLGVSNCWDFPSMLALSGYVQ'
        'GCRALMITAILLGFLGLFLGMVGLRATNVGNMDLSKKAKLLAIAGTLHILAGACGMVAISWYAVNITTDFFNPLYAGTKY'
        'ELGPALYLGWSASLLSILGGICVFSTAAASSKEEPATR',
        str(lead_hsp.hit.seq),
    )

    # Last hit.
    tail_hit = record[hit_total - 1]
    check(tail_hit, (
        ('id', '5YQ7_F'),
        ('description',
         'Beta subunit of light-harvesting 1; Photosynthetic core complex, PHOTOSYNTHESIS; '
         'HET: MQE, BCL, HEM, KGD, BPH;{Roseiflexus castenholzii}; Related PDB entries: 5YQ7_V'
         ' 5YQ7_3 5YQ7_T 5YQ7_J 5YQ7_9 5YQ7_N 5YQ7_A 5YQ7_P 5YQ7_H 5YQ7_D 5YQ7_5 5YQ7_7 5YQ7_1 '
         '5YQ7_R'),
    ))
    self.assertTrue(tail_hit.is_included)
    check(tail_hit, (('evalue', 6.7), ('score', 20.51)))
    self.assertEqual(1, len(tail_hit))

    # The last HSP of the flattened HSP list should be the last one in the file.
    hsp_total = hit_total
    self.assertEqual(hsp_total, len(record.hsps))
    tail_hsp = record.hsps[-1]
    self.assertTrue(tail_hsp.is_included)
    check(tail_hsp, (
        ('output_index', hsp_total - 1),
        ('evalue', 6.7),
        ('score', 20.51),
        ('prob', 52.07),
        ('hit_start', 8),
        ('hit_end', 42),
        ('query_start', 5),
        ('query_end', 37),
    ))
    self.assertEqual('RTSVVVSTLLGLVMALLIHFVVLSSGAFNWLRAP', str(tail_hsp.hit.seq))
    self.assertEqual('SVAVETFGFFMSALGLLMLGLTLSNS--YWRVST', str(tail_hsp.query.seq))
def test_4y9h_nossm(self):
    """Test parsing 4y9h_hhsearch_server_NOssm.hhr"""

    def check(obj, pairs):
        # Compare each named attribute of obj with its expected value, in order.
        for attr, wanted in pairs:
            self.assertEqual(wanted, getattr(obj, attr))

    records = parse(get_file('4y9h_hhsearch_server_NOssm.hhr'), FMT)

    # Only the first query result is read and inspected.
    record = next(records)
    hit_total = 29
    check(record, (
        ('program', 'HHSUITE'),
        ('id', '4Y9H:A|PDBID|CHAIN|SEQUENCE'),
        ('seq_len', 226),
    ))
    self.assertEqual(hit_total, len(record))

    # First hit and its first HSP.
    lead_hit = record[0]
    check(lead_hit, (
        ('id', '5ZIM_A'),
        ('description',
         'Bacteriorhodopsin; proton pump, membrane protein, PROTON; HET: L2P, RET; 1.25A {Halobacterium'
         ' salinarum}; Related PDB entries: 1R84_A 1KG8_A 1KME_B 1KGB_A 1KG9_A 1KME_A 4X31_A 5ZIL_A 1E0P_A '
         '4X32_A 5ZIN_A 1S53_B 1S51_B 1S53_A 1S54_A 1F50_A 1S54_B 1S51_A 1F4Z_A 5J7A_A 1S52_B 1S52_A 4Y9H_A '
         '3T45_A 3T45_C 3T45_B 1C3W_A 1L0M_A'),
    ))
    self.assertTrue(lead_hit.is_included)
    check(lead_hit, (('evalue', 2.1e-48), ('score', 320.44)))
    self.assertEqual(1, len(lead_hit))

    lead_hsp = lead_hit.hsps[0]
    self.assertTrue(lead_hsp.is_included)
    check(lead_hsp, (
        ('output_index', 0),
        ('evalue', 2.1e-48),
        ('score', 320.44),
        ('prob', 100.00),
        ('hit_start', 1),
        ('hit_end', 227),
        ('query_start', 0),
        ('query_end', 226),
    ))
    self.assertEqual(
        'GRPEWIWLALGTALMGLGTLYFLVKGMGVSDPDAKKFYAITTLVPAIAFTMYLSMLLGYGLTMVPFGGEQNPIYWARYAD'
        'WLFTTPLLLLDLALLVDADQGTILALVGADGIMIGTGLVGALTKVYSYRFVWWAISTAAMLYILYVLFFGFTSKAESMRP'
        'EVASTFKVLRNVTVVLWSAYPVVWLIGSEGAGIVPLNIETLLFMVLDVSAKVGFGLILLRSRAIFG',
        str(lead_hsp.hit.seq),
    )
    self.assertEqual(
        'GRPEWIWLALGTALMGLGTLYFLVKGMGVSDPDAKKFYAITTLVPAIAFTMYLSMLLGYGLTMVPFGGEQNPIYWARYAD'
        'WLFTTPLLLLDLALLVDADQGTILALVGADGIMIGTGLVGALTKVYSYRFVWWAISTAAMLYILYVLFFGFTSKAESMRP'
        'EVASTFKVLRNVTVVLWSAYPVVWLIGSEGAGIVPLNIETLLFMVLDVSAKVGFGLILLRSRAIFG',
        str(lead_hsp.query.seq),
    )

    # Last hit.
    tail_hit = record[hit_total - 1]
    check(tail_hit, (
        ('id', '5ABB_Z'),
        ('description',
         'PROTEIN TRANSLOCASE SUBUNIT SECY, PROTEIN; TRANSLATION, RIBOSOME, MEMBRANE PROTEIN, '
         'TRANSLOCON; 8.0A {ESCHERICHIA COLI}'),
    ))
    self.assertTrue(tail_hit.is_included)
    check(tail_hit, (('evalue', 3.3e-05), ('score', 51.24)))
    self.assertEqual(1, len(tail_hit))

    # The last HSP of the flattened HSP list should be the last one in the file.
    hsp_total = hit_total
    self.assertEqual(hsp_total, len(record.hsps))
    tail_hsp = record.hsps[-1]
    self.assertTrue(tail_hsp.is_included)
    check(tail_hsp, (
        ('output_index', hsp_total - 1),
        ('evalue', 3.3e-05),
        ('score', 51.24),
        ('prob', 96.55),
        ('hit_start', 14),
        ('hit_end', 65),
        ('query_start', 7),
        ('query_end', 59),
    ))
    self.assertEqual('FWLVTAALLASTVFFFVERDRVS-AKWKTSLTVSGLVTGIAFWHYMYMRGVW', str(tail_hsp.hit.seq))
    self.assertEqual('LALGTALMGLGTLYFLVKGMGVSDPDAKKFYAITTLVPAIAFTMYLSMLLGY', str(tail_hsp.query.seq))
def test_tab_2226_tblastn_011(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_011)."""

    def check(obj, pairs):
        # Compare each named attribute of obj with its expected value, in order.
        for attr, wanted in pairs:
            self.assertEqual(wanted, getattr(obj, attr))

    # Header values expected on every query result of this file.
    header = (
        ("program", "tblastn"),
        ("target", "db/minirefseq_mrna"),
        ("version", "2.2.26+"),
    )

    qresults = parse(get_file("tab_2226_tblastn_011.txt"), FMT, comments=True)
    counter = 0

    # ---- first qresult: no hits ----
    qresult = next(qresults)
    counter += 1
    check(qresult, header + (("id", "random_s00"),))
    self.assertEqual(0, len(qresult))

    # ---- second qresult ----
    qresult = next(qresults)
    counter += 1
    query_id = "gi|16080617|ref|NP_391444.1|"
    check(qresult, header + (
        ("id", query_id),
        ("accession", query_id),
        ("accession_version", query_id),
        ("gi", "0"),
        ("seq_len", 102),
    ))
    self.assertEqual(3, len(qresult))

    hit = qresult[0]
    hit_id = "gi|145479850|ref|XM_001425911.1|"
    check(hit, (
        ("id", hit_id),
        ("id_all", [hit_id]),
        ("accession", hit_id),
        ("accession_version", hit_id),
        ("gi", "0"),
        ("gi_all", "0"),
        ("query_id", query_id),
        ("seq_len", 4632),
    ))
    self.assertEqual(1, len(hit))

    hsp = hit.hsps[0]
    check(hsp, (
        ("hit_id", hit_id),
        ("query_id", query_id),
        ("ident_pct", 34.88),
        ("aln_span", 43),
        ("mismatch_num", 28),
        ("gapopen_num", 0),
        ("query_start", 30),
        ("query_end", 73),
        ("hit_start", 1743),
        ("hit_end", 1872),
        ("evalue", 1e-05),
        ("bitscore", 34.7),
    ))
    self.assertEqual("PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD", hsp.query.seq)
    self.assertEqual("PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID", hsp.hit.seq)
    check(hsp, (
        ("bitscore_raw", 78),
        ("ident_num", 15),
        ("pos_num", 26),
        ("gap_num", 0),
        ("pos_pct", 60.47),
        ("query_frame", 0),
        ("hit_frame", 1),
    ))

    hit = qresult[-1]
    hit_id = "gi|115975252|ref|XM_001180111.1|"
    check(hit, (
        ("id", hit_id),
        ("accession", hit_id),
        ("accession_version", hit_id),
        ("query_id", query_id),
    ))
    self.assertEqual(1, len(hit))

    hsp = hit.hsps[0]
    check(hsp, (
        ("hit_id", hit_id),
        ("query_id", query_id),
        ("ident_pct", 33.90),
        ("aln_span", 59),
        ("mismatch_num", 31),
        ("gapopen_num", 1),
        ("query_start", 43),
        ("query_end", 94),
        ("hit_start", 1056),
        ("hit_end", 1233),
        ("evalue", 1e-04),
        ("bitscore", 31.6),
    ))
    self.assertEqual(
        "GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL",
        hsp.query.seq,
    )
    self.assertEqual(
        "GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL",
        hsp.hit.seq,
    )
    check(hsp, (
        ("bitscore_raw", 70.0),
        ("ident_num", 20),
        ("pos_num", 29),
        ("gap_num", 8),
        ("pos_pct", 49.15),
        ("query_frame", 0),
        ("hit_frame", 1),
    ))

    # ---- last qresult ----
    qresult = next(qresults)
    counter += 1
    query_id = "gi|11464971:4-101"
    check(qresult, header + (
        ("id", query_id),
        ("accession", query_id),
        ("accession_version", query_id),
        ("gi", "0"),
        ("seq_len", 98),
    ))
    self.assertEqual(5, len(qresult))

    hit = qresult[0]
    hit_id = "gi|350596019|ref|XM_003360601.2|"
    check(hit, (
        ("id", hit_id),
        ("id_all", [hit_id]),
        ("accession", hit_id),
        ("accession_version", hit_id),
        ("gi", "0"),
        ("gi_all", "0"),
        ("query_id", query_id),
        ("seq_len", 772),
    ))
    self.assertEqual(2, len(hit))

    hsp = hit.hsps[0]
    check(hsp, (
        ("hit_id", hit_id),
        ("query_id", query_id),
        ("ident_pct", 95.92),
        ("aln_span", 98),
        ("mismatch_num", 4),
        ("gapopen_num", 0),
        ("query_start", 0),
        ("query_end", 98),
        ("hit_start", 94),
        ("hit_end", 388),
        ("evalue", 2e-67),
        ("bitscore", 199),
    ))
    self.assertEqual(
        "KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK",
        hsp.query.seq,
    )
    self.assertEqual(
        "KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK",
        hsp.hit.seq,
    )
    check(hsp, (
        ("bitscore_raw", 506.0),
        ("ident_num", 94),
        ("pos_num", 96),
        ("gap_num", 0),
        ("pos_pct", 97.96),
        ("query_frame", 0),
        ("hit_frame", 2),
    ))

    hsp = hit.hsps[-1]
    check(hsp, (
        ("hit_id", hit_id),
        ("query_id", query_id),
        ("ident_pct", 29.58),
        ("aln_span", 71),
        ("mismatch_num", 46),
        ("gapopen_num", 2),
        ("query_start", 29),
        ("query_end", 96),
        ("hit_start", 541),
        ("hit_end", 754),
        ("evalue", 4e-05),
        ("bitscore", 32.7),
    ))
    self.assertEqual(
        "IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA",
        hsp.query.seq,
    )
    self.assertEqual(
        "LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA",
        hsp.hit.seq,
    )
    check(hsp, (
        ("bitscore_raw", 73.0),
        ("ident_num", 21),
        ("pos_num", 33),
        ("gap_num", 4),
        ("pos_pct", 46.48),
        ("query_frame", 0),
        ("hit_frame", 2),
    ))

    # The iterator must be exhausted after the third query result.
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(3, counter)
def test_q9bsu1(self):
    """Test parsing hhsearch_q9bsu1_uniclust_w_ss_pfamA_30.hhr

    Checks the query header, the first and last hits, and the first and last
    HSPs of the first query result read from the file.
    """
    txt_file = get_file('hhsearch_q9bsu1_uniclust_w_ss_pfamA_30.hhr')
    qresults = parse(txt_file, FMT)

    # test first and only qresult
    qresult = next(qresults)

    num_hits = 12
    self.assertEqual('HHSUITE', qresult.program)
    # The organism field of this human (OX=9606) UniProt header reads
    # "Homo sapiens"; the literal had been mangled to "H**o sapiens",
    # which could never match a parsed ID.
    self.assertEqual('sp|Q9BSU1|CP070_HUMAN UPF0183 protein C16orf70 OS=Homo sapiens OX=9606 GN=C16orf70'
                     ' PE=1 SV=1', qresult.id)
    self.assertEqual(422, qresult.seq_len)
    self.assertEqual(num_hits, len(qresult))

    hit = qresult[0]
    self.assertEqual('PF03676.13', hit.id)
    self.assertEqual('UPF0183 ; Uncharacterised protein family (UPF0183)', hit.description)
    self.assertTrue(hit.is_included)
    self.assertEqual(2e-106, hit.evalue)
    self.assertEqual(822.75, hit.score)
    self.assertEqual(1, len(hit))

    hsp = hit.hsps[0]
    self.assertTrue(hsp.is_included)
    self.assertEqual(0, hsp.output_index)
    self.assertEqual(2e-106, hsp.evalue)
    self.assertEqual(822.75, hsp.score)
    self.assertEqual(100.00, hsp.prob)
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(395, hsp.hit_end)
    self.assertEqual(10, hsp.query_start)
    self.assertEqual(407, hsp.query_end)
    self.assertEqual('SLGNEQWEFTLGMPLAQAVAILQKHCRIIKNVQVLYSEQSPLSHDLILNLTQDGIKLMFDAFNQRLKVIEVCDLTKVKLK'
                     'YCGVHFNSQAIAPTIEQIDQSFGATHPGVYNSAEQLFHLNFRGLSFSFQLDSWTEAPKYEPNFAHGLASLQIPHGATVKR'
                     'MYIYSGNSLQDTKAPMMPLSCFLGNVYAESVDVLRDGTGPAGLRLRLLAAGCGPGLLADAKMRVFERSVYFGDSCQDVLS'
                     'MLGSPHKVFYKSEDKMKIHSPSPHKQVPSKCNDYFFNYFTLGVDILFDANTHKVKKFVLHTNYPGHYNFNIYHRCEFKIP'
                     'LAIKKENADGQTE--TCTTYSKWDNIQELLGHPVEKPVVLHRSSSPNNTNPFGSTFCFGLQRMIFEVMQNNHIASVTLY',
                     str(hsp.query.seq))
    self.assertEqual('EQWE----FALGMPLAQAISILQKHCRIIKNVQVLYSEQMPLSHDLILNLTQDGIKLLFDACNQRLKVIEVYDLTKVKLK'
                     'YCGVHFNSQAIAPTIEQIDQSFGATHPGVYNAAEQLFHLNFRGLSFSFQLDSWSEAPKYEPNFAHGLASLQIPHGATVKR'
                     'MYIYSGNNLQETKAPAMPLACFLGNVYAECVEVLRDGAGPLGLKLRLLTAGCGPGVLADTKVRAVERSIYFGDSCQDVLS'
                     'ALGSPHKVFYKSEDKMKIHSPSPHKQVPSKCNDYFFNYYILGVDILFDSTTHLVKKFVLHTNFPGHYNFNIYHRCDFKIP'
                     'LIIKKDGADAHSEDCILTTYSKWDQIQELLGHPMEKPVVLHRSSSANNTNPFGSTFCFGLQRMIFEVMQNNHIASVTLY',
                     str(hsp.hit.seq))

    # Check last hit
    hit = qresult[num_hits - 1]
    self.assertEqual('PF10049.8', hit.id)
    self.assertEqual('DUF2283 ; Protein of unknown function (DUF2283)', hit.description)
    self.assertTrue(hit.is_included)
    self.assertEqual(78, hit.evalue)
    self.assertEqual(19.81, hit.score)
    self.assertEqual(1, len(hit))

    # Check we can get the original last HSP from the file.
    num_hsps = 16
    self.assertEqual(num_hsps, len(qresult.hsps))
    hsp = qresult.hsps[-1]
    self.assertTrue(hsp.is_included)
    self.assertEqual(num_hsps - 1, hsp.output_index)
    self.assertEqual(78, hsp.evalue)
    self.assertEqual(19.81, hsp.score)
    self.assertEqual(20.88, hsp.prob)
    self.assertEqual(25, hsp.hit_start)
    self.assertEqual(48, hsp.hit_end)
    self.assertEqual(61, hsp.query_start)
    self.assertEqual(85, hsp.query_end)
    self.assertEqual('APNVIFDYDA-EGRIVGIELLDAR', str(hsp.hit.seq))
    self.assertEqual('QDGIKLMFDAFNQRLKVIEVCDLT', str(hsp.query.seq))
def test_hmmpfam_24(self):
    """Test parsing hmmpfam 2.4 file (text_24_hmmpfam_001.out).

    Checks the first query result (no hits) and, for the fourth query
    result, its first hit and its second-from-last hit with their HSPs.
    """
    results = list(
        parse(path.join("Hmmer", "text_24_hmmpfam_001.out"), self.fmt))
    self.assertEqual(5, len(results))

    # first qresult
    res = results[0]
    self.assertEqual("random_s00", res.id)
    self.assertEqual("[none]", res.accession)
    self.assertEqual("[none]", res.description)
    self.assertEqual("hmmpfam", res.program)
    self.assertEqual("2.4i", res.version)
    self.assertEqual("/home/bow/db/hmmer/Pfam_fs", res.target)
    self.assertEqual(0, len(res))

    # fourth qresult
    res = results[3]
    self.assertEqual("gi|22748937|ref|NP_065801.1|", res.id)
    self.assertEqual("[none]", res.accession)
    # The organism name had been mangled to "H**o sapiens"; restored.
    self.assertEqual("exportin-5 [Homo sapiens]", res.description)
    self.assertEqual("hmmpfam", res.program)
    self.assertEqual("2.4i", res.version)
    self.assertEqual("/home/bow/db/hmmer/Pfam_fs", res.target)
    self.assertEqual(33, len(res))

    # fourth qresult, first hit
    hit = res[0]
    self.assertEqual("Xpo1", hit.id)
    self.assertEqual("Exportin 1-like protein", hit.description)
    self.assertAlmostEqual(170.1, hit.bitscore)
    self.assertAlmostEqual(5.1e-48, hit.evalue)
    self.assertEqual(1, hit.domain_obs_num)
    self.assertEqual(1, len(hit))

    # fourth qresult, first hit, first hsp
    hsp = hit[0]
    self.assertEqual(1, hsp.domain_index)
    self.assertAlmostEqual(170.1, hsp.bitscore)
    # NOTE(review): the hit-level check above expects 5.1e-48 while this one
    # expects 5.1e-148. assertAlmostEqual rounds the difference to 7 decimal
    # places by default, so it cannot tell such tiny values apart and one of
    # the two may be a typo - confirm against the fixture.
    self.assertAlmostEqual(5.1e-148, hsp.evalue)
    self.assertEqual(108, hsp.query_start)
    self.assertEqual(271, hsp.query_end)
    self.assertEqual("..", hsp.query_endtype)
    self.assertEqual("ENHIKDALSRIVVEMIKREWPQHWPDMLIELDTLSKQG--",
                     str(hsp.query.seq)[:40])
    # The similarity literals are compared with 40-character slices, so they
    # must be 40 characters long and line up column by column with the query
    # and hit literals. Runs of spaces (mismatch and gap columns) had been
    # collapsed to single spaces; they are restored here.
    self.assertEqual(
        "+++++  L+++++e++k+ewP++Wp+ + +l  l++++  ",
        str(hsp.aln_annotation["similarity"])[:40],
    )
    self.assertEqual("WVSMSHITA-ENCkLLEILCLLL----NEQELQLGAAECL",
                     str(hsp.query.seq)[-40:])
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(178, hsp.hit_end)
    self.assertEqual("[]", hsp.hit_endtype)
    self.assertEqual("pkflrnKLalalaelakqewPsnWpsffpdlvsllsssss",
                     str(hsp.hit.seq)[:40])
    self.assertEqual(
        "W+++++i + ++++ll++l+ lL     + +l++ A+eCL",
        str(hsp.aln_annotation["similarity"])[-40:],
    )
    self.assertEqual("Wipiglianvnpi.llnllfslLsgpesdpdlreaAveCL",
                     str(hsp.hit.seq)[-40:])

    # fourth qresult, second from last hit
    hit = res[-2]
    self.assertEqual("Rad50_zn_hook", hit.id)
    self.assertEqual("Rad50 zinc hook motif", hit.description)
    self.assertAlmostEqual(2.2, hit.bitscore)
    self.assertAlmostEqual(9.2, hit.evalue)
    self.assertEqual(2, hit.domain_obs_num)
    self.assertEqual(2, len(hit))

    # fourth qresult, second from last hit, first hsp
    hsp = hit[0]
    self.assertEqual(1, hsp.domain_index)
    self.assertAlmostEqual(0.8, hsp.bitscore)
    self.assertAlmostEqual(22, hsp.evalue)
    self.assertEqual(20, hsp.query_start)
    self.assertEqual(47, hsp.query_end)
    self.assertEqual("..", hsp.query_endtype)
    self.assertEqual("MDPNSTQRYRLEALKFCEEFKE-KCPIC", str(hsp.query.seq))
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(28, hsp.hit_end)
    self.assertEqual("[.", hsp.hit_endtype)
    self.assertEqual("galesekaelkkaieeleeeesscCPvC", str(hsp.hit.seq))

    # fourth qresult, second from last hit, last hsp
    hsp = hit[-1]
    self.assertEqual(2, hsp.domain_index)
    self.assertAlmostEqual(1.3, hsp.bitscore)
    self.assertAlmostEqual(16, hsp.evalue)
    self.assertEqual(789, hsp.query_start)
    self.assertEqual(811, hsp.query_end)
    self.assertEqual("..", hsp.query_endtype)
    self.assertEqual("EMLAKMAEPFTKALDMLDAEKS", str(hsp.query.seq))
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(22, hsp.hit_end)
    self.assertEqual("[.", hsp.hit_endtype)
    self.assertEqual("galesekaelkkaieeleeees", str(hsp.hit.seq))
def test_hmmpfam_21(self):
    """Test parsing hmmpfam 2.1 file (text_21_hmmpfam_001.out)."""

    def check(obj, pairs):
        # Compare each named attribute of obj with its expected value, in order.
        for attr, wanted in pairs:
            self.assertEqual(wanted, getattr(obj, attr))

    records = parse(path.join("Hmmer", "text_21_hmmpfam_001.out"), self.fmt)
    record = next(records)
    check(record, (
        ("id", "roa1_drome"),
        ("description", "<unknown description>"),
        ("program", "hmmpfam"),
        ("version", "2.1.1"),
        ("target", "pfam"),
    ))
    self.assertEqual(1, len(record))

    seed_hit = record[0]
    check(seed_hit, (
        ("id", "SEED"),
        ("description", "<unknown description>"),
    ))
    self.assertAlmostEqual(146.1, seed_hit.bitscore)
    self.assertAlmostEqual(6.3e-40, seed_hit.evalue)
    self.assertEqual(2, seed_hit.domain_obs_num)
    self.assertEqual(2, len(seed_hit))

    # NOTE(review): the two similarity literals below are shorter than the
    # aligned hit/query literals next to them, which suggests runs of spaces
    # were collapsed at some point - confirm against the fixture.

    # First domain.
    domain = seed_hit[0]
    check(domain, (
        ("domain_index", 1),
        ("hit_start", 0),
        ("hit_end", 77),
        ("hit_endtype", "[]"),
        ("query_start", 32),
        ("query_end", 103),
        ("query_endtype", ".."),
    ))
    self.assertAlmostEqual(71.2, domain.bitscore)
    self.assertAlmostEqual(2.2e-17, domain.evalue)
    self.assertEqual(
        "lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv",
        str(domain.hit.seq),
    )
    self.assertEqual(
        "lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ",
        str(domain.aln_annotation["similarity"]),
    )
    self.assertEqual(
        "LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP",
        str(domain.query.seq),
    )

    # Second domain.
    domain = seed_hit[1]
    check(domain, (
        ("domain_index", 2),
        ("hit_start", 0),
        ("hit_end", 77),
        ("hit_endtype", "[]"),
        ("query_start", 123),
        ("query_end", 194),
        ("query_endtype", ".."),
    ))
    self.assertAlmostEqual(75.5, domain.bitscore)
    self.assertAlmostEqual(1.1e-18, domain.evalue)
    self.assertEqual(
        "lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv",
        str(domain.hit.seq),
    )
    self.assertEqual(
        "lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v",
        str(domain.aln_annotation["similarity"]),
    )
    self.assertEqual(
        "LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV",
        str(domain.query.seq),
    )
def test_9590198(self):
    """Test parsing hhpred_9590198.hhr

    Checks the query header, the first and last hits, and the first and last
    HSPs of the first query result read from the file.
    """
    txt_file = get_file('hhpred_9590198.hhr')
    qresults = parse(txt_file, FMT)

    # test first and only qresult
    qresult = next(qresults)

    num_hits = 22
    self.assertEqual('HHSUITE', qresult.program)
    # The organism field of this human (OX=9606) UniProt header reads
    # "Homo sapiens"; the literal had been mangled to "H**o sapiens",
    # which could never match a parsed ID.
    self.assertEqual('sp|Q9BSU1|CP070_HUMAN UPF0183 protein C16orf70 OS=Homo sapiens OX=9606 GN=C16orf70'
                     ' PE=1 SV=1', qresult.id)
    self.assertEqual(422, qresult.seq_len)
    self.assertEqual(num_hits, len(qresult))

    hit = qresult[0]
    self.assertEqual('PF03676.14', hit.id)
    self.assertEqual('UPF0183 ; Uncharacterised protein family (UPF0183)', hit.description)
    self.assertTrue(hit.is_included)
    self.assertEqual(9.9e-102, hit.evalue)
    self.assertEqual(792.76, hit.score)
    self.assertEqual(1, len(hit))

    hsp = hit.hsps[0]
    self.assertTrue(hsp.is_included)
    self.assertEqual(0, hsp.output_index)
    self.assertEqual(9.9e-102, hsp.evalue)
    self.assertEqual(792.76, hsp.score)
    self.assertEqual(100.00, hsp.prob)
    self.assertEqual(0, hsp.hit_start)
    self.assertEqual(394, hsp.hit_end)
    self.assertEqual(21, hsp.query_start)
    self.assertEqual(407, hsp.query_end)
    self.assertEqual('GMHFSQSVAIIQSQVGTIRGVQVLYSDQNPLSVDLVINMPQDGMRLIFDPVAQRLKIIEIYNMKLVKLRYSGMCFNSPEI'
                     'TPSIEQVEHCFGATHPGLYDSQRHLFALNFRGLSFYFPVDS-----KFEPGYAHGLGSLQFPNGGSPVVSRTTIYYGSQH'
                     'QLSSNTSSRVSGVPLPDLPLSCYRQQLHLRRCDVLRNTTSTMGLRLHMFTEGT--SRALEPSQVALVRVVRFGDSCQGVA'
                     'RALGAPARLYYKADDKMRIHRPTARRR-PPPASDYLFNYFTLGLDVLFDARTNQVKKFVLHTNYPGHYNFNMYHRCEFEL'
                     'TVQPD-KSEAHSLVESGGGVAVTAYSKWEVVSRAL-RVCERPVVLNRASSTNTTNPFGSTFCYGYQDIIFEVMSNNYIAS'
                     'ITLY', str(hsp.hit.seq))
    self.assertEqual('GMPLAQAVAILQKHCRIIKNVQVLYSEQSPLSHDLILNLTQDGIKLMFDAFNQRLKVIEVCDLTKVKLKYCGVHFNSQAI'
                     'APTIEQIDQSFGATHPGVYNSAEQLFHLNFRGLSFSFQLDSWTEAPKYEPNFAHGLASLQIPHGA--TVKRMYIYSGNSL'
                     'Q---------DTKA-PMMPLSCFLGNVYAESVDVLRDGTGPAGLRLRLLAAGCGPGLLADAKMRVFERSVYFGDSCQDVL'
                     'SMLGSPHKVFYKSEDKMKIHSPSPHKQVPSKCNDYFFNYFTLGVDILFDANTHKVKKFVLHTNYPGHYNFNIYHRCEFKI'
                     'PLAIKKENADG------QTETCTTYSKWDNIQELLGHPVEKPVVLHRSSSPNNTNPFGSTFCFGLQRMIFEVMQNNHIAS'
                     'VTLY', str(hsp.query.seq))

    # Check last hit
    hit = qresult[num_hits - 1]
    self.assertEqual('4IL7_A', hit.id)
    self.assertEqual('Putative uncharacterized protein; partial jelly roll fold, hypothetical; 1.4A '
                     '{Sulfolobus turreted icosahedral virus}', hit.description)
    self.assertTrue(hit.is_included)
    self.assertEqual(6.8e+02, hit.evalue)
    self.assertEqual(22.72, hit.score)
    self.assertEqual(1, len(hit))

    # Check we can get the original last HSP from the file.
    num_hsps = 34
    self.assertEqual(num_hsps, len(qresult.hsps))
    hsp = qresult.hsps[-1]
    self.assertTrue(hsp.is_included)
    self.assertEqual(num_hsps - 1, hsp.output_index)
    self.assertEqual(3.9e+02, hsp.evalue)
    self.assertEqual(22.84, hsp.score)
    self.assertEqual(21.56, hsp.prob)
    self.assertEqual(7, hsp.hit_start)
    self.assertEqual(96, hsp.hit_end)
    self.assertEqual(18, hsp.query_start)
    self.assertEqual(114, hsp.query_end)
    self.assertEqual('FTLGMPLAQAVAILQKHCRIIKNVQVLYSEQSPLSHDLILNLTQDGIKLMFDAFNQRLKVIEVCDLTKVKLKYCGVH-FN'
                     'SQAIAPTIEQIDQSFGA', str(hsp.query.seq))
    self.assertEqual('IQFGMDRTLVWQLAGADQSCSDQVERIICYNNPDH-------YGPQGHFFFNA-ADKLIHKRQMELFPAPKPTMRLATYN'
                     'KTQTGMTEAQFWAAVPS', str(hsp.hit.seq))
df = pd.read_table('../presence_absence-merged.tab', index_col=0)

#
# evaluate hmmer outputs
#
# Maps each group name (with '&' replaced by '-') to summary statistics of the
# first hit in its HMMER output, kept only when that hit's e-value is <= 1e-10.
hmm_positives = {}
for group in groups.keys():
    group = group.replace('&', '-')
    hmm_out = group + '.hmm.hmmout'
    hmm_path = '%s/hmm/%s' % (vfdb_folder, hmm_out)
    if not isfile(hmm_path):
        # print() with one argument behaves the same on Python 2 and 3,
        # unlike the Python 2-only print statement used previously.
        print('%s not found!' % hmm_out)
        continue
    # Only the first query result of each output file is used. The builtin
    # next() replaces the Python 2-only generator ``.next()`` method.
    result = next(parse(hmm_path, 'hmmer3-text'))
    if not result.hits:
        continue
    # NOTE(review): assumes the first hit is the best-scoring one - confirm.
    best_hit = result.hits[0]
    if best_hit.evalue <= 1e-10:
        hsp = best_hit.hsps[0]
        hmm_positives[group] = {
            'bitscore': best_hit.bitscore,
            'bias': best_hit.bias,
            'evalue': best_hit.evalue,
            'acc': hsp.acc_avg,
            # fraction of the query length spanned by the first HSP
            'coverage': float(hsp.query_end - hsp.query_start) / result.seq_len,
            'desc': control_ids[best_hit.id],
        }
def test_30_hmmscan_002(self):
    """Test parsing hmmer3-tab, hmmscan 3.0, single query, no hits (tab_30_hmmscan_002)."""
    # The parser is expected to yield nothing for this file, so the very
    # first next() call must already raise StopIteration.
    empty_results = parse(get_file('tab_30_hmmscan_002.out'), FMT)
    with self.assertRaises(StopIteration):
        next(empty_results)
def test_allx(self):
    """Test parsing allx.hhr"""

    def check(obj, pairs):
        # Compare each named attribute of obj with its expected value, in order.
        for attr, wanted in pairs:
            self.assertEqual(wanted, getattr(obj, attr))

    records = parse(get_file('allx.hhr'), FMT)

    # Only the first query result is read and inspected.
    record = next(records)
    hit_total = 10
    check(record, (
        ('program', 'HHSUITE'),
        ('id', 'Only X amino acids'),
        ('seq_len', 39),
    ))
    self.assertEqual(hit_total, len(record))

    # First hit and its first HSP.
    lead_hit = record[0]
    check(lead_hit, (
        ('id', '1klr_A'),
        ('description',
         'Zinc finger Y-chromosomal protein; transcription; NMR {Synthetic} SCOP: g.37.1.1 PDB: '
         '5znf_A 1kls_A 1xrz_A* 7znf_A'),
    ))
    self.assertTrue(lead_hit.is_included)
    check(lead_hit, (('evalue', 3.4E+04), ('score', -0.01)))
    self.assertEqual(1, len(lead_hit))

    lead_hsp = lead_hit.hsps[0]
    self.assertTrue(lead_hsp.is_included)
    check(lead_hsp, (
        ('output_index', 0),
        ('evalue', 3.4E+04),
        ('score', -0.01),
        ('prob', 0.04),
        ('hit_start', 23),
        ('hit_end', 24),
        ('query_start', 38),
        ('query_end', 39),
    ))
    self.assertEqual('T', str(lead_hsp.hit.seq))
    self.assertEqual('X', str(lead_hsp.query.seq))

    # Last hit.
    tail_hit = record[hit_total - 1]
    check(tail_hit, (
        ('id', '1zfd_A'),
        ('description',
         'SWI5; DNA binding motif, zinc finger DNA binding domain; NMR {Saccharomyces cerevisiae}'
         ' SCOP: g.37.1.1'),
    ))
    self.assertTrue(tail_hit.is_included)
    check(tail_hit, (('evalue', 3.6e+04), ('score', 0.03)))
    self.assertEqual(1, len(tail_hit))

    # The last HSP of the flattened HSP list should be the last one in the file.
    hsp_total = hit_total
    self.assertEqual(hsp_total, len(record.hsps))
    tail_hsp = record.hsps[-1]
    self.assertTrue(tail_hsp.is_included)
    check(tail_hsp, (
        ('output_index', hsp_total - 1),
        ('evalue', 3.6e+04),
        ('score', 0.03),
        ('prob', 0.03),
        ('hit_start', 0),
        ('hit_end', 1),
        ('query_start', 3),
        ('query_end', 4),
    ))
    self.assertEqual('D', str(tail_hsp.hit.seq))
    self.assertEqual('X', str(tail_hsp.query.seq))
def test_30_hmmscan_001(self):
    """Test parsing hmmer3-tab, hmmscan 3.0, multiple queries (tab_30_hmmscan_001)"""
    # Attributes compared on every hit / first HSP, in assertion order.
    hit_fields = (
        'id', 'acc', 'evalue', 'bitscore', 'bias', 'domain_exp_num',
        'region_num', 'cluster_num', 'overlap_num', 'env_num',
        'domain_obs_num', 'domain_reported_num', 'domain_included_num',
        'description',
    )
    hsp_fields = ('evalue', 'bitscore', 'bias')

    # One entry per query result, in file order:
    #   (query id, ((hit values, first-HSP values), ...))
    # Every hit of every query is listed, so the number of rows is also the
    # expected number of hits for that query.
    expected = (
        ('gi|4885477|ref|NP_005359.1|', (
            (('Globin', 'PF00042.17', 6e-21, 74.6, 0.3, 1.3,
              1, 0, 0, 1, 1, 1, 1, 'Globin'),
             (9.2e-21, 74.0, 0.2)),
        )),
        ('gi|126362951:116-221', (
            (('Ig_3', 'PF13927.1', 1.4e-09, 38.2, 0.4, 1.3,
              1, 0, 0, 1, 1, 1, 1, 'Immunoglobulin domain'),
             (2.1e-09, 37.6, 0.3)),
            (('Ig_2', 'PF13895.1', 3.5e-05, 23.7, 0.1, 1.1,
              1, 0, 0, 1, 1, 1, 1, 'Immunoglobulin domain'),
             (4.3e-05, 23.4, 0.1)),
        )),
        ('gi|22748937|ref|NP_065801.1|', (
            (('Xpo1', 'PF08389.7', 7.8e-34, 116.6, 7.8, 2.8,
              2, 0, 0, 2, 2, 2, 1, 'Exportin 1-like protein'),
             (1.1e-33, 116.1, 3.4)),
            (('IBN_N', 'PF03810.14', 0.0039, 16.9, 0.0, 2.7,
              2, 0, 0, 2, 2, 2, 1, 'Importin-beta N-terminal domain'),
             (0.033, 14.0, 0.0)),
        )),
        ('gi|125490392|ref|NP_038661.2|', (
            (('Pou', 'PF00157.12', 7e-37, 124.8, 0.5, 1.5,
              1, 0, 0, 1, 1, 1, 1, 'Pou domain - N-terminal to homeobox domain'),
             (1.4e-36, 123.9, 0.3)),
            (('Homeobox', 'PF00046.24', 2.1e-18, 65.5, 1.1, 1.5,
              1, 0, 0, 1, 1, 1, 1, 'Homeobox domain'),
             (4.1e-18, 64.6, 0.7)),
            (('HTH_31', 'PF13560.1', 0.012, 15.6, 0.0, 2.2,
              2, 0, 0, 2, 2, 2, 0, 'Helix-turn-helix domain'),
             (0.16, 12.0, 0.0)),
            (('Homeobox_KN', 'PF05920.6', 0.039, 13.5, 0.0, 1.6,
              1, 0, 0, 1, 1, 1, 0, 'Homeobox KN domain'),
             (0.095, 12.3, 0.0)),
            (('DUF521', 'PF04412.8', 0.14, 10.5, 0.1, 1.4,
              1, 0, 0, 1, 1, 1, 0, 'Protein of unknown function (DUF521)'),
             (0.26, 9.6, 0.1)),
        )),
    )

    qresults = parse(get_file('tab_30_hmmscan_001.out'), FMT)
    counter = 0
    for query_id, hit_rows in expected:
        qresult = next(qresults)
        counter += 1
        self.assertEqual(len(hit_rows), len(qresult))
        self.assertEqual(query_id, qresult.id)
        self.assertEqual('-', qresult.acc)
        for index, (hit_values, hsp_values) in enumerate(hit_rows):
            hit = qresult[index]
            self.assertEqual(1, len(hit))
            for field, wanted in zip(hit_fields, hit_values):
                self.assertEqual(wanted, getattr(hit, field))
            hsp = hit.hsps[0]
            for field, wanted in zip(hsp_fields, hsp_values):
                self.assertEqual(wanted, getattr(hsp, field))

    # test if we've properly finished iteration
    self.assertRaises(StopIteration, next, qresults)
    self.assertEqual(4, counter)
def test_2uvo(self):
    """Test parsing 2uvo

    Parses ``2uvo_hhblits.hhr`` and checks the query-level metadata, the
    first hit together with its first HSP, the last hit, and the last HSP
    reachable through ``qresult.hsps``.
    """
    eq = self.assertEqual
    included = self.assertTrue

    hhr_path = get_file('2uvo_hhblits.hhr')
    results = parse(hhr_path, FMT)

    # The file is expected to hold a single query result.
    qresult = next(results)
    expected_hits = 16
    eq('HHSUITE', qresult.program)
    eq('2UVO:A|PDBID|CHAIN|SEQUENCE', qresult.id)
    eq(171, qresult.seq_len)
    eq(expected_hits, len(qresult))

    # First hit: both hit and query sequences of its first HSP are
    # compared against the same full-length literal.
    full_seq = (
        'ERCGEQGSNMECPNNLCCSQYGYCGMGGDYCGKGCQNGACWTSKRCGSQAGGATCTNNQCCSQYGYCGFGAEYC'
        'GAGCQGGPCRADIKCGSQAGGKLCPNNLCCSQWGFCGLGSEFCGGGCQSGACSTDKPCGKDAGGRVCTNNYCCS'
        'KWGSCGIGPGYCGAGCQSGGCDG'
    )
    first_hit = qresult[0]
    eq('2uvo_A', first_hit.id)
    eq(
        'Agglutinin isolectin 1; carbohydrate-binding protein, hevein domain, chitin-binding,'
        ' GERM agglutinin, chitin-binding protein; HET: NDG NAG GOL; 1.40A {Triticum aestivum}'
        ' PDB: 1wgc_A* 2cwg_A* 2x3t_A* 4aml_A* 7wga_A 9wga_A 2wgc_A 1wgt_A 1k7t_A* 1k7v_A* 1k7u_A'
        ' 2x52_A* 1t0w_A*',
        first_hit.description,
    )
    included(first_hit.is_included)
    eq(3.7e-34, first_hit.evalue)
    eq(210.31, first_hit.score)
    eq(2, len(first_hit))

    top_hsp = first_hit.hsps[0]
    included(top_hsp.is_included)
    eq(0, top_hsp.output_index)
    eq(99.95, top_hsp.prob)
    eq(210.31, top_hsp.score)
    eq(3.7e-34, top_hsp.evalue)
    eq(0, top_hsp.hit_start)
    eq(171, top_hsp.hit_end)
    eq(0, top_hsp.query_start)
    eq(171, top_hsp.query_end)
    eq(full_seq, str(top_hsp.hit.seq))
    eq(full_seq, str(top_hsp.query.seq))

    # Last hit, addressed by its positive index.
    last_hit = qresult[expected_hits - 1]
    eq('4z8i_A', last_hit.id)
    eq(
        'BBTPGRP3, peptidoglycan recognition protein 3; chitin-binding domain, '
        'AM hydrolase; 2.70A {Branchiostoma belcheri tsingtauense}',
        last_hit.description,
    )
    included(last_hit.is_included)
    eq(0.11, last_hit.evalue)
    eq(36.29, last_hit.score)
    eq(2, len(last_hit))

    # The final HSP across the whole query result must carry the highest
    # output index.
    expected_hsps = 32
    eq(expected_hsps, len(qresult.hsps))
    final_hsp = qresult.hsps[-1]
    included(final_hsp.is_included)
    eq(expected_hsps - 1, final_hsp.output_index)
    eq(2.6, final_hsp.evalue)
    eq(25.90, final_hsp.score)
    eq(40.43, final_hsp.prob)
    eq(10, final_hsp.hit_start)
    eq(116, final_hsp.hit_end)
    eq(53, final_hsp.query_start)
    eq(163, final_hsp.query_end)
    eq(
        'XCXXXXCCXXXXXCXXXXXXCXXXCXXXXCXXXXXCXXX--XXXCXXXXCCXXXXXCXXXXXXCXXXCXXXXCXXXXXCX'
        'XX--XXXCXXXXCCXXXXXCXXXXXXCXXX',
        str(final_hsp.hit.seq),
    )
    eq(
        'TCTNNQCCSQYGYCGFGAEYCGAGCQGGPCRADIKCGSQAGGKLCPNNLCCSQWGFCGLGSEFCGGGCQSGACSTDKPCG'
        'KDAGGRVCTNNYCCSKWGSCGIGPGYCGAG',
        str(final_hsp.query.seq),
    )
def test_tab_2226_tblastn_001(self):
    """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_001).

    Parses ``tab_2226_tblastn_001.txt`` with default options and expects
    exactly two query results; the first and last hit of the first result
    and the first hit of the second result are inspected.
    """
    eq = self.assertEqual

    tab_path = get_file("tab_2226_tblastn_001.txt")
    results = parse(tab_path, FMT)
    seen = 0

    # ---- first query result ----
    qresult = next(results)
    seen += 1
    eq("gi|16080617|ref|NP_391444.1|", qresult.id)
    eq(3, len(qresult))

    hit = qresult[0]
    eq("gi|145479850|ref|XM_001425911.1|", hit.id)
    eq("gi|16080617|ref|NP_391444.1|", hit.query_id)
    eq(1, len(hit))
    hsp = hit.hsps[0]
    eq("gi|145479850|ref|XM_001425911.1|", hsp.hit_id)
    eq("gi|16080617|ref|NP_391444.1|", hsp.query_id)
    eq(34.88, hsp.ident_pct)
    eq(43, hsp.aln_span)
    eq(28, hsp.mismatch_num)
    eq(0, hsp.gapopen_num)
    eq(30, hsp.query_start)
    eq(73, hsp.query_end)
    eq(1743, hsp.hit_start)
    eq(1872, hsp.hit_end)
    eq(1e-05, hsp.evalue)
    eq(34.7, hsp.bitscore)

    hit = qresult[-1]
    eq("gi|115975252|ref|XM_001180111.1|", hit.id)
    eq("gi|16080617|ref|NP_391444.1|", hit.query_id)
    eq(1, len(hit))
    hsp = hit.hsps[0]
    eq("gi|115975252|ref|XM_001180111.1|", hsp.hit_id)
    eq("gi|16080617|ref|NP_391444.1|", hsp.query_id)
    eq(33.90, hsp.ident_pct)
    eq(59, hsp.aln_span)
    eq(31, hsp.mismatch_num)
    eq(1, hsp.gapopen_num)
    eq(43, hsp.query_start)
    eq(94, hsp.query_end)
    eq(1056, hsp.hit_start)
    eq(1233, hsp.hit_end)
    eq(1e-04, hsp.evalue)
    eq(31.6, hsp.bitscore)

    # ---- last query result ----
    qresult = next(results)
    seen += 1
    eq("gi|11464971:4-101", qresult.id)
    eq(5, len(qresult))

    hit = qresult[0]
    eq("gi|350596019|ref|XM_003360601.2|", hit.id)
    eq("gi|11464971:4-101", hit.query_id)
    eq(2, len(hit))
    hsp = hit.hsps[0]
    eq("gi|350596019|ref|XM_003360601.2|", hsp.hit_id)
    eq("gi|11464971:4-101", hsp.query_id)
    eq(95.92, hsp.ident_pct)
    eq(98, hsp.aln_span)
    eq(4, hsp.mismatch_num)
    eq(0, hsp.gapopen_num)
    eq(0, hsp.query_start)
    eq(98, hsp.query_end)
    eq(94, hsp.hit_start)
    eq(388, hsp.hit_end)
    eq(2e-67, hsp.evalue)
    eq(199, hsp.bitscore)
    hsp = hit.hsps[-1]
    eq("gi|350596019|ref|XM_003360601.2|", hsp.hit_id)
    eq("gi|11464971:4-101", hsp.query_id)
    eq(29.58, hsp.ident_pct)
    eq(71, hsp.aln_span)
    eq(46, hsp.mismatch_num)
    eq(2, hsp.gapopen_num)
    eq(29, hsp.query_start)
    eq(96, hsp.query_end)
    eq(541, hsp.hit_start)
    eq(754, hsp.hit_end)
    eq(4e-05, hsp.evalue)
    eq(32.7, hsp.bitscore)

    # The iterator must be exhausted after the second query result.
    self.assertRaises(StopIteration, next, results)
    eq(2, seen)
def test_tab_2226_tblastn_010(self):
    """Test parsing TBLASTN 2.2.26+ tabular output with comments (tab_2226_tblastn_010).

    Parses ``tab_2226_tblastn_010.txt`` with ``comments=True`` and expects
    three query results, the first of which has no hits. Program, target
    and version are checked on every query result.
    """
    eq = self.assertEqual

    tab_path = get_file("tab_2226_tblastn_010.txt")
    results = parse(tab_path, FMT, comments=True)
    seen = 0

    # ---- first query result: present but without hits ----
    qresult = next(results)
    seen += 1
    eq("tblastn", qresult.program)
    eq("db/minirefseq_mrna", qresult.target)
    eq("2.2.26+", qresult.version)
    eq("random_s00", qresult.id)
    eq(0, len(qresult))

    # ---- second query result ----
    qresult = next(results)
    seen += 1
    eq("tblastn", qresult.program)
    eq("db/minirefseq_mrna", qresult.target)
    eq("2.2.26+", qresult.version)
    eq("gi|16080617|ref|NP_391444.1|", qresult.id)
    eq(3, len(qresult))

    hit = qresult[0]
    eq("gi|145479850|ref|XM_001425911.1|", hit.id)
    eq("gi|16080617|ref|NP_391444.1|", hit.query_id)
    eq(1, len(hit))
    hsp = hit.hsps[0]
    eq("gi|145479850|ref|XM_001425911.1|", hsp.hit_id)
    eq("gi|16080617|ref|NP_391444.1|", hsp.query_id)
    eq(1e-05, hsp.evalue)
    eq(34.7, hsp.bitscore)

    hit = qresult[-1]
    eq("gi|115975252|ref|XM_001180111.1|", hit.id)
    eq("gi|16080617|ref|NP_391444.1|", hit.query_id)
    eq(1, len(hit))
    hsp = hit.hsps[0]
    eq("gi|115975252|ref|XM_001180111.1|", hsp.hit_id)
    eq("gi|16080617|ref|NP_391444.1|", hsp.query_id)
    eq(1e-04, hsp.evalue)
    eq(31.6, hsp.bitscore)

    # ---- last query result ----
    qresult = next(results)
    seen += 1
    eq("tblastn", qresult.program)
    eq("db/minirefseq_mrna", qresult.target)
    eq("2.2.26+", qresult.version)
    eq("gi|11464971:4-101", qresult.id)
    eq(5, len(qresult))

    hit = qresult[0]
    eq("gi|350596019|ref|XM_003360601.2|", hit.id)
    eq("gi|11464971:4-101", hit.query_id)
    eq(2, len(hit))
    hsp = hit.hsps[0]
    eq("gi|350596019|ref|XM_003360601.2|", hsp.hit_id)
    eq("gi|11464971:4-101", hsp.query_id)
    eq(2e-67, hsp.evalue)
    eq(199, hsp.bitscore)
    hsp = hit.hsps[-1]
    eq("gi|350596019|ref|XM_003360601.2|", hsp.hit_id)
    eq("gi|11464971:4-101", hsp.query_id)
    eq(4e-05, hsp.evalue)
    eq(32.7, hsp.bitscore)

    # The iterator must be exhausted after the third query result.
    self.assertRaises(StopIteration, next, results)
    eq(3, seen)
def test_30_hmmscan_001(self):
    """Test parsing hmmer3-tab, hmmscan 3.0, multiple queries (tab_30_hmmscan_001)

    Parses ``tab_30_hmmscan_001.out`` and expects four query results with
    1, 2, 2 and 5 hits respectively. For every hit the hit-level values
    and the values of its first HSP are checked.
    """
    eq = self.assertEqual

    tab_path = get_file('tab_30_hmmscan_001.out')
    results = parse(tab_path, FMT)
    seen = 0

    # ---- query result 1 ----
    qresult = next(results)
    seen += 1
    eq(1, len(qresult))
    eq('gi|4885477|ref|NP_005359.1|', qresult.id)
    eq('-', qresult.accession)

    hit = qresult[0]
    eq(1, len(hit))
    eq('Globin', hit.id)
    eq('PF00042.17', hit.accession)
    eq(6e-21, hit.evalue)
    eq(74.6, hit.bitscore)
    eq(0.3, hit.bias)
    eq(1.3, hit.domain_exp_num)
    eq(1, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(1, hit.env_num)
    eq(1, hit.domain_obs_num)
    eq(1, hit.domain_reported_num)
    eq(1, hit.domain_included_num)
    eq('Globin', hit.description)
    hsp = hit.hsps[0]
    eq(9.2e-21, hsp.evalue)
    eq(74.0, hsp.bitscore)
    eq(0.2, hsp.bias)

    # ---- query result 2 ----
    qresult = next(results)
    seen += 1
    eq(2, len(qresult))
    eq('gi|126362951:116-221', qresult.id)
    eq('-', qresult.accession)

    hit = qresult[0]
    eq(1, len(hit))
    eq('Ig_3', hit.id)
    eq('PF13927.1', hit.accession)
    eq(1.4e-09, hit.evalue)
    eq(38.2, hit.bitscore)
    eq(0.4, hit.bias)
    eq(1.3, hit.domain_exp_num)
    eq(1, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(1, hit.env_num)
    eq(1, hit.domain_obs_num)
    eq(1, hit.domain_reported_num)
    eq(1, hit.domain_included_num)
    eq('Immunoglobulin domain', hit.description)
    hsp = hit.hsps[0]
    eq(2.1e-09, hsp.evalue)
    eq(37.6, hsp.bitscore)
    eq(0.3, hsp.bias)

    hit = qresult[1]
    eq(1, len(hit))
    eq('Ig_2', hit.id)
    eq('PF13895.1', hit.accession)
    eq(3.5e-05, hit.evalue)
    eq(23.7, hit.bitscore)
    eq(0.1, hit.bias)
    eq(1.1, hit.domain_exp_num)
    eq(1, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(1, hit.env_num)
    eq(1, hit.domain_obs_num)
    eq(1, hit.domain_reported_num)
    eq(1, hit.domain_included_num)
    eq('Immunoglobulin domain', hit.description)
    hsp = hit.hsps[0]
    eq(4.3e-05, hsp.evalue)
    eq(23.4, hsp.bitscore)
    eq(0.1, hsp.bias)

    # ---- query result 3 ----
    qresult = next(results)
    seen += 1
    eq(2, len(qresult))
    eq('gi|22748937|ref|NP_065801.1|', qresult.id)
    eq('-', qresult.accession)

    hit = qresult[0]
    eq(1, len(hit))
    eq('Xpo1', hit.id)
    eq('PF08389.7', hit.accession)
    eq(7.8e-34, hit.evalue)
    eq(116.6, hit.bitscore)
    eq(7.8, hit.bias)
    eq(2.8, hit.domain_exp_num)
    eq(2, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(2, hit.env_num)
    eq(2, hit.domain_obs_num)
    eq(2, hit.domain_reported_num)
    eq(1, hit.domain_included_num)
    eq('Exportin 1-like protein', hit.description)
    hsp = hit.hsps[0]
    eq(1.1e-33, hsp.evalue)
    eq(116.1, hsp.bitscore)
    eq(3.4, hsp.bias)

    hit = qresult[1]
    eq(1, len(hit))
    eq('IBN_N', hit.id)
    eq('PF03810.14', hit.accession)
    eq(0.0039, hit.evalue)
    eq(16.9, hit.bitscore)
    eq(0.0, hit.bias)
    eq(2.7, hit.domain_exp_num)
    eq(2, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(2, hit.env_num)
    eq(2, hit.domain_obs_num)
    eq(2, hit.domain_reported_num)
    eq(1, hit.domain_included_num)
    eq('Importin-beta N-terminal domain', hit.description)
    hsp = hit.hsps[0]
    eq(0.033, hsp.evalue)
    eq(14.0, hsp.bitscore)
    eq(0.0, hsp.bias)

    # ---- query result 4 (last) ----
    qresult = next(results)
    seen += 1
    eq(5, len(qresult))
    eq('gi|125490392|ref|NP_038661.2|', qresult.id)
    eq('-', qresult.accession)

    # hit 1 of 5
    hit = qresult[0]
    eq(1, len(hit))
    eq('Pou', hit.id)
    eq('PF00157.12', hit.accession)
    eq(7e-37, hit.evalue)
    eq(124.8, hit.bitscore)
    eq(0.5, hit.bias)
    eq(1.5, hit.domain_exp_num)
    eq(1, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(1, hit.env_num)
    eq(1, hit.domain_obs_num)
    eq(1, hit.domain_reported_num)
    eq(1, hit.domain_included_num)
    eq('Pou domain - N-terminal to homeobox domain', hit.description)
    hsp = hit.hsps[0]
    eq(1.4e-36, hsp.evalue)
    eq(123.9, hsp.bitscore)
    eq(0.3, hsp.bias)

    # hit 2 of 5
    hit = qresult[1]
    eq(1, len(hit))
    eq('Homeobox', hit.id)
    eq('PF00046.24', hit.accession)
    eq(2.1e-18, hit.evalue)
    eq(65.5, hit.bitscore)
    eq(1.1, hit.bias)
    eq(1.5, hit.domain_exp_num)
    eq(1, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(1, hit.env_num)
    eq(1, hit.domain_obs_num)
    eq(1, hit.domain_reported_num)
    eq(1, hit.domain_included_num)
    eq('Homeobox domain', hit.description)
    hsp = hit.hsps[0]
    eq(4.1e-18, hsp.evalue)
    eq(64.6, hsp.bitscore)
    eq(0.7, hsp.bias)

    # hit 3 of 5
    hit = qresult[2]
    eq(1, len(hit))
    eq('HTH_31', hit.id)
    eq('PF13560.1', hit.accession)
    eq(0.012, hit.evalue)
    eq(15.6, hit.bitscore)
    eq(0.0, hit.bias)
    eq(2.2, hit.domain_exp_num)
    eq(2, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(2, hit.env_num)
    eq(2, hit.domain_obs_num)
    eq(2, hit.domain_reported_num)
    eq(0, hit.domain_included_num)
    eq('Helix-turn-helix domain', hit.description)
    hsp = hit.hsps[0]
    eq(0.16, hsp.evalue)
    eq(12.0, hsp.bitscore)
    eq(0.0, hsp.bias)

    # hit 4 of 5
    hit = qresult[3]
    eq(1, len(hit))
    eq('Homeobox_KN', hit.id)
    eq('PF05920.6', hit.accession)
    eq(0.039, hit.evalue)
    eq(13.5, hit.bitscore)
    eq(0.0, hit.bias)
    eq(1.6, hit.domain_exp_num)
    eq(1, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(1, hit.env_num)
    eq(1, hit.domain_obs_num)
    eq(1, hit.domain_reported_num)
    eq(0, hit.domain_included_num)
    eq('Homeobox KN domain', hit.description)
    hsp = hit.hsps[0]
    eq(0.095, hsp.evalue)
    eq(12.3, hsp.bitscore)
    eq(0.0, hsp.bias)

    # hit 5 of 5
    hit = qresult[4]
    eq(1, len(hit))
    eq('DUF521', hit.id)
    eq('PF04412.8', hit.accession)
    eq(0.14, hit.evalue)
    eq(10.5, hit.bitscore)
    eq(0.1, hit.bias)
    eq(1.4, hit.domain_exp_num)
    eq(1, hit.region_num)
    eq(0, hit.cluster_num)
    eq(0, hit.overlap_num)
    eq(1, hit.env_num)
    eq(1, hit.domain_obs_num)
    eq(1, hit.domain_reported_num)
    eq(0, hit.domain_included_num)
    eq('Protein of unknown function (DUF521)', hit.description)
    hsp = hit.hsps[0]
    eq(0.26, hsp.evalue)
    eq(9.6, hsp.bitscore)
    eq(0.1, hsp.bias)

    # The iterator must be exhausted after the fourth query result.
    self.assertRaises(StopIteration, next, results)
    eq(4, seen)
def test_tab_2226_tblastn_013(self):
    """Test parsing TBLASTN 2.2.26+ tabular output (tab_2226_tblastn_013).

    Parses ``tab_2226_tblastn_013.txt`` with the custom column layout
    ``fields="qseq std sseq"`` and expects a single query result. Besides
    the standard columns, the query and hit sequences of each inspected
    HSP are checked.
    """
    eq = self.assertEqual

    tab_path = get_file("tab_2226_tblastn_013.txt")
    results = parse(tab_path, FMT, fields="qseq std sseq")
    seen = 0

    qresult = next(results)
    seen += 1
    # Program, target and version are expected to be the placeholder values.
    eq("<unknown program>", qresult.program)
    eq("<unknown target>", qresult.target)
    eq("<unknown version>", qresult.version)
    eq(3, len(qresult))

    # ---- first hit ----
    hit = qresult[0]
    eq("gi|145479850|ref|XM_001425911.1|", hit.id)
    eq("gi|16080617|ref|NP_391444.1|", hit.query_id)
    eq(1, len(hit))
    hsp = hit.hsps[0]
    eq("gi|145479850|ref|XM_001425911.1|", hsp.hit_id)
    eq("gi|16080617|ref|NP_391444.1|", hsp.query_id)
    eq(34.88, hsp.ident_pct)
    eq(43, hsp.aln_span)
    eq(28, hsp.mismatch_num)
    eq(0, hsp.gapopen_num)
    eq(30, hsp.query_start)
    eq(73, hsp.query_end)
    eq(1743, hsp.hit_start)
    eq(1872, hsp.hit_end)
    eq(1e-05, hsp.evalue)
    eq(34.7, hsp.bitscore)
    eq("PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD", hsp.query.seq)
    eq("PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID", hsp.hit.seq)

    # ---- last hit ----
    hit = qresult[-1]
    eq("gi|115975252|ref|XM_001180111.1|", hit.id)
    eq("gi|16080617|ref|NP_391444.1|", hit.query_id)
    eq(1, len(hit))
    hsp = hit.hsps[0]
    eq("gi|115975252|ref|XM_001180111.1|", hsp.hit_id)
    eq("gi|16080617|ref|NP_391444.1|", hsp.query_id)
    eq(33.90, hsp.ident_pct)
    eq(59, hsp.aln_span)
    eq(31, hsp.mismatch_num)
    eq(1, hsp.gapopen_num)
    eq(43, hsp.query_start)
    eq(94, hsp.query_end)
    eq(1056, hsp.hit_start)
    eq(1233, hsp.hit_end)
    eq(1e-04, hsp.evalue)
    eq(31.6, hsp.bitscore)
    eq(
        "GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL",
        hsp.query.seq,
    )
    eq(
        "GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL",
        hsp.hit.seq,
    )

    # The iterator must be exhausted after the single query result.
    self.assertRaises(StopIteration, next, results)
    eq(1, seen)