def test_parse_hmm_body(self):
    """Exercise ``GembaseHMMReport._parse_hmm_body`` on four hand-written hit sections.

    Scenarios, each run in its own ``subTest`` so that one failure does not
    hide the others:

    * a section with a single domain row, expected to yield one ``CoreHit``;
    * a section with two domain rows, expected to yield no hit;
    * a section without any domain table, expected to yield no hit;
    * a section whose i-Evalue column holds ``foo``, expected to raise
      ``ValueError`` with a message quoting the offending row.

    The fixtures are assembled from explicit line constants (rather than
    triple-quoted blocks) so that their leading whitespace cannot be altered
    by re-indenting this method.
    """

    def make_hmm_group(hmm_string):
        """Return the group of lines that follows the leading ``>>`` line."""
        hmm_file = StringIO(hmm_string)
        hmm_hits = (
            group
            for _, group in groupby(hmm_file, lambda line: line.startswith('>>'))
        )
        next(hmm_hits)  # discard the group made of the '>>' line itself
        return next(hmm_hits)

    gene_name = "gspD"
    c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
    report_path = os.path.join(self.cfg.working_dir(),
                               gene_name + self.cfg.res_search_suffix())
    report = GembaseHMMReport(c_gene, report_path, self.cfg)

    def parse(hmm_string):
        """Run the parser on the body of *hmm_string* with the arguments shared by all scenarios."""
        return report._parse_hmm_body(
            'NC_xxxxx_xx_056141', 596, 803, 0.5, 'NC_xxxxx_xx', 141, 0.5,
            make_hmm_group(hmm_string),
        )

    # NOTE(review): column spacing below mirrors the per-domain table printed
    # by HMMER3 hmmsearch; confirm it matches what _parse_hmm_body expects.
    pa5567_header = (
        ">> NC_xxxxx_xx_056141 C ATG TAA 6260390 6261757 Valid PA5567 1368 "
        "_NP_254254.1_ PA5567 1 6260390 6261757 | tRNA modific\n"
    )
    pilq_header = (
        ">> PSAE001c01_051090 C ATG TGA 5675714 5677858 Valid pilQ 2145 "
        "_PA5040_NP_253727.1_ PA5040 1 5675714 5677858 | type 4 f\n"
    )
    table_header = (
        "   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc\n"
        " ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----\n"
    )
    # A blank line closes the domain table before the alignments section.
    table_footer = "\n  Alignments for each domain:\n"

    with self.subTest("one significant hit"):
        hmm = (
            pa5567_header
            + table_header
            + "   1 !  779.2   5.5  1.4e-237    2e-236       1     596 []     104     741 ..     104     741 .. 0.93\n"
            + table_footer
        )
        expected_hits = [
            CoreHit(c_gene, "NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
                    2e-236, 779.2, 1.0, (741.0 - 104.0 + 1) / 803, 104, 741)
        ]
        self.assertListEqual(parse(hmm), expected_hits)

    with self.subTest("no significant hit"):
        hmm = (
            pilq_header
            + table_header
            + "   1 !   27.1   0.2   6.3e-10   6.6e-07       1     120 [.     286     402 ..     286     407 .. 0.86\n"
            + "   2 !  186.2   0.1   4.2e-58   4.3e-55     294     590 ..     405     709 ..     397     712 .. 0.84\n"
            + table_footer
        )
        self.assertListEqual(parse(hmm), [])

    with self.subTest("no hit"):
        # The body holds no domain table at all.
        hmm = pilq_header + "bla bla\n"
        self.assertListEqual(parse(hmm), [])

    with self.subTest("invalid hmm"):
        # 'foo' sits where the i-Evalue is expected.
        bad_row = (
            "   1 !  779.2   5.5  1.4e-237    foo       1     596 []     104     741 ..     104     741 .. 0.93\n"
        )
        hmm = pa5567_header + table_header + bad_row + table_footer
        with self.assertRaises(ValueError) as ctx:
            parse(hmm)
        # The expected message embeds the offending row verbatim (including
        # its newline), so it is built from the same constant.
        self.assertEqual(
            str(ctx.exception),
            "Invalid line to parse :" + bad_row
            + ":could not convert string to float: 'foo'",
        )