class TestHitList(test.Case):
    """Checks size, sorting and iteration of an HHpredHitList holding two hits."""

    def setUp(self):
        super(TestHitList, self).setUp()

        # Build both hits first, then attach a query/subject alignment to each.
        first = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        second = HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10)
        first.add_alignment('A-CD', 'A-CD')
        second.add_alignment('BCD', 'BCD')

        self.h1, self.h2 = first, second
        self.hits = HHpredHitList([first, second])

    def testLength(self):
        expected_size = 2
        self.assertEqual(len(self.hits), expected_size)

    def testSort(self):
        shuffled = HHpredHitList([self.h2, self.h1])

        # Before sort() the list keeps the order it was constructed with.
        self.assertEqual(shuffled[0], self.h2)
        self.assertEqual(shuffled[1], self.h1)

        shuffled.sort()

        # After sort() h1 comes ahead of h2.
        self.assertEqual(shuffled[0], self.h1)
        self.assertEqual(shuffled[1], self.h2)

    def testIterator(self):
        iterated = list(self.hits)
        self.assertEqual(iterated, [self.h1, self.h2])
def setUp(self):
    # Fixture: two hits, each carrying an alignment, and the list wrapping both.
    super(TestHitList, self).setUp()

    self.h1, self.h2 = (
        HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10),
        HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10),
    )

    # Query and subject rows are the same string for each hit.
    for hit, row in ((self.h1, 'A-CD'), (self.h2, 'BCD')):
        hit.add_alignment(row, row)

    self.hits = HHpredHitList([self.h1, self.h2])
class TestHit(test.Case):
    """Exercises HHpredHit: equals, surpasses, includes, length and alignment."""

    def setUp(self):
        super(TestHit, self).setUp()

        reference = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        other = HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10)
        self.h1, self.h2 = reference, other

    def testEquals(self):
        # A second hit built from the same arguments as h1.
        twin = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        self.assertTrue(self.h1.equals(twin))

    def testSurpasses(self):
        # Same probability as h1, but ending at 3/4 instead of 5/6.
        shorter = HHpredHit(5, 'hitX', 2, 3, 3, 4, 0.5, 10)
        self.assertTrue(self.h1.surpasses(shorter))

        # Same positions as h1, but probability 0.3 instead of 0.5.
        weaker = HHpredHit(5, 'hitX', 2, 5, 3, 6, 0.3, 10)
        self.assertTrue(self.h1.surpasses(weaker))

    def testIncludes(self):
        # Without an explicit tolerance.
        candidate = HHpredHit(5, 'hit1', 2, 3, 3, 4, 0.5, 10)
        self.assertTrue(self.h1.includes(candidate))

        candidate = HHpredHit(5, 'hit1', 2, 4, 3, 5, 0.3, 10)
        self.assertTrue(self.h1.includes(candidate))

        # Ends 3 positions after h1: accepted with tolerance 3, rejected with 2.
        candidate = HHpredHit(5, 'hit1', 2, 8, 3, 9, 0.3, 10)
        self.assertTrue(self.h1.includes(candidate, tolerance=3))

        candidate = HHpredHit(5, 'hit1', 2, 8, 3, 9, 0.3, 10)
        self.assertFalse(self.h1.includes(candidate, tolerance=2))

        # Starts 1 position before h1: accepted with tolerance 1, rejected with 0.
        candidate = HHpredHit(5, 'hit1', 1, 5, 2, 6, 0.3, 10)
        self.assertTrue(self.h1.includes(candidate, tolerance=1))

        candidate = HHpredHit(5, 'hit1', 1, 5, 2, 6, 0.3, 10)
        self.assertFalse(self.h1.includes(candidate, tolerance=0))

    def testLength(self):
        expected_length = 4
        self.assertEqual(self.h1.length, expected_length)

    def testAlignment(self):
        query_row = 'AB-D'
        subject_row = 'A-CD'
        self.h1.add_alignment(query_row, subject_row)

        # query, subject
        self.assertEqual(self.h1.alignment.query, query_row)
        self.assertEqual(self.h1.alignment.subject, subject_row)

        # segments
        segments = list(self.h1.alignment.segments)
        self.assertEqual(len(segments), 2)

        head, tail = segments[0], segments[1]
        self.assertEqual(head.qstart, 3)
        self.assertEqual(head.qend, 3)
        self.assertEqual(head.start, 2)
        self.assertEqual(head.end, 2)

        self.assertEqual(tail.qstart, 5)
        self.assertEqual(tail.qend, 5)
        self.assertEqual(tail.start, 4)
        self.assertEqual(tail.end, 4)

        # to_a3m
        a3m = self.h1.alignment.to_a3m().format(headers=False)
        self.assertEqual(a3m.strip(), 'ABD\nA-cD')
def testIncludes(self):
    # Each row: (start, end, qstart, qend, probability) of the candidate hit,
    # the keyword arguments handed to includes(), and the expected outcome.
    cases = (
        ((2, 3, 3, 4, 0.5), {}, True),
        ((2, 4, 3, 5, 0.3), {}, True),
        ((2, 8, 3, 9, 0.3), {'tolerance': 3}, True),
        ((2, 8, 3, 9, 0.3), {'tolerance': 2}, False),
        ((1, 5, 2, 6, 0.3), {'tolerance': 1}, True),
        ((1, 5, 2, 6, 0.3), {'tolerance': 0}, False),
    )

    for (start, end, qstart, qend, probability), options, expected in cases:
        # A fresh candidate is built for every check.
        candidate = HHpredHit(5, 'hit1', start, end, qstart, qend,
                              probability, 10)
        outcome = self.h1.includes(candidate, **options)

        if expected:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
def testSurpasses(self):
    # Same probability as h1, but ending at 3/4 instead of 5/6.
    shorter = HHpredHit(5, 'hitX', 2, 3, 3, 4, 0.5, 10)
    self.assertTrue(self.h1.surpasses(shorter))

    # Same positions as h1, but probability 0.3 instead of 0.5.
    weaker = HHpredHit(5, 'hitX', 2, 5, 3, 6, 0.3, 10)
    self.assertTrue(self.h1.surpasses(weaker))
def testEquals(self):
    # A separate hit built from the same arguments as h1 must compare equal.
    twin = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
    verdict = self.h1.equals(twin)
    self.assertTrue(verdict)
def setUp(self):
    # Fixture: two independent hits shared by the tests of this case.
    super(TestHit, self).setUp()

    self.h1, self.h2 = (
        HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10),
        HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10),
    )
class TestHit(test.Case):
    """Unit tests for the comparison, length and alignment API of HHpredHit."""

    def setUp(self):
        super(TestHit, self).setUp()

        self.h1, self.h2 = (
            HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10),
            HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10),
        )

    def testEquals(self):
        duplicate = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        verdict = self.h1.equals(duplicate)
        self.assertTrue(verdict)

    def testSurpasses(self):
        # Each row holds the constructor arguments of a hit h1 must surpass.
        rivals = (
            (5, 'hitX', 2, 3, 3, 4, 0.5, 10),
            (5, 'hitX', 2, 5, 3, 6, 0.3, 10),
        )
        for arguments in rivals:
            rival = HHpredHit(*arguments)
            self.assertTrue(self.h1.surpasses(rival))

    def testIncludes(self):
        # Each row: constructor arguments of the candidate, keyword arguments
        # handed to includes(), and the expected outcome.
        cases = (
            ((5, 'hit1', 2, 3, 3, 4, 0.5, 10), {}, True),
            ((5, 'hit1', 2, 4, 3, 5, 0.3, 10), {}, True),
            ((5, 'hit1', 2, 8, 3, 9, 0.3, 10), {'tolerance': 3}, True),
            ((5, 'hit1', 2, 8, 3, 9, 0.3, 10), {'tolerance': 2}, False),
            ((5, 'hit1', 1, 5, 2, 6, 0.3, 10), {'tolerance': 1}, True),
            ((5, 'hit1', 1, 5, 2, 6, 0.3, 10), {'tolerance': 0}, False),
        )
        for arguments, options, expected in cases:
            candidate = HHpredHit(*arguments)
            outcome = self.h1.includes(candidate, **options)
            if expected:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

    def testLength(self):
        self.assertEqual(self.h1.length, 4)

    def testAlignment(self):
        rows = ('AB-D', 'A-CD')
        q, s = rows
        self.h1.add_alignment(q, s)

        # query, subject
        self.assertEqual(self.h1.alignment.query, q)
        self.assertEqual(self.h1.alignment.subject, s)

        # segments: expected (qstart, qend, start, end) of each one, in order
        segments = list(self.h1.alignment.segments)
        self.assertEqual(len(segments), 2)

        expected_bounds = ((3, 3, 2, 2), (5, 5, 4, 4))
        for segment, (qstart, qend, start, end) in zip(segments, expected_bounds):
            self.assertEqual(segment.qstart, qstart)
            self.assertEqual(segment.qend, qend)
            self.assertEqual(segment.start, start)
            self.assertEqual(segment.end, end)

        # to_a3m
        a3m = self.h1.alignment.to_a3m().format(headers=False)
        self.assertEqual(a3m.strip(), 'ABD\nA-cD')
def _parse(self, stream, header_only):
    """
    Parse HHpred output, read line by line from C{stream}, into a list of
    hits.

    The input is processed as three consecutive sections:

      - header: each non-blank line above the hits table is split at the
        first run of whitespace into an identifier and its data. C{Neff}
        is converted to float, C{Searched_HMMs} and C{Match_columns} to
        int; any other data is kept as a string. C{Match_columns} is also
        handed to every hit as its last constructor argument (C{qlen}).
      - hits table: begins after the line which, with all spaces removed,
        starts with C{NoHitProbE-value}. Rows are sliced at fixed column
        offsets; the table ends at the first blank line or at C{Done}.
      - alignments: read only when C{self.alignments} is true. C{No <rank>}
        selects the current hit, C{>} lines give its name, C{Probab=}
        lines hold C{key=value} pairs and C{Q }/C{T } rows hold the
        query/subject residues.

    @param stream: iterable yielding the lines of the output
    @param header_only: if true, parsing stops at the hits table, so the
                        result holds no hits, only the header fields
    @type header_only: bool

    @return: the sorted hits; recognised header fields are attached as
             attributes (C{query_name}, C{match_columns}, C{no_of_seqs},
             C{neff}, C{searched_hmms}, C{date}, C{command})
    @rtype: L{HHpredHitList}

    @raise HHOutputFormatError: if an alignment refers to a rank missing
                                from the hits table, or if attaching a
                                collected alignment to its hit fails
    """
    # 'Q '/'T ' rows which are skipped when collecting residues
    skipped_rows = ('Q Consensus', 'Q ss_pred', 'Q ss_conf', 'Q ss_dssp',
                    'T Consensus', 'T ss_pred', 'T ss_conf', 'T ss_dssp')

    # header identifier -> attribute set on the returned hit list
    header_attributes = {
        'Query': 'query_name',
        'Match_columns': 'match_columns',
        'No_of_seqs': 'no_of_seqs',
        'Neff': 'neff',
        'Searched_HMMs': 'searched_hmms',
        'Date': 'date',
        'Command': 'command',
    }

    qlen = None
    in_hits = False
    in_alis = False
    has_alis = False
    c_rank = 0
    header = {}
    hits = {}
    alis = {}

    for line in stream:

        if not in_hits and not in_alis:

            if line.replace(' ', '').startswith('NoHitProbE-value'):
                if header_only:
                    # nothing below the header is used in this mode, so
                    # there is no point in reading the rest of the stream
                    break
                in_hits = True

            elif line.strip():
                # parse header data (stuff above the hits table)
                columns = line.strip().split(None, 1)

                if len(columns) == 2:
                    identifier, data = columns

                    if identifier in ('Query', 'Command'):
                        data = data.strip()
                    elif identifier == 'Neff':
                        data = float(data)
                    elif identifier in ('Searched_HMMs', 'Match_columns'):
                        data = int(data)

                    header[identifier] = data

                    if identifier == 'Match_columns':
                        qlen = data

            continue

        # from here on header_only is known to be false: in that mode the
        # loop is left as soon as the hits table is reached

        if in_hits:

            if not line.strip():
                # suboptimal way to handle block switch
                in_hits = False
                in_alis = True
                if self.alignments:
                    continue
                break

            if line.strip() == 'Done':
                break

            description = line[:34].split()
            rank = int(description[0])
            hit_id = description[1]

            pos = line[85:94].strip()
            start, end = map(int, pos.split('-'))

            qpos = line[75:84].strip()
            qstart, qend = map(int, qpos.split('-'))

            probability = float(line[35:40]) / 100.0

            hit = HHpredHit(rank, hit_id, start, end,
                            qstart, qend, probability, qlen)

            hit.evalue = float(line[41:48])
            hit.pvalue = float(line[49:56])
            hit.score = float(line[57:63])
            hit.ss_score = float(line[64:69])
            hit.slength = int(line[94:].replace('(', '').replace(')', ''))

            hits[hit.rank] = hit
            alis[hit.rank] = {'q': [], 's': []}

        elif in_alis:

            if line.startswith('Done'):
                break

            elif line.startswith('No '):
                c_rank = int(line[3:])
                if c_rank not in hits:
                    raise HHOutputFormatError(
                        'Alignment {0}. refers to a non-existing hit'.format(c_rank))

            elif line.startswith('>'):
                hits[c_rank].name = line[1:].strip()

            elif line.startswith('Probab='):
                for pair in line.split():
                    key, value = pair.split('=')
                    if key == 'Identities':
                        hits[c_rank].identity = float(value.replace('%', ''))
                    elif key == 'Similarity':
                        hits[c_rank].similarity = float(value)
                    elif key == 'Sum_probs':
                        hits[c_rank].prob_sum = float(value)

            elif (line.startswith(('Q ', 'T '))
                    and line[:11].rstrip() not in skipped_rows):
                row = 'q' if line.startswith('Q ') else 's'

                # residues run from column 22 up to the first whitespace
                # or digit
                for residue in line[22:]:
                    if residue.isspace() or residue.isdigit():
                        break
                    alis[c_rank][row].append(residue)
                    if row == 'q':
                        # only query residues mark alignments as present
                        has_alis = True

    if self.alignments and has_alis:
        for rank in alis:
            try:
                hits[rank].add_alignment(alis[rank]['q'], alis[rank]['s'])
            except (KeyError, ValueError) as er:
                raise HHOutputFormatError(
                    'Corrupt alignment at hit No {0}.\n {1}'.format(rank, er))

    hit_list = HHpredHitList(hits.values())
    hit_list.sort()

    # add data obtained from the header to the HHpredHitList
    for identifier, data in header.items():
        attribute = header_attributes.get(identifier)
        if attribute is not None:
            setattr(hit_list, attribute, data)

    return hit_list
def _parse(self, stream, header_only):
    """
    Parse HHpred output, read line by line from C{stream}, into a list of
    hits.

    The input is processed as three consecutive sections:

      - header: each non-blank line above the hits table is split at the
        first run of whitespace into an identifier and its data. C{Neff}
        is converted to float, C{Searched_HMMs} and C{Match_columns} to
        int; any other data is kept as a string. C{Match_columns} is also
        handed to every hit as its last constructor argument (C{qlen}).
      - hits table: begins after the line which, with all spaces removed,
        starts with C{NoHitProbE-value}. Rows are sliced at fixed column
        offsets; the table ends at the first blank line or at C{Done}.
      - alignments: read only when C{self.alignments} is true. C{No <rank>}
        selects the current hit, C{>} lines give its name, C{Probab=}
        lines hold C{key=value} pairs and C{Q }/C{T } rows hold the
        query/subject residues.

    @param stream: iterable yielding the lines of the output
    @param header_only: if true, parsing stops at the hits table, so the
                        result holds no hits, only the header fields
    @type header_only: bool

    @return: the sorted hits; recognised header fields are attached as
             attributes (C{query_name}, C{match_columns}, C{no_of_seqs},
             C{neff}, C{searched_hmms}, C{date}, C{command})
    @rtype: L{HHpredHitList}

    @raise HHOutputFormatError: if an alignment refers to a rank missing
                                from the hits table, or if attaching a
                                collected alignment to its hit fails
    """
    # 'Q '/'T ' rows which are skipped when collecting residues
    skipped_rows = ('Q Consensus', 'Q ss_pred', 'Q ss_conf', 'Q ss_dssp',
                    'T Consensus', 'T ss_pred', 'T ss_conf', 'T ss_dssp')

    # header identifier -> attribute set on the returned hit list
    header_attributes = {
        'Query': 'query_name',
        'Match_columns': 'match_columns',
        'No_of_seqs': 'no_of_seqs',
        'Neff': 'neff',
        'Searched_HMMs': 'searched_hmms',
        'Date': 'date',
        'Command': 'command',
    }

    qlen = None
    in_hits = False
    in_alis = False
    has_alis = False
    c_rank = 0
    header = {}
    hits = {}
    alis = {}

    for line in stream:

        if not in_hits and not in_alis:

            if line.replace(' ', '').startswith('NoHitProbE-value'):
                if header_only:
                    # nothing below the header is used in this mode, so
                    # there is no point in reading the rest of the stream
                    break
                in_hits = True

            elif line.strip():
                # parse header data (stuff above the hits table)
                columns = line.strip().split(None, 1)

                if len(columns) == 2:
                    identifier, data = columns

                    if identifier in ('Query', 'Command'):
                        data = data.strip()
                    elif identifier == 'Neff':
                        data = float(data)
                    elif identifier in ('Searched_HMMs', 'Match_columns'):
                        data = int(data)

                    header[identifier] = data

                    if identifier == 'Match_columns':
                        qlen = data

            continue

        # from here on header_only is known to be false: in that mode the
        # loop is left as soon as the hits table is reached

        if in_hits:

            if not line.strip():
                # suboptimal way to handle block switch
                in_hits = False
                in_alis = True
                if self.alignments:
                    continue
                break

            if line.strip() == 'Done':
                break

            description = line[:34].split()
            rank = int(description[0])
            hit_id = description[1]

            pos = line[85:94].strip()
            start, end = map(int, pos.split('-'))

            qpos = line[75:84].strip()
            qstart, qend = map(int, qpos.split('-'))

            probability = float(line[35:40]) / 100.0

            hit = HHpredHit(rank, hit_id, start, end,
                            qstart, qend, probability, qlen)

            hit.evalue = float(line[41:48])
            hit.pvalue = float(line[49:56])
            hit.score = float(line[57:63])
            hit.ss_score = float(line[64:69])
            hit.slength = int(line[94:].replace('(', '').replace(')', ''))

            hits[hit.rank] = hit
            alis[hit.rank] = {'q': [], 's': []}

        elif in_alis:

            if line.startswith('Done'):
                break

            elif line.startswith('No '):
                c_rank = int(line[3:])
                if c_rank not in hits:
                    raise HHOutputFormatError(
                        'Alignment {0}. refers to a non-existing hit'.format(c_rank))

            elif line.startswith('>'):
                hits[c_rank].name = line[1:].strip()

            elif line.startswith('Probab='):
                for pair in line.split():
                    key, value = pair.split('=')
                    if key == 'Identities':
                        hits[c_rank].identity = float(value.replace('%', ''))
                    elif key == 'Similarity':
                        hits[c_rank].similarity = float(value)
                    elif key == 'Sum_probs':
                        hits[c_rank].prob_sum = float(value)

            elif (line.startswith(('Q ', 'T '))
                    and line[:11].rstrip() not in skipped_rows):
                row = 'q' if line.startswith('Q ') else 's'

                # residues run from column 22 up to the first whitespace
                # or digit
                for residue in line[22:]:
                    if residue.isspace() or residue.isdigit():
                        break
                    alis[c_rank][row].append(residue)
                    if row == 'q':
                        # only query residues mark alignments as present
                        has_alis = True

    if self.alignments and has_alis:
        for rank in alis:
            try:
                hits[rank].add_alignment(alis[rank]['q'], alis[rank]['s'])
            except (KeyError, ValueError) as er:
                raise HHOutputFormatError(
                    'Corrupt alignment at hit No {0}.\n {1}'.format(rank, er))

    hit_list = HHpredHitList(hits.values())
    hit_list.sort()

    # add data obtained from the header to the HHpredHitList
    for identifier, data in header.items():
        attribute = header_attributes.get(identifier)
        if attribute is not None:
            setattr(hit_list, attribute, data)

    return hit_list