def testSort(self):
    """
    A list built in the order (h2, h1) keeps that order until sort() is
    called, after which h1 precedes h2.
    """
    ranked = HHpredHitList([self.h2, self.h1])

    # Construction alone must not reorder the hits.
    for index, expected in enumerate((self.h2, self.h1)):
        self.assertEqual(ranked[index], expected)

    ranked.sort()

    # After the in-place sort, h1 comes first.
    for index, expected in enumerate((self.h1, self.h2)):
        self.assertEqual(ranked[index], expected)
def setUp(self):
    """
    Create the fixtures shared by the tests: two hits, each with an
    alignment attached, and a hit list holding them in the order (h1, h2).
    """
    super(TestHitList, self).setUp()

    # Positional arguments: rank, id, start, end, qstart, qend,
    # probability, qlen.
    first = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
    second = HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10)

    first.add_alignment('A-CD', 'A-CD')
    second.add_alignment('BCD', 'BCD')

    self.h1, self.h2 = first, second
    self.hits = HHpredHitList([first, second])
def _parse(self, stream, header_only):
    """
    Parse HHpred output, read line by line, into an HHpredHitList.

    The input is processed as three consecutive sections:

    1. Header: ``identifier value`` lines above the hits table. ``Query``
       and ``Command`` are stripped, ``Neff`` is converted to float,
       ``Searched_HMMs`` and ``Match_columns`` to int; any other
       two-column line is stored as text. ``Match_columns`` is also passed
       to every hit as the query length.
    2. Hits table: starts at the line which, with blanks removed, begins
       with ``NoHitProbE-value``. Rows are sliced at fixed columns.
    3. Alignments: entered at the first blank line after the hits table and
       only read when ``self.alignments`` is true.

    :param stream: iterable yielding the lines of the output
    :param header_only: if true, parsing stops as soon as the hits table is
        reached; the result then contains no hits, only header attributes,
        and the remainder of ``stream`` is left unread
    :return: an HHpredHitList of the parsed hits, sorted with its ``sort()``
        method, carrying the recognised header values as attributes
        (query_name, match_columns, no_of_seqs, neff, searched_hmms, date,
        command)
    :raise HHOutputFormatError: if an alignment block refers to a rank that
        is missing from the hits table, or if attaching an alignment to its
        hit fails with KeyError or ValueError

    Malformed numeric fields are not wrapped: the errors raised by
    ``int``/``float`` and by indexing propagate unchanged.
    """

    def leading_residues(text):
        # Collect characters up to the first whitespace or digit.
        residues = []
        for char in text:
            if char.isspace() or char.isdigit():
                break
            residues.append(char)
        return residues

    # Header identifier -> attribute set on the resulting hit list.
    header_attributes = {
        'Query': 'query_name',
        'Match_columns': 'match_columns',
        'No_of_seqs': 'no_of_seqs',
        'Neff': 'neff',
        'Searched_HMMs': 'searched_hmms',
        'Date': 'date',
        'Command': 'command',
    }
    # 'Q '/'T ' rows with these prefixes are skipped when collecting
    # alignment residues.
    query_annotations = ('Q Consensus', 'Q ss_pred', 'Q ss_conf', 'Q ss_dssp')
    subject_annotations = ('T Consensus', 'T ss_pred', 'T ss_conf', 'T ss_dssp')

    qlen = None
    in_hits = False
    in_alis = False
    has_alis = False
    c_rank = 0
    header = {}
    hits = {}
    alis = {}

    for line in stream:

        if not in_hits and not in_alis:
            if line.replace(' ', '').startswith('NoHitProbE-value'):
                if header_only:
                    # Nothing below the table header contributes to the
                    # header data, so there is no point in reading on.
                    break
                in_hits = True
            elif line.strip():
                # parse header data (stuff above the hits table)
                columns = line.strip().split(None, 1)
                if len(columns) == 2:
                    identifier, data = columns
                    if identifier in ('Query', 'Command'):
                        data = data.strip()
                    elif identifier == 'Neff':
                        data = float(data)
                    elif identifier in ('Searched_HMMs', 'Match_columns'):
                        data = int(data)
                    header[identifier] = data
                    if identifier == 'Match_columns':
                        qlen = data

        elif in_hits:
            content = line.strip()
            if not content:
                # A blank line ends the hits table (suboptimal way to
                # handle the block switch).
                in_hits = False
                in_alis = True
                if not self.alignments:
                    break
            elif content.startswith('Done'):
                # Same terminator test as in the alignment section; a row
                # starting with 'Done' can never be a valid hit.
                break
            else:
                description = line[:34].split()
                rank = int(description[0])
                hit_id = description[1]
                start, end = map(int, line[85:94].strip().split('-'))
                qstart, qend = map(int, line[75:84].strip().split('-'))
                # The table lists the probability as a percentage.
                probability = float(line[35:40]) / 100.0

                hit = HHpredHit(rank, hit_id, start, end,
                                qstart, qend, probability, qlen)
                hit.evalue = float(line[41:48])
                hit.pvalue = float(line[49:56])
                hit.score = float(line[57:63])
                hit.ss_score = float(line[64:69])
                # Last column holds the length in parentheses.
                hit.slength = int(line[94:].replace('(', '').replace(')', ''))

                hits[hit.rank] = hit
                alis[hit.rank] = {'q': [], 's': []}

        else:
            # Alignment section.
            if line.startswith('Done'):
                break
            elif line.startswith('No '):
                c_rank = int(line[3:])
                if c_rank not in hits:
                    raise HHOutputFormatError(
                        'Alignment {0}. refers to a non-existing hit'.format(
                            c_rank))
            elif line.startswith('>'):
                hits[c_rank].name = line[1:].strip()
            elif line.startswith('Probab='):
                for pair in line.split():
                    key, value = pair.split('=')
                    if key == 'Identities':
                        hits[c_rank].identity = float(value.replace('%', ''))
                    elif key == 'Similarity':
                        hits[c_rank].similarity = float(value)
                    elif key == 'Sum_probs':
                        hits[c_rank].prob_sum = float(value)
            elif (line.startswith('Q ')
                    and line[:11].rstrip() not in query_annotations):
                residues = leading_residues(line[22:])
                if residues:
                    alis[c_rank]['q'].extend(residues)
                    has_alis = True
            elif (line.startswith('T ')
                    and line[:11].rstrip() not in subject_annotations):
                residues = leading_residues(line[22:])
                if residues:
                    alis[c_rank]['s'].extend(residues)

    if self.alignments and has_alis:
        for rank in alis:
            try:
                hits[rank].add_alignment(alis[rank]['q'], alis[rank]['s'])
            except (KeyError, ValueError) as er:
                raise HHOutputFormatError(
                    'Corrupt alignment at hit No {0}.\n {1}'.format(rank, er))

    hit_list = HHpredHitList(hits.values())
    hit_list.sort()

    # add data obtained from the header to the HHpredHitList
    for identifier, data in header.items():
        if identifier in header_attributes:
            setattr(hit_list, header_attributes[identifier], data)

    return hit_list
def _parse(self, stream, header_only):
    """
    Build an HHpredHitList from the lines of an HHpred output.

    The parser is a three-state machine driven by the input lines:

    * header -- every ``identifier value`` line is recorded; ``Query`` and
      ``Command`` are stripped, ``Neff`` becomes a float, ``Searched_HMMs``
      and ``Match_columns`` become ints (the latter is also used as the
      query length of each hit). The state ends at the line which, with
      blanks removed, starts with ``NoHitProbE-value``.
    * hits -- one HHpredHit per fixed-width table row; ends at the first
      blank line or at a line starting with ``Done``.
    * alignments -- per-hit blocks introduced by ``No <rank>``; only read
      when ``self.alignments`` is true; ends at a line starting with
      ``Done``.

    :param stream: iterable yielding the lines of the output
    :param header_only: if true, reading stops at the hits table, so the
        result has header attributes but no hits and the rest of ``stream``
        is not consumed
    :return: HHpredHitList with the parsed hits (after calling its
        ``sort()``) and with the attributes query_name, match_columns,
        no_of_seqs, neff, searched_hmms, date and command set for each
        corresponding header entry that was present
    :raise HHOutputFormatError: when an alignment block names a rank absent
        from the hits table, or when add_alignment() on a hit raises
        KeyError or ValueError

    Conversion errors from ``int``/``float`` on malformed fields are not
    translated and propagate as raised.
    """
    HEADER, HITS, ALIGNMENTS = 'header', 'hits', 'alignments'

    section = HEADER
    qlen = None
    has_alis = False
    c_rank = 0
    header = {}
    hits = {}
    alis = {}

    for line in stream:

        if section == HEADER:
            if line.replace(' ', '').startswith('NoHitProbE-value'):
                if header_only:
                    # The header is complete once the table starts.
                    break
                section = HITS
                continue

            # parse header data (stuff above the hits table); blank lines
            # and single-token lines yield fewer than two columns
            columns = line.strip().split(None, 1)
            if len(columns) != 2:
                continue

            identifier, data = columns
            if identifier in ('Query', 'Command'):
                data = data.strip()
            elif identifier == 'Neff':
                data = float(data)
            elif identifier in ('Searched_HMMs', 'Match_columns'):
                data = int(data)

            header[identifier] = data
            if identifier == 'Match_columns':
                qlen = data
            continue

        if section == HITS:
            text = line.strip()

            if not text:
                # suboptimal way to handle block switch
                section = ALIGNMENTS
                if self.alignments:
                    continue
                break

            if text.startswith('Done'):
                # Prefix test, consistent with the alignment section below.
                break

            description = line[:34].split()
            rank = int(description[0])
            hit_id = description[1]

            pos = line[85:94].strip()
            start, end = map(int, pos.split('-'))
            qpos = line[75:84].strip()
            qstart, qend = map(int, qpos.split('-'))

            # Percentage in the table, fraction in the hit object.
            probability = float(line[35:40]) / 100.0

            hit = HHpredHit(rank, hit_id, start, end, qstart, qend,
                            probability, qlen)
            hit.evalue = float(line[41:48])
            hit.pvalue = float(line[49:56])
            hit.score = float(line[57:63])
            hit.ss_score = float(line[64:69])
            # Trailing column: a length wrapped in parentheses.
            hit.slength = int(line[94:].replace('(', '').replace(')', ''))

            hits[hit.rank] = hit
            alis[hit.rank] = {'q': [], 's': []}
            continue

        # section == ALIGNMENTS
        if line.startswith('Done'):
            break

        if line.startswith('No '):
            c_rank = int(line[3:])
            if c_rank not in hits:
                raise HHOutputFormatError(
                    'Alignment {0}. refers to a non-existing hit'.format(c_rank))

        elif line.startswith('>'):
            hits[c_rank].name = line[1:].strip()

        elif line.startswith('Probab='):
            for pair in line.split():
                key, value = pair.split('=')
                if key == 'Identities':
                    hits[c_rank].identity = float(value.replace('%', ''))
                elif key == 'Similarity':
                    hits[c_rank].similarity = float(value)
                elif key == 'Sum_probs':
                    hits[c_rank].prob_sum = float(value)

        elif line.startswith('Q ') and line[:11].rstrip() not in (
                'Q Consensus', 'Q ss_pred', 'Q ss_conf', 'Q ss_dssp'):
            # Residues end at the first whitespace or digit.
            for residue in line[22:]:
                if residue.isspace() or residue.isdigit():
                    break
                alis[c_rank]['q'].append(residue)
                has_alis = True

        elif line.startswith('T ') and line[:11].rstrip() not in (
                'T Consensus', 'T ss_pred', 'T ss_conf', 'T ss_dssp'):
            for residue in line[22:]:
                if residue.isspace() or residue.isdigit():
                    break
                alis[c_rank]['s'].append(residue)

    if self.alignments and has_alis:
        for rank, rows in alis.items():
            try:
                hits[rank].add_alignment(rows['q'], rows['s'])
            except (KeyError, ValueError) as er:
                raise HHOutputFormatError(
                    'Corrupt alignment at hit No {0}.\n {1}'.format(rank, er))

    result = HHpredHitList(hits.values())
    result.sort()

    # add data obtained from the header to the HHpredHitList
    for identifier, attribute in (
            ('Query', 'query_name'),
            ('Match_columns', 'match_columns'),
            ('No_of_seqs', 'no_of_seqs'),
            ('Neff', 'neff'),
            ('Searched_HMMs', 'searched_hmms'),
            ('Date', 'date'),
            ('Command', 'command')):
        if identifier in header:
            setattr(result, attribute, header[identifier])

    return result