def parse_hsps(self, hit_placeholders):
    """Parse a HMMER2 hsp block, beginning with the hsp table.

    Lines are consumed through ``self.read_next()`` until it returns a
    falsy value, or until a line that starts with ``Alignments`` or
    ``Histogram``, or that equals ``//``, is reached.  Header and separator
    lines (starting with ``Model``, ``Sequence`` or ``--------``) are
    skipped.  Every other line is split on whitespace into exactly ten
    columns and turned into one single-fragment HSP.

    ``hit_placeholders`` is iterated once for every newly seen hit ID and
    once more at the end.  Each element must provide an ``id_`` attribute
    and a ``createHit`` method.  The finished hits are appended to
    ``self.qresult`` in placeholder order.
    """
    # Rows of the hsp table need not follow the order of the hit table, so
    # the Hit objects are gathered by ID first and ordered afterwards.
    hits_by_id = {}

    while self.read_next():
        row = self.line
        if row == "//" or row.startswith(("Alignments", "Histogram")):
            break
        if row.startswith(("Model", "Sequence", "--------")):
            continue

        (
            hit_id,
            domain,
            seq_from,
            seq_to,
            seq_ends,
            hmm_from,
            hmm_to,
            hmm_ends,
            score,
            evalue,
        ) = row.split()

        fragment = HSPFragment(hit_id, self.qresult.id)
        fragment.alphabet = generic_protein

        # Which table columns describe the hit and which the query depends
        # on the program.  Start values are decremented by one; end values
        # are stored as given.
        program = self._meta["program"]
        if program == "hmmpfam":
            fragment.hit_start = int(hmm_from) - 1
            fragment.hit_end = int(hmm_to)
            fragment.query_start = int(seq_from) - 1
            fragment.query_end = int(seq_to)
        elif program == "hmmsearch":
            fragment.query_start = int(hmm_from) - 1
            fragment.query_end = int(hmm_to)
            fragment.hit_start = int(seq_from) - 1
            fragment.hit_end = int(seq_to)

        hsp = HSP([fragment])
        hsp.evalue = float(evalue)
        hsp.bitscore = float(score)
        # Only the part of the domain column before the first "/" is kept.
        hsp.domain_index = int(domain.partition("/")[0])
        if program == "hmmpfam":
            hsp.hit_endtype = hmm_ends
            hsp.query_endtype = seq_ends
        elif program == "hmmsearch":
            hsp.query_endtype = hmm_ends
            hsp.hit_endtype = seq_ends

        if hit_id in hits_by_id:
            # Further domain of a hit that already exists.
            known_hit = hits_by_id[hit_id]
            hsp.hit_description = known_hit.description
            known_hit.append(hsp)
        else:
            # First domain of this hit: build the Hit from the first
            # placeholder carrying the same ID.
            matching = [p for p in hit_placeholders if p.id_ == hit_id]
            hits_by_id[hit_id] = matching[0].createHit([hsp])

    # The placeholders carry the order of the hit table; reuse it for the
    # Hit objects stored in the query result.
    for placeholder in hit_placeholders:
        self.qresult.append(hits_by_id[placeholder.id_])
def parse_hsps(self, hit_placeholders):
    """Parse a HMMER2 hsp block, beginning with the hsp table.

    Reading stops when ``self.read_next()`` returns a falsy value or when
    the current line starts with ``Alignments`` or ``Histogram`` or equals
    ``//``.  Lines starting with ``Model``, ``Sequence`` or ``--------``
    are ignored.  Each remaining line must split into exactly ten
    whitespace-separated columns and yields one HSP holding one fragment.

    The elements of ``hit_placeholders`` need an ``id_`` attribute and a
    ``createHit`` method.  The resulting hits are added to ``self.qresult``
    following the order of ``hit_placeholders``.
    """
    # The hsp table may list hits in a different order than the hit table,
    # therefore the hits are first collected in a plain mapping.
    collected = {}

    while self.read_next():
        current = self.line
        if current.startswith(('Alignments', 'Histogram')) or current == '//':
            break
        if current.startswith(('Model', 'Sequence', '--------')):
            continue

        columns = current.split()
        (name, domain, seq_begin, seq_end, seq_shape,
         model_begin, model_end, model_shape, score, evalue) = columns

        frag = HSPFragment(name, self.qresult.id)
        frag.alphabet = generic_protein

        # hmmpfam reports the model as the hit, hmmsearch as the query.
        # Start values are decremented by one, end values are kept as is.
        program = self._meta['program']
        if program == 'hmmpfam':
            frag.hit_start = int(model_begin) - 1
            frag.hit_end = int(model_end)
            frag.query_start = int(seq_begin) - 1
            frag.query_end = int(seq_end)
        elif program == 'hmmsearch':
            frag.query_start = int(model_begin) - 1
            frag.query_end = int(model_end)
            frag.hit_start = int(seq_begin) - 1
            frag.hit_end = int(seq_end)

        hsp = HSP([frag])
        hsp.evalue = float(evalue)
        hsp.bitscore = float(score)
        # The domain column is "<index>/<...>"-like text; only the leading
        # piece before the first slash is converted.
        domain_pieces = domain.split('/')
        hsp.domain_index = int(domain_pieces[0])
        if program == 'hmmpfam':
            hsp.hit_endtype = model_shape
            hsp.query_endtype = seq_shape
        elif program == 'hmmsearch':
            hsp.query_endtype = model_shape
            hsp.hit_endtype = seq_shape

        if name in collected:
            # The hit exists already: attach this additional HSP to it.
            hit = collected[name]
            hsp.hit_description = hit.description
            hit.append(hsp)
            continue

        # First HSP for this ID: let the first matching placeholder build
        # the Hit object.
        candidates = [p for p in hit_placeholders if p.id_ == name]
        collected[name] = candidates[0].createHit([hsp])

    # Emit the hits in the order given by the placeholder list.
    for entry in hit_placeholders:
        self.qresult.append(collected[entry.id_])
def _create_qresult(self, hit_blocks):
    """Create the Biopython data structures from the parsed data (PRIVATE).

    Each entry of ``hit_blocks`` is a mapping that is turned into one
    single-fragment HSP.  HSPs sharing a ``hit_id`` are grouped into one
    Hit, in order of first appearance; the hit-level description, evalue
    and score are taken from the first block seen for that ID.

    Returns a one-element list holding the QueryResult, whose ``program``
    is ``_PROGRAM`` and whose ``seq_len`` is ``self.seq_len``.
    """
    query_id = self.query_id
    hits_by_id = OrderedDict()

    for position, block in enumerate(hit_blocks):
        hit_id = block['hit_id']
        fragment = HSPFragment(hit_id, query_id)
        # No alphabet / molecule type is assigned to the fragment here.

        # A truthy start is decremented by one; a falsy start (such as
        # None or 0) is stored unchanged.
        query_start = block['query_start']
        fragment.query_start = query_start - 1 if query_start else query_start
        fragment.query_end = block['query_end']
        hit_start = block['hit_start']
        fragment.hit_start = hit_start - 1 if hit_start else hit_start
        fragment.hit_end = block['hit_end']
        fragment.hit = block['hit_seq']
        fragment.query = block['query_seq']

        hsp = HSP([fragment])
        hsp.hit_id = hit_id
        hsp.output_index = position
        hsp.query_id = query_id
        hsp.hit_description = block['description']
        # Open question kept from the original code: should every block
        # really be flagged as included?
        hsp.is_included = True
        # These values are copied verbatim under the same name.
        for field in ('evalue', 'score', 'prob', 'hit_seq_len', 'text'):
            setattr(hsp, field, block[field])

        if hit_id in hits_by_id:
            hits_by_id[hit_id].append(hsp)
            continue

        # First block for this ID: it also supplies the hit-level values.
        hit = Hit([hsp], hit_id)
        hit.description = block['description']
        hit.is_included = True
        hit.evalue = block['evalue']
        hit.score = block['score']
        hits_by_id[hit_id] = hit

    qresult = QueryResult(hits_by_id.values(), query_id)
    qresult.program = _PROGRAM
    qresult.seq_len = self.seq_len
    return [qresult]
def _create_qresult(self, hit_blocks):
    """Create the Biopython data structures from the parsed data (PRIVATE).

    One single-fragment HSP is built per entry of ``hit_blocks``.  HSPs
    with the same ``hit_id`` end up in the same Hit; hits keep the order
    in which their ID first occurs, and their description, evalue and
    score come from the first block with that ID.

    Returns a list containing exactly one QueryResult.
    """
    query_id = self.query_id
    grouped = OrderedDict()

    for index, entry in enumerate(hit_blocks):
        hit_id = entry["hit_id"]

        fragment = HSPFragment(hit_id, query_id)
        fragment.molecule_type = "protein"
        # Start values are decremented by one; end values are stored as is.
        fragment.query_start = entry["query_start"] - 1
        fragment.query_end = entry["query_end"]
        fragment.hit_start = entry["hit_start"] - 1
        fragment.hit_end = entry["hit_end"]
        fragment.hit = entry["hit_seq"]
        fragment.query = entry["query_seq"]

        hsp = HSP([fragment])
        hsp.hit_id = hit_id
        hsp.output_index = index
        hsp.query_id = query_id
        hsp.hit_description = entry["description"]
        # Open question kept from the original code: should every block
        # really be flagged as included?
        hsp.is_included = True
        hsp.evalue = entry["evalue"]
        hsp.score = entry["score"]
        hsp.prob = entry["prob"]

        existing = grouped.get(hit_id)
        if existing is not None:
            existing.append(hsp)
        else:
            # First block for this ID: it defines the hit-level values.
            new_hit = Hit([hsp], hit_id)
            new_hit.description = entry["description"]
            new_hit.is_included = True
            new_hit.evalue = entry["evalue"]
            new_hit.score = entry["score"]
            grouped[hit_id] = new_hit

    qresult = QueryResult(grouped.values(), query_id)
    qresult.program = _PROGRAM
    qresult.seq_len = self.seq_len
    return [qresult]
def _create_qresult(self, hit_blocks):
    """Create the Biopython data structures from the parsed data (PRIVATE).

    Every mapping in ``hit_blocks`` becomes one HSP made of one fragment.
    Blocks that share a ``hit_id`` are collected into a single Hit whose
    description, evalue and score are those of the first such block; hits
    are ordered by the first occurrence of their ID.

    Returns a one-element list with the resulting QueryResult.
    """
    query_id = self.query_id
    hit_table = OrderedDict()

    for block_number, data in enumerate(hit_blocks):
        hit_id = data['hit_id']

        frag = HSPFragment(hit_id, query_id)
        frag.alphabet = generic_protein
        # Start values are decremented by one; end values are stored as is.
        frag.query_start = data['query_start'] - 1
        frag.query_end = data['query_end']
        frag.hit_start = data['hit_start'] - 1
        frag.hit_end = data['hit_end']
        frag.hit = data['hit_seq']
        frag.query = data['query_seq']

        hsp = HSP([frag])
        hsp.hit_id = hit_id
        hsp.output_index = block_number
        hsp.query_id = query_id
        hsp.hit_description = data['description']
        # Open question kept from the original code: should every block
        # really be flagged as included?
        hsp.is_included = True
        # These values are copied verbatim under the same name.
        for key in ('evalue', 'score', 'prob'):
            setattr(hsp, key, data[key])

        if hit_id in hit_table:
            hit_table[hit_id].append(hsp)
            continue

        # First block for this ID: create the Hit and its summary values.
        hit = Hit([hsp], hit_id)
        hit.description = data['description']
        hit.is_included = True
        hit.evalue = data['evalue']
        hit.score = data['score']
        hit_table[hit_id] = hit

    qresult = QueryResult(hit_table.values(), query_id)
    qresult.program = _PROGRAM
    qresult.seq_len = self.seq_len
    return [qresult]