def _parse_qresult(self): """Parses query results.""" # parse the queries for event, qresult_elem in self.xml_iter: # </Iteration> marks the end of a single query # which means we can process it if event == 'end' and qresult_elem.tag == 'Iteration': # we'll use the following schema # <!ELEMENT Iteration ( # Iteration_iter-num, # Iteration_query-ID?, # Iteration_query-def?, # Iteration_query-len?, # Iteration_hits?, # Iteration_stat?, # Iteration_message?)> # assign query attributes with fallbacks query_id = qresult_elem.findtext('Iteration_query-ID') if query_id is None: query_id = self._fallback['id'] query_desc = qresult_elem.findtext('Iteration_query-def') if query_desc is None: query_desc = self._fallback['description'] query_len = qresult_elem.findtext('Iteration_query-len') if query_len is None: query_len = self._fallback['len'] # handle blast searches against databases with Blast's IDs # 'Query_' marks the beginning of a BLAST+-generated ID, # 'lcl|' marks the beginning of a BLAST legacy-generated ID if query_id.startswith('Query_') or query_id.startswith('lcl|'): # store the Blast-generated query ID blast_query_id = query_id id_desc = query_desc.split(' ', 1) query_id = id_desc[0] try: query_desc = id_desc[1] except IndexError: query_desc = '' else: blast_query_id = '' hit_list, key_list = [], [] for hit in self._parse_hit(qresult_elem.find('Iteration_hits'), query_id): if hit: # need to keep track of hit IDs, since there could be duplicates, if hit.id in key_list: warnings.warn("Adding hit with BLAST-generated ID " "%r since hit ID %r is already present " "in query %r. Your BLAST database may contain " "duplicate entries." % (hit._blast_id, hit.id, query_id), BiopythonParserWarning) # fallback to Blast-generated IDs, if the ID is already present # and restore the desc, too hit.description = '%s %s' % (hit.id, hit.description) hit.id = hit._blast_id # and change the hit_id of the HSPs contained for hsp in hit: hsp.hit_id = hit._blast_id else: key_list.append(hit.id) hit_list.append(hit) # create qresult and assign its attributes qresult = QueryResult(hit_list, query_id) qresult.description = query_desc qresult.seq_len = int(query_len) qresult._blast_id = blast_query_id for key, value in self._meta.items(): setattr(qresult, key, value) # statistics are stored in Iteration_stat's 'grandchildren' with the # following DTD # <!ELEMENT Statistics ( # Statistics_db-num, # Statistics_db-len, # Statistics_hsp-len, # Statistics_eff-space, # Statistics_kappa, # Statistics_lambda, # Statistics_entropy)> stat_iter_elem = qresult_elem.find('Iteration_stat') if stat_iter_elem is not None: stat_elem = stat_iter_elem.find('Statistics') for key, val_info in _ELEM_QRESULT_OPT.items(): value = stat_elem.findtext(key) if value is not None: caster = val_info[1] # recast only if value is not intended to be str if value is not None and caster is not str: value = caster(value) setattr(qresult, val_info[0], value) # delete element after we finish parsing it qresult_elem.clear() yield qresult
def _parse_qresult(self): """Parses query results.""" # parse the queries for event, qresult_elem in self.xml_iter: # </Iteration> marks the end of a single query # which means we can process it if event == 'end' and qresult_elem.tag == 'Iteration': # we'll use the following schema # <!ELEMENT Iteration ( # Iteration_iter-num, # Iteration_query-ID?, # Iteration_query-def?, # Iteration_query-len?, # Iteration_hits?, # Iteration_stat?, # Iteration_message?)> # assign query attributes with fallbacks query_id = qresult_elem.findtext('Iteration_query-ID') if query_id is None: query_id = self._fallback['id'] query_desc = qresult_elem.findtext('Iteration_query-def') if query_desc is None: query_desc = self._fallback['description'] query_len = qresult_elem.findtext('Iteration_query-len') if query_len is None: query_len = self._fallback['len'] # handle blast searches against databases with Blast's IDs # 'Query_' marks the beginning of a BLAST+-generated ID, # 'lcl|' marks the beginning of a BLAST legacy-generated ID if query_id.startswith('Query_') or query_id.startswith( 'lcl|'): # store the Blast-generated query ID blast_query_id = query_id id_desc = query_desc.split(' ', 1) query_id = id_desc[0] try: query_desc = id_desc[1] except IndexError: query_desc = '' else: blast_query_id = '' hit_list, key_list = [], [] for hit in self._parse_hit(qresult_elem.find('Iteration_hits'), query_id): if hit: # need to keep track of hit IDs, since there could be duplicates, if hit.id in key_list: warnings.warn( "Adding hit with BLAST-generated ID " "%r since hit ID %r is already present " "in query %r. Your BLAST database may contain " "duplicate entries." % (hit._blast_id, hit.id, query_id), BiopythonParserWarning) # fallback to Blast-generated IDs, if the ID is already present # and restore the desc, too hit.description = '%s %s' % (hit.id, hit.description) hit.id = hit._blast_id # and change the hit_id of the HSPs contained for hsp in hit: hsp.hit_id = hit._blast_id else: key_list.append(hit.id) hit_list.append(hit) # create qresult and assign its attributes qresult = QueryResult(hit_list, query_id) qresult.description = query_desc qresult.seq_len = int(query_len) qresult._blast_id = blast_query_id for key, value in self._meta.items(): setattr(qresult, key, value) # statistics are stored in Iteration_stat's 'grandchildren' with the # following DTD # <!ELEMENT Statistics ( # Statistics_db-num, # Statistics_db-len, # Statistics_hsp-len, # Statistics_eff-space, # Statistics_kappa, # Statistics_lambda, # Statistics_entropy)> stat_iter_elem = qresult_elem.find('Iteration_stat') if stat_iter_elem is not None: stat_elem = stat_iter_elem.find('Statistics') for key, val_info in _ELEM_QRESULT_OPT.items(): value = stat_elem.findtext(key) if value is not None: caster = val_info[1] # recast only if value is not intended to be str if value is not None and caster is not str: value = caster(value) setattr(qresult, val_info[0], value) # delete element after we finish parsing it qresult_elem.clear() yield qresult