Ejemplo n.º 1
0
    def _parse_hit(self, root_hit_elem, query_id):
        """Yield one Hit object per child element of an Iteration_hits tag (PRIVATE).

        :param root_hit_elem: root element of the Iteration_hits tag; when it
            is ``None`` nothing is yielded.
        :type root_hit_elem: XML element tag
        :param query_id: QueryResult ID of this Hit
        :type query_id: string

        """
        # Hit level processing
        # Every child of the Iteration_hits tag is a Hit element following
        # this DTD
        # <!ELEMENT Hit (
        #        Hit_num,
        #        Hit_id,
        #        Hit_def,
        #        Hit_accession,
        #        Hit_len,
        #        Hit_hsps?)>

        # without an Iteration_hits element there is nothing to iterate over
        if root_hit_elem is None:
            return

        for hit_elem in root_hit_elem:
            raw_id = hit_elem.findtext('Hit_id')
            raw_desc = hit_elem.findtext('Hit_def')

            # BLAST sometimes mangles the sequence IDs and descriptions, so
            # the actual values are extracted unless raw IDs were requested.
            if self._use_raw_hit_ids:
                ids, descs, blast_hit_id = [raw_id], [raw_desc], raw_id
            else:
                ids, descs, blast_hit_id = _extract_ids_and_descs(raw_id, raw_desc)

            # the first entry is the primary one, the remainder are alternatives
            hit_id = ids[0]
            alt_hit_ids = ids[1:]
            hit_desc = descs[0]
            alt_hit_descs = descs[1:]

            hsps = list(self._parse_hsp(hit_elem.find('Hit_hsps'), query_id, hit_id))

            hit = Hit(hsps)
            hit.description = hit_desc
            hit._id_alt = alt_hit_ids
            hit._description_alt = alt_hit_descs
            hit.blast_id = blast_hit_id

            # copy over every element listed in _ELEM_HIT that is present
            for tag, val_info in _ELEM_HIT.items():
                text = hit_elem.findtext(tag)
                if text is None:
                    continue
                caster = val_info[1]
                # recast only if value is not intended to be str
                value = text if caster is str else caster(text)
                setattr(hit, val_info[0], value)

            # clear the element now that it has been fully parsed
            hit_elem.clear()
            yield hit
Ejemplo n.º 2
0
    def _parse_hit(self, root_hit_elem, query_id):
        """Yield Hit objects built from the children of an Iteration_hits element.

        The primary ID and the defline are joined and then split on ``' >'``
        into (ID, description) pairs. The first pair becomes the hit's ID and
        description; the remaining pairs are stored as alternatives. An entry
        that carries an ID but no description gets an empty description.

        :param root_hit_elem: root element of the Iteration_hits tag; when it
            is ``None`` nothing is yielded.
        :type root_hit_elem: XML element tag
        :param query_id: QueryResult ID of this Hit
        :type query_id: string

        """
        # Hit level processing
        # Hits are stored in the Iteration_hits tag, with the following
        # DTD
        # <!ELEMENT Hit (
        #        Hit_num,
        #        Hit_id,
        #        Hit_def,
        #        Hit_accession,
        #        Hit_len,
        #        Hit_hsps?)>

        # feed the loop below an empty list so iteration still works
        if root_hit_elem is None:
            root_hit_elem = []

        for hit_elem in root_hit_elem:

            hit_id = hit_elem.findtext('Hit_id')
            hit_desc = hit_elem.findtext('Hit_def')
            # handle blast searches against databases with Blast's IDs: the
            # real ID is then the first word of the defline
            if hit_id.startswith('gnl|BL_ORD_ID|'):
                blast_hit_id = hit_id
                # partition yields an empty description when there is no space
                hit_id, _, hit_desc = hit_desc.partition(' ')
            else:
                blast_hit_id = ''

            # combine primary ID and defline first before splitting
            full_id_desc = hit_id + ' ' + hit_desc
            id_descs = []
            for entry in full_id_desc.split(' >'):
                # an entry may consist of an ID only; partition (unlike a
                # two-element unpack of split) then gives an empty description
                # instead of raising ValueError
                entry_id, _, entry_desc = entry.partition(' ')
                id_descs.append((entry_id.strip(), entry_desc.strip()))
            hit_id, hit_desc = id_descs[0]

            hsps = list(self._parse_hsp(hit_elem.find('Hit_hsps'), query_id, hit_id))

            hit = Hit(hsps)
            hit.description = hit_desc
            hit._id_alt = [x[0] for x in id_descs[1:]]
            hit._description_alt = [x[1] for x in id_descs[1:]]
            # blast_hit_id is only set if the hit ID is Blast-generated
            hit._blast_id = blast_hit_id

            for key, val_info in _ELEM_HIT.items():
                value = hit_elem.findtext(key)
                if value is None:
                    continue
                caster = val_info[1]
                # recast only if value is not intended to be str
                if caster is not str:
                    value = caster(value)
                setattr(hit, val_info[0], value)

            # delete element after we finish parsing it
            hit_elem.clear()
            yield hit
Ejemplo n.º 3
0
    def _parse_hit(self, root_hit_elem, query_id):
        """Yield Hit objects built from the children of an Iteration_hits element.

        :param root_hit_elem: root element of the Iteration_hits tag; when it
            is ``None`` nothing is yielded.
        :type root_hit_elem: XML element tag
        :param query_id: QueryResult ID of this Hit
        :type query_id: string

        """
        # Hit level processing
        # Each child of the Iteration_hits tag is a Hit element following
        # this DTD
        # <!ELEMENT Hit (
        #        Hit_num,
        #        Hit_id,
        #        Hit_def,
        #        Hit_accession,
        #        Hit_len,
        #        Hit_hsps?)>

        # a missing Iteration_hits element simply produces no hits
        if root_hit_elem is None:
            return

        for hit_elem in root_hit_elem:
            raw_id = hit_elem.findtext('Hit_id')
            raw_def = hit_elem.findtext('Hit_def')

            # handle blast searches against databases with Blast's IDs: the
            # ID is then taken from the first word of the defline
            if raw_id.startswith('gnl|BL_ORD_ID|'):
                blast_hit_id = raw_id
                primary_id, _, remainder = raw_def.partition(' ')
            else:
                blast_hit_id = ''
                primary_id, remainder = raw_id, raw_def

            # combine primary ID and defline first before splitting
            id_descs = _extract_ids_and_descs(primary_id + ' ' + remainder)
            hit_id, hit_desc = id_descs[0]
            alt_pairs = id_descs[1:]

            hsps = list(self._parse_hsp(hit_elem.find('Hit_hsps'), query_id, hit_id))

            hit = Hit(hsps)
            hit.description = hit_desc
            hit._id_alt = [pair[0] for pair in alt_pairs]
            hit._description_alt = [pair[1] for pair in alt_pairs]
            # blast_hit_id is only set if the hit ID is Blast-generated
            hit._blast_id = blast_hit_id

            # copy over every element listed in _ELEM_HIT that is present
            for tag, val_info in _ELEM_HIT.items():
                text = hit_elem.findtext(tag)
                if text is None:
                    continue
                caster = val_info[1]
                # recast only if value is not intended to be str
                value = text if caster is str else caster(text)
                setattr(hit, val_info[0], value)

            # clear the element now that it has been fully parsed
            hit_elem.clear()
            yield hit
Ejemplo n.º 4
0
    def _parse_hit(self, root_hit_elem, query_id):
        """Generate Hit objects from the Hit elements inside an Iteration_hits tag (PRIVATE).

        :param root_hit_elem: root element of the Iteration_hits tag; ``None``
            is treated as an empty collection of hits.
        :type root_hit_elem: XML element tag
        :param query_id: QueryResult ID of this Hit
        :type query_id: string

        """
        # Hit level processing
        # The Iteration_hits tag holds Hit elements described by this DTD
        # <!ELEMENT Hit (
        #        Hit_num,
        #        Hit_id,
        #        Hit_def,
        #        Hit_accession,
        #        Hit_len,
        #        Hit_hsps?)>

        # substitute an empty list so the loop below still works
        hit_elems = [] if root_hit_elem is None else root_hit_elem

        for hit_elem in hit_elems:
            raw_hit_id = hit_elem.findtext('Hit_id')
            raw_hit_desc = hit_elem.findtext('Hit_def')

            # BLAST sometimes mangles the sequence IDs and descriptions, so
            # the actual values are extracted unless raw IDs were requested.
            if not self._use_raw_hit_ids:
                extracted = _extract_ids_and_descs(raw_hit_id, raw_hit_desc)
            else:
                extracted = ([raw_hit_id], [raw_hit_desc], raw_hit_id)
            ids, descs, blast_hit_id = extracted

            # head of each list is the primary value, the tail the alternatives
            hit_id, alt_hit_ids = ids[0], ids[1:]
            hit_desc, alt_hit_descs = descs[0], descs[1:]

            hsp_elem = hit_elem.find('Hit_hsps')
            hsps = list(self._parse_hsp(hsp_elem, query_id, hit_id))

            hit = Hit(hsps)
            hit.description = hit_desc
            hit._id_alt = alt_hit_ids
            hit._description_alt = alt_hit_descs
            hit.blast_id = blast_hit_id

            # transfer the elements named in _ELEM_HIT onto the hit object
            for key, val_info in _ELEM_HIT.items():
                value = hit_elem.findtext(key)
                if value is None:
                    continue
                caster = val_info[1]
                # recast only if value is not intended to be str
                if caster is not str:
                    value = caster(value)
                setattr(hit, val_info[0], value)

            # clear the element once parsing of this hit is finished
            hit_elem.clear()
            yield hit