def _parse_feature(element):
    """Convert a <feature> element into a SeqFeature and add it to the record.

    Every XML attribute of ``element`` is copied into the feature's
    qualifiers. The ``type`` attribute (default ``''``) becomes the feature
    type and the ``id`` attribute, when present, the feature id.

    Children are handled as follows:

    * a ``location`` child becomes the feature's FeatureLocation, built
      either from a single ``position`` child (used for both start and end)
      or from a ``begin``/``end`` pair;
    * any other child is stored as a qualifier keyed by its tag with the
      namespace prefix ``NS`` removed, the value being the child's text.

    The finished feature is appended to ``self.ParsedSeqRecord.features``
    (``self`` is not a parameter; it is resolved from the enclosing scope).
    Nothing is returned.

    Raises IndexError if a ``location`` child has no ``position`` and lacks
    a ``begin`` or ``end`` child.
    """
    feature = SeqFeature.SeqFeature()
    for key, value in element.attrib.items():
        feature.qualifiers[key] = value
    feature.type = element.attrib.get('type', '')
    if 'id' in element.attrib:
        feature.id = element.attrib['id']

    for feature_element in element:
        if feature_element.tag == NS + 'location':
            position_elements = feature_element.findall(NS + 'position')
            if position_elements:
                # Single-residue feature: one <position> gives both ends.
                # The -1 offset is applied to the start value only.
                position_element = position_elements[0]
                start_position = _parse_position(position_element, -1)
                end_position = _parse_position(position_element)
            else:
                begin_element = feature_element.findall(NS + 'begin')[0]
                start_position = _parse_position(begin_element, -1)
                end_element = feature_element.findall(NS + 'end')[0]
                end_position = _parse_position(end_element)
            feature.location = SeqFeature.FeatureLocation(start_position,
                                                          end_position)
        else:
            try:
                qualifier_key = feature_element.tag.replace(NS, '')
                feature.qualifiers[qualifier_key] = feature_element.text
            except (AttributeError, TypeError):
                # Best effort: skip a child whose tag cannot be turned into
                # a qualifier key (e.g. a tag that is not a string), but do
                # not swallow unrelated errors such as KeyboardInterrupt.
                pass
    self.ParsedSeqRecord.features.append(feature)
def _make_seqfeature(name, from_res, to_res, description, ft_id):
    """Build a SeqFeature from one parsed feature entry (PRIVATE).

    ``from_res`` and ``to_res`` are converted with ``_make_position`` (using
    offsets -1 and 0 respectively) and combined into the FeatureLocation.
    ``name`` becomes the feature type and ``description`` is stored under the
    ``"description"`` qualifier.  A false-y ``ft_id`` is replaced by
    ``"<unknown id>"``.
    """
    start = _make_position(from_res, -1)
    end = _make_position(to_res, 0)
    location = SeqFeature.FeatureLocation(start, end)
    # "<unknown id>" is the default in the SeqFeature object.
    feature_id = ft_id or "<unknown id>"
    return SeqFeature.SeqFeature(
        location,
        type=name,
        id=feature_id,
        qualifiers={"description": description},
    )
def _parse_dbReference(element):
    """Record a <dbReference> element as a ``type:id`` cross-reference.

    The string ``<type>:<id>`` built from the element's attributes is
    appended to ``self.ParsedSeqRecord.dbxrefs`` (``self`` is not a
    parameter; it is resolved from the enclosing scope).

    For PDB references the ``method``, ``resolution`` and ``chains``
    properties are additionally decoded into one SeqFeature per chain group,
    but these features are deliberately not stored yet (see the TODO in the
    body).  Chain specifications that lack ``=`` or a numeric ``start-end``
    range are skipped instead of aborting the parse, since the decoded
    feature is discarded anyway.

    ``property`` children of non-PDB references are ignored: this data cannot
    be fitted in a SeqRecord object with a simple list.  However, at least
    Ensembl and EMBL parsing could be improved to add entries in dbxrefs.

    Raises KeyError if the element has no ``type`` or ``id`` attribute.
    """
    db_type = element.attrib['type']
    db_id = element.attrib['id']
    self.ParsedSeqRecord.dbxrefs.append(db_type + ':' + db_id)
    # e.g.
    # <dbReference type="PDB" key="11" id="2GEZ">
    #   <property value="X-ray" type="method"/>
    #   <property value="2.60 A" type="resolution"/>
    #   <property value="A/C/E/G=1-192, B/D/F/H=193-325" type="chains"/>
    # </dbReference>
    if db_type != 'PDB':
        return

    method = ""
    resolution = ""
    for ref_element in element:
        if ref_element.tag != NS + 'property':
            continue
        dat_type = ref_element.attrib['type']
        if dat_type == 'method':
            method = ref_element.attrib['value']
        elif dat_type == 'resolution':
            resolution = ref_element.attrib['value']
        elif dat_type == 'chains':
            for chain_spec in ref_element.attrib['value'].split(','):
                chains, separator, residues = chain_spec.strip().partition('=')
                if not separator or residues == '-':
                    # No residue range is given for these chains.
                    continue
                start_text, _, end_text = residues.partition('-')
                try:
                    start = int(start_text) - 1
                    end = int(end_text)
                except ValueError:
                    # Malformed range; not worth failing the whole record
                    # for a feature that is not stored.
                    continue
                # TODO - How best to store these, do SeqFeatures make sense?
                # Until that is decided the feature is built but not
                # attached to the record.
                feature = SeqFeature.SeqFeature()
                feature.type = db_type
                feature.qualifiers['name'] = db_id
                feature.qualifiers['method'] = method
                feature.qualifiers['resolution'] = resolution
                feature.qualifiers['chains'] = chains.split('/')
                feature.location = SeqFeature.FeatureLocation(start, end)
                # self.ParsedSeqRecord.features.append(feature)