def load_all(self, path='./test_data/Cross-ring-data.xml'):
    """Load cross-ring fragment reference records, indexed by isomer abbreviation.

    The XML document is walked as ``<class>`` -> ``<sugar>`` -> ``<fragment>``.
    For every ``<sugar>`` a table of its fragments is built and then registered
    under the ``abbr`` attribute of each ``<isomer>`` found beneath that sugar.

    Parameters
    ----------
    path:
        Source handed directly to ``ET.parse``. Defaults to the bundled
        reference file, so existing zero-argument calls behave as before.

    Returns
    -------
    dict
        Maps isomer abbreviation -> ``defaultdict(dict)`` keyed first by
        ``(rec.cleave_1, rec.cleave_2)`` and then by ``rec.kind``, whose values
        are ``self.FragmentRecord`` instances. All isomers of one sugar share
        the same table object.

    Raises
    ------
    KeyError
        If a ``<fragment>``, ``<composition>`` or ``<isomer>`` element lacks
        one of the attributes read below.
    """
    # Positional order in which fragment attributes are passed to FragmentRecord.
    field_names = ("name", "mass_mono", "mass_pm_mono", "cleav1", "cleav2", "type")

    tree = ET.parse(path)
    by_name = {}
    for sugar_class in tree.iterfind(".//class"):
        for case in sugar_class.iterfind(".//sugar"):
            isomers = [tag.attrib['abbr'] for tag in case.findall(".//isomer")]
            frags = defaultdict(dict)
            for frag in case.iterfind(".//fragment"):
                rec = self.FragmentRecord(*[frag.attrib[n] for n in field_names])
                # Counts are kept exactly as read from the XML (strings).
                rec.composition = {
                    part.attrib['element']: part.attrib['count']
                    for part in frag.findall(".//composition")
                }
                frags[rec.cleave_1, rec.cleave_2][rec.kind] = rec
            for name in isomers:
                by_name[name] = frags
    return by_name
def parse(self):
    """Stream GlycoCT XML from ``self.handle`` and yield one glycan per ``<sugar>``.

    The document is consumed with ``ET.iterparse`` using both "start" and
    "end" events. Start events only drive section-order validation through
    ``self.state``; end events accumulate intermediate values in
    ``self.buffer`` and nodes in ``self.graph``.

    Yields
    ------
    ``glycan.Glycan(self.root)`` when a ``</sugar>`` is reached and a root
    residue was recorded. ``self._reset()`` is called after each ``</sugar>``.

    Raises
    ------
    GlycoCTXMLError
        If ``<residues>`` opens when the state is not ``START``, or
        ``<linkages>`` opens when the state is not ``RES``.
    GlycoCTXMLSectionUnsupported
        If a ``<repeat>`` section opens.
    """
    for event, element in ET.iterparse(self.handle, ("start", "end")):
        tag = element.tag

        # ---- start events: section-order bookkeeping only -----------------
        if event != "end":
            if tag == "sugar":
                self.state = START
            elif tag == 'residues':
                if self.state != START:
                    raise GlycoCTXMLError("<residues> not the first section encountered")
                self.state = RES
            elif tag == 'linkages':
                if self.state != RES:
                    raise GlycoCTXMLError("<linkages> not following <residues>")
            elif tag == 'repeat':
                raise GlycoCTXMLSectionUnsupported(
                    "<{}> section is not supported".format(tag))
            continue

        # ---- end events: the element's attributes are now complete --------
        if tag == "stemtype":
            # The first character of ``type`` is stored as the configuration,
            # the remainder as the stem.
            self.buffer['stem'].append(element.attrib['type'][1:])
            self.buffer['configuration'].append(element.attrib['type'][0])

        elif tag == "modification":
            self.buffer['modification'].append(
                (element.attrib['type'], try_int(element.attrib['pos_one'])))

        elif tag == "basetype":
            (residue_id, anomer, superclass,
             ring_start, ring_end) = basetype_unpacker(element.attrib)
            superclass = superclass_map[superclass.upper()]
            anomer = anomer_map[anomer]
            residue_id = int(residue_id)

            modifications = OrderedMultiMap()
            pending = self.buffer.pop("modification", None)
            if pending is not None:
                for mod_name, position in pending:
                    modifications[position] = modification_map[mod_name]

            # An "aldi" entry at position 1 is passed on as the ``reduced``
            # flag instead of being kept as a modification.
            is_reduced = "aldi" in modifications[1]
            if is_reduced:
                modifications.pop(1, "aldi")

            stem = self.buffer.pop('stem')
            configuration = self.buffer.pop("configuration")
            residue = monosaccharide.Monosaccharide(
                anomer=anomer,
                superclass=superclass,
                stem=stem,
                configuration=configuration,
                ring_start=ring_start,
                ring_end=ring_end,
                modifications=modifications,
                reduced=is_reduced,
                id=residue_id)
            self.graph[residue_id] = residue
            # The first residue completed becomes the root of the glycan.
            if self.root is None:
                self.root = residue

        elif tag == "substituent":
            node = substituent.Substituent(element.attrib['name'])
            self.graph[int(element.attrib['id'])] = node

        elif tag == "connection":
            parent_node = self.graph[try_int(element.attrib['parent'])]
            child_node = self.graph[try_int(element.attrib['child'])]
            # Values buffered by the nested <linkage>/<parent>/<child>
            # elements, consumed in this fixed order.
            link_kwargs = {
                key: self.buffer.pop(key)
                for key in ("parent_position", "child_position",
                            "parent_loss", "child_loss", "id")
            }
            link.Link(parent_node, child_node, **link_kwargs)

        elif tag == "parent":
            self.buffer['parent_position'] = int(element.attrib['pos'])

        elif tag == "child":
            self.buffer['child_position'] = int(element.attrib['pos'])

        elif tag == "linkage":
            attrs = element.attrib
            self.buffer["id"] = int(attrs['id'])
            self.buffer["parent_loss"] = link_replacement_composition_map[attrs['parentType']]
            self.buffer["child_loss"] = link_replacement_composition_map[attrs['childType']]

        elif tag == 'sugar':
            if self.root is not None:
                yield glycan.Glycan(self.root)
            self._reset()