def parse_opf_xml(rawxml):
    """Parse raw OPF XML into a ``Storage`` keyed by top-level section.

    The input is first passed through ``xml_to_unicode`` (with
    ``strip_encoding_pats``, ``resolve_entities`` and ``assume_utf8`` all
    enabled), anything preceding the first ``'<'`` is discarded, and the
    result is parsed with a recovering lxml parser.

    For each of the ``metadata``, ``manifest``, ``spine`` and ``guide``
    sections found directly under the root (looked up as ``opf:<section>``
    using ``NAMESPACES``), every child node is recorded as a
    ``(tag, attrib, text)`` tuple, in document order.

    :param rawxml: The OPF document source, as accepted by
        ``xml_to_unicode``.
    :return: A ``Storage`` mapping each section name to a list of
        ``(tag, attrib, text)`` tuples. Sections that are missing or have
        no children get no key. An empty ``Storage`` is returned when the
        parser cannot recover a root element.
    """
    # Only the converted text is needed; the second returned value is unused.
    rawxml, _ = xml_to_unicode(
        rawxml,
        strip_encoding_pats=True,
        resolve_entities=True,
        assume_utf8=True,
    )
    # Drop whatever comes before the first '<'.
    # NOTE(review): if there is no '<' at all, find() returns -1 and this
    # keeps only the last character; the parser guard below covers that case.
    rawxml = rawxml[rawxml.find('<'):]
    tree = etree.fromstring(rawxml, etree.XMLParser(recover=True))

    opf = Storage()
    # A recovering parser can yield None instead of raising when it finds no
    # usable root element; treat that as a document with no sections.
    if tree is None:
        return opf

    for section in ('metadata', 'manifest', 'spine', 'guide'):
        subtree = tree.find('opf:%s' % section, namespaces=NAMESPACES)
        if subtree is None:
            continue
        # Iterate the element directly: getchildren() is deprecated.
        for el in subtree:
            opf.setdefault(section, []).append((el.tag, el.attrib, el.text))
    return opf