def _parse_nfo(nfo_path: str, nfo_data: Optional[List[str]] = None) -> minidom.Document: # nfo files can contain XML or a URL to seed the XBMC metadata scrapers # It's also possible to have both (a URL after the XML metadata) # pyTivo only parses the XML metadata, but we'll try to stip the URL # from mixed XML/URL files. Returns `None` when XML can't be parsed. if nfo_data is None: with open(nfo_path, "r") as nfo_fh: nfo_data = [line.strip() for line in nfo_fh] xmldoc = None try: xmldoc = minidom.parseString(os.linesep.join(nfo_data)) except expat.ExpatError as err: if expat.ErrorString(err.code) == expat.errors.XML_ERROR_INVALID_TOKEN: # might be a URL outside the xml while len(nfo_data) > err.lineno: if len(nfo_data[-1]) == 0: nfo_data.pop() else: break if len(nfo_data) == err.lineno: # last non-blank line contains the error nfo_data.pop() return _parse_nfo(nfo_path, nfo_data) return xmldoc
class ConfigXml():
    """Expat-based reader for an XML configuration file.

    NOTE(review): the element handlers (`onElemStart`, `onElemData`,
    `onElemEnd`) referenced by `parse` are not part of this excerpt;
    presumably they fill `m_config` while parsing -- confirm.
    """

    def __init__(self):
        self.m_curData = ""
        self.m_config = None

    def parse(self, pathname=None):
        """Parse the configuration file and return the configuration.

        `pathname` defaults to `ConfigDlg.PathNameDft`.  `m_config` starts
        out as `ConfigDlg.ConfigDft`; that value is returned unchanged when
        the file cannot be opened.  An XML syntax error is printed and the
        configuration as it stands at that point is returned.
        """
        if pathname is None:
            pathname = ConfigDlg.PathNameDft
        self.m_config = ConfigDlg.ConfigDft
        try:
            fp = open(pathname, 'r')
        except IOError:
            return self.m_config
        # try/finally guarantees the file is closed even when a handler
        # raises something other than ExpatError.
        try:
            parser = expat.ParserCreate()
            parser.returns_unicode = False
            parser.StartElementHandler = self.onElemStart
            parser.CharacterDataHandler = self.onElemData
            parser.EndElementHandler = self.onElemEnd
            self.m_stack = []
            try:
                parser.ParseFile(fp)
            except expat.ExpatError as e:
                print("%s: %s" % (pathname, expat.ErrorString(e.code)))
        finally:
            fp.close()
        return self.m_config
class StylesheetReader(_ReaderBase):
    """Reader that parses a stylesheet from a URI or a stream.

    Extends `_ReaderBase` with XSLT registration and stylesheet set-up.
    """

    def __init__(self, force8Bit=0):
        _ReaderBase.__init__(self)
        # Stored only; not read anywhere in this class body.
        self.force8Bit = force8Bit
        # URI of the stylesheet currently being read ('' when idle).
        self._ssheetUri = ''
        return

    def fromUri(self, uri, baseUri='', ownerDoc=None, stripElements=None):
        """Record the resolved stylesheet URI, then delegate to `_ReaderBase.fromUri`."""
        self._ssheetUri = urllib.basejoin(baseUri, uri)
        result = _ReaderBase.fromUri(self, uri, baseUri, ownerDoc, stripElements)
        return result

    def fromStream(self, stream, baseUri='', ownerDoc=None, stripElements=None):
        """Parse a stylesheet from `stream`.

        Returns the document element when the parsed root is a document
        node (after calling its `setup()`), otherwise the root itself.
        Raises `FtException` when the parser reports an error code during
        an exception, and `XsltException` when `ParseFile` returns a false
        value.  `stripElements` is not used in this method.
        """
        # Register the XSLT machinery once, on first use.
        if not xslt.g_registered:
            xslt.Register()
        self.initParser()
        self.initState(ownerDoc, baseUri)
        p = self.parser
        try:
            success = self.parser.ParseFile(stream)
        except XsltException:
            # XSLT-level errors pass through untouched.
            raise
        except Exception, e:
            # Release every node still on the stack before reporting.
            for s in self._nodeStack:
                self.releaseNode(s)
            if p.ErrorCode:
                # The parser has an error code: report it with its position.
                raise FtException(XML_PARSE_ERROR, p.ErrorLineNumber, p.ErrorColumnNumber, expat.ErrorString(p.ErrorCode))
            else:
                # No parser error code: re-raise the original exception.
                raise
        self._ssheetUri = ''
        self.killParser()
        if not success:
            self.releaseNode(self._rootNode)
            self.releaseNode(self._ownerDoc)
            raise XsltException(Error.STYLESHEET_PARSE_ERROR, baseUri, p.ErrorLineNumber, p.ErrorColumnNumber, expat.ErrorString(p.ErrorCode))
        self._completeTextNode()
        root = self._rootNode or self._ownerDoc
        if root.nodeType == Node.DOCUMENT_NODE:
            sheet = root.documentElement
            try:
                sheet.setup()
            except:
                # Set-up failed: reclaim the sheet and release the tree
                # before re-raising.
                sheet.reclaim()
                self.releaseNode(root)
                raise
        else:
            sheet = None
        rt = sheet or root
        return rt
def test_parse_again(self):
    """Reusing a parser for a second ParseFile call must fail with XML_ERROR_FINISHED."""
    stream = BytesIO(data)
    xml_parser = expat.ParserCreate()
    xml_parser.ParseFile(stream)
    with self.assertRaises(expat.error) as caught:
        xml_parser.ParseFile(stream)
    message = expat.ErrorString(caught.exception.code)
    self.assertEqual(message, expat.errors.XML_ERROR_FINISHED)
def check(self, lines):
    """Check that `lines` form well-formed XML.

    Each line is fed to an expat parser followed by a newline, so the
    parser's line numbers match indices into `lines` (1-based).  The first
    syntax error is reported through `self._handle_style_error` under the
    'xml/syntax' category; nothing is reported for well-formed input.
    """
    parser = expat.ParserCreate()
    try:
        for line in lines:
            parser.Parse(line)
            parser.Parse('\n')
        # An empty final chunk tells expat the document is complete.
        parser.Parse('', True)
    except expat.ExpatError as error:
        # NOTE(review): the literal 5 is passed through unchanged;
        # presumably a confidence level -- confirm against the handler.
        self._handle_style_error(error.lineno, 'xml/syntax', 5,
                                 expat.ErrorString(error.code))
def feed(self, data, isFinal=0):
    """Feed a chunk of XML to the expat parser.

    On the first chunk the reader is reset, marked as parsing, and the
    content handler's `startDocument` is called.  An expat error is
    wrapped in a `SAXParseException` and passed to the error handler's
    `fatalError`.
    """
    first_chunk = not self._parsing
    if first_chunk:
        self.reset()
        self._parsing = 1
        self._cont_handler.startDocument()

    try:
        self._parser.Parse(data, isFinal)
    except expat.error as parse_error:
        message = expat.ErrorString(parse_error.code)
        self._err_handler.fatalError(
            SAXParseException(message, parse_error, self))
def test_parse_again(self):
    """Check the error raised when a parser instance is reused.

    Issue 6676: a meaningful exception must be raised when attempting to
    parse more than one XML document per xmlparser instance, a limitation
    of the Expat library.
    """
    stream = BytesIO(data)
    xml_parser = expat.ParserCreate()
    xml_parser.ParseFile(stream)
    with self.assertRaises(expat.error) as caught:
        # The parser has already finished a document, so this must fail.
        xml_parser.ParseFile(stream)
    message = expat.ErrorString(caught.exception.code)
    self.assertEqual(message, expat.errors.XML_ERROR_FINISHED)
def parse_file(self, filepath):
    """Parse the XML file at `filepath` into `self.tree` / `self.root`.

    I/O errors and XML syntax errors are reported on stderr and leave
    `self.tree` / `self.root` untouched.  Any other failure is re-raised
    as an `Exception` naming the file.
    """
    try:
        self.tree = ET.parse(filepath)
        self.root = self.tree.getroot()
    except IOError:
        sys.stderr.write('[I/O Error] No such file or directory\n')
    except (EP.ExpatError, getattr(ET, 'ParseError', EP.ExpatError)) as e:
        # ElementTree's ParseError carries `position`; a raw ExpatError
        # carries `lineno` / `offset`.
        line, column = getattr(e, 'position', (e.lineno, e.offset))
        # str() is required: concatenating the integers directly raised a
        # TypeError inside the error handler.
        sys.stderr.write('[XML Error] ' + EP.ErrorString(e.code)
                         + ': line ' + str(line)
                         + ', column ' + str(column) + '\n')
    except Exception:
        # `Exception`, not a bare except, so SystemExit and
        # KeyboardInterrupt still propagate.
        raise Exception('[Unknown error] Cannot parse \"' + filepath + '\"')
def parse(self, src_data):
    """Parse `src_data` into a tree of `XMLElement` nodes.

    Every element built is tagged with the parser's current line and
    column.  Comments become elements tagged `XMLElement.comment_tag`.
    Returns the root when its tag is 'monkeyml'; otherwise the problem is
    reported through `self.error_logger` and None is returned implicitly.
    """
    builder = ElementTree.TreeBuilder(XMLElement)
    xml_parser = expat.ParserCreate()

    def on_xml_decl(version, encoding, standalone):
        # The XML declaration is accepted and ignored.
        pass

    def on_start(name, attrs):
        # With ordered_attributes set, attrs is a flat
        # [name, value, name, value, ...] sequence.
        pairs = (attrs[i:i+2] for i in range(0, len(attrs), 2))
        elem = builder.start(name, OrderedDict(pairs))
        elem.lineno = xml_parser.CurrentLineNumber
        elem.colno = xml_parser.CurrentColumnNumber

    def on_end(name):
        builder.end(name)

    def on_comment(data):
        comment_tag = XMLElement.comment_tag
        on_start(comment_tag, ())
        builder.data(data)
        on_end(comment_tag)

    def on_default(data):
        # Anything reaching the default handler that is not pure
        # whitespace is reported as unexpected.
        if data.strip():
            self.error_logger('XML contains unexpected data',
                              lineno = xml_parser.CurrentLineNumber,
                              colno = xml_parser.CurrentColumnNumber)

    xml_parser.XmlDeclHandler = on_xml_decl
    xml_parser.StartElementHandler = on_start
    xml_parser.EndElementHandler = on_end
    xml_parser.CommentHandler = on_comment
    xml_parser.DefaultHandlerExpand = on_default
    xml_parser.buffer_text = True
    xml_parser.ordered_attributes = True

    try:
        xml_parser.Parse(src_data, True)
        root = builder.close()
        if root.tag != 'monkeyml':
            self.error_logger('XML does not contain an MWorks experiment '
                              '(root element is %s, not monkeyml)' % root.tag)
        else:
            return root
    except expat.ExpatError:
        message = 'Failed to parse XML: %s' % expat.ErrorString(xml_parser.ErrorCode)
        self.error_logger(message,
                          lineno = xml_parser.ErrorLineNumber,
                          colno = xml_parser.ErrorColumnNumber)
def fromStream(self, stream, ownerDoc=None):
    """Parse `stream` and return the resulting root node (or owner document).

    When the parser's `ParseFile` returns a false value, any nodes built
    so far are released and `FtDomException` is raised with the parser's
    line, column and error string.
    """
    self.initParser()
    self.initState(ownerDoc)

    if self.parser.ParseFile(stream):
        self._completeTextNode()
        return self._rootNode or self._ownerDoc

    # Failure path: the imports are only needed here.
    from xml.dom.ext import FtDomException
    from xml.dom import XML_PARSE_ERR
    if self._rootNode:
        ReleaseNode(self._rootNode)
    if self._ownerDoc:
        ReleaseNode(self._ownerDoc)
    failed = self.parser
    details = (failed.ErrorLineNumber,
               failed.ErrorColumnNumber,
               expat.ErrorString(failed.ErrorCode))
    raise FtDomException(XML_PARSE_ERR, details)
def feed(self, data, isFinal=0):
    """Feed a chunk of XML to the expat parser.

    On the first chunk the reader is reset, marked as parsing, and the
    content handler's `startDocument` is called.  An expat error is
    wrapped in a `SAXParseException` and passed to the error handler's
    `fatalError`.
    """
    if not self._parsing:
        self.reset()
        self._parsing = 1
        self._cont_handler.startDocument()

    try:
        # The isFinal parameter is internal to the expat reader.
        # If it is set to true, expat will check validity of the entire
        # document. When feeding chunks, they are not normally final -
        # except when invoked from close.
        self._parser.Parse(data, isFinal)
    except expat.error as e:
        # `as` form: valid on Python 2.6+ and on Python 3.
        exc = SAXParseException(expat.ErrorString(e.code), e, self)
        # FIXME: when to invoke error()?
        self._err_handler.fatalError(exc)
def fromStream(self, stream, baseUri='', ownerDoc=None, stripElements=None):
    """Parse a stylesheet from `stream`.

    Only the parsing step is visible in this excerpt: `success` is
    assigned but not used, and `stripElements` is not read.
    """
    # Register the XSLT machinery once, on first use.
    if not xslt.g_registered:
        xslt.Register()
    self.initParser()
    self.initState(ownerDoc, baseUri)
    p = self.parser
    try:
        success = self.parser.ParseFile(stream)
    except XsltException:
        # XSLT-level errors pass through untouched.
        raise
    except Exception, e:
        # Release every node still on the stack before reporting.
        for s in self._nodeStack:
            self.releaseNode(s)
        if p.ErrorCode:
            # The parser has an error code: report it with its position.
            raise FtException(XML_PARSE_ERROR, p.ErrorLineNumber, p.ErrorColumnNumber, expat.ErrorString(p.ErrorCode))
        else:
            # No parser error code: re-raise the original exception.
            raise
def _parse_nfo(nfo_path, nfo_data=None): # nfo files can contain XML or a URL to seed the XBMC metadata scrapers # It's also possible to have both (a URL after the XML metadata) # pyTivo only parses the XML metadata, but we'll try to stip the URL # from mixed XML/URL files. Returns `None` when XML can't be parsed. if nfo_data is None: nfo_data = [line.strip() for line in file(nfo_path, 'rU')] xmldoc = None try: xmldoc = minidom.parseString(os.linesep.join(nfo_data)) except expat.ExpatError, err: import ipdb ipdb.set_trace() if expat.ErrorString(err.code) == expat.errors.XML_ERROR_INVALID_TOKEN: # might be a URL outside the xml while len(nfo_data) > err.lineno: if len(nfo_data[-1]) == 0: nfo_data.pop() else: break if len(nfo_data) == err.lineno: # last non-blank line contains the error nfo_data.pop() return _parse_nfo(nfo_path, nfo_data)
<root attr1="value1" attr2="value2ὀ"> <myns:subelement xmlns:myns="http://www.python.org/namespace"> Contents of subelements </myns:subelement> <sub2><![CDATA[contents of CDATA section]]></sub2> &external_entity; </root> ''' # Produce UTF-8 output parser.returns_unicode = 0 try: parser.Parse(data, 1) except expat.error: print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode) print '** Line', parser.ErrorLineNumber print '** Column', parser.ErrorColumnNumber print '** Byte', parser.ErrorByteIndex # Try the parse again, this time producing Unicode output parser = expat.ParserCreate(namespace_separator='!') parser.returns_unicode = 1 for name in HANDLER_NAMES: setattr(parser, name, getattr(out, name)) try: parser.Parse(data, 1) except expat.error: print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode) print '** Line', parser.ErrorLineNumber
# Very simple test - Parse a file and print what happens # XXX TypeErrors on calling handlers, or on bad return values from a # handler, are obscure and unhelpful. import pyexpat from xml.parsers import expat from test_support import sortdict class Outputter: def StartElementHandler(self, name, attrs): print 'Start element:\n\t', repr(name), sortdict(attrs) def EndElementHandler(self, name): print 'End element:\n\t', repr(name) def CharacterDataHandler(self, data): data = data.strip() if data: print 'Character data:' print '\t', repr(data) def ProcessingInstructionHandler(self, target, data): print 'PI:\n\t', repr(target), repr(data) def StartNamespaceDeclHandler(self, prefix, uri): print 'NS decl:\n\t', repr(prefix), repr(uri) def EndNamespaceDeclHandler(self, prefix): print 'End of NS decl:\n\t', repr(prefix) def StartCdataSectionHandler(self): print 'Start of CDATA section' def EndCdataSectionHandler(self): print 'End of CDATA section' def CommentHandler(self, text): print 'Comment:\n\t', repr(text) def NotationDeclHandler(self, *args):
"""Parses input file into an ElementTree object, returns the object""" try: self.inputtree = ET.parse(self.inputfile) except IOError, detail: self.writeLog( 'parsing ' + self.inputfile + 'Error parsing input file - ' + str(detail), 'error') exit() except Exception, detail: #using expat for getting more detailed information about the exception if str(detail.__module__) == 'xml.parsers.expat': import xml.parsers.expat as expat errorstr = expat.ErrorString(detail.code) self.writeLog( 'parsing ' + self.inputfile + ': ' + 'malformed input file on line ' + str(detail.lineno) + ': ' + errorstr, 'error') else: self.writeLog( 'parsing ' + self.inputfile + ': ' + 'Unhandled exception while parsing inputfile \'' + self.inputfile + '\':' + str(detail), 'error') exit() #---------------------------------------------------------------------- def checkInput(self): """Quality check on elements in input tree. This function will ensure the program will proceed only if the element structure is as described in the sample
def _fixname(key):
    # type: (Text) -> Text
    """Return the cached converted form of `key`.

    A name containing "}" gets a leading "{" prepended; other names are
    returned unchanged.  Results are memoised in the module-level
    `_names` mapping.
    """
    try:
        name = _names[key]
    except KeyError:
        name = key
        if "}" in name:
            name = "{" + name
        _names[key] = name
    return name


# Numeric expat code for "undefined entity".  From Python 3.2 on,
# expat.errors.codes maps message -> code; on older versions the mapping
# is rebuilt by asking ErrorString for each code below 0x100.
if sys.version_info[0:2] >= (3, 2):
    _undefined_entity_code = expat.errors.codes[expat.errors.XML_ERROR_UNDEFINED_ENTITY]  # type: int
else:
    _codes = {expat.ErrorString(i): i for i in range(0x100)}  # type: Dict[str, int]
    _undefined_entity_code = _codes[expat.errors.XML_ERROR_UNDEFINED_ENTITY]


class XMLParser(object):
    """
    An XML parser with support for XHTML DTDs and all Python-supported encodings

    This implements the API defined by
    xml.etree.ElementTree.XMLParser, but supports XHTML DTDs
    (therefore allowing XHTML entities) and supports all encodings
    Python does, rather than just those supported by expat.
    """

    def __init__(self, encoding=None):
        # type: (Optional[Text]) -> None
        # "}" is passed as expat's namespace separator.
        self._parser = expat.ParserCreate(encoding, "}")
# type: (str) -> str try: name = _names[key] except KeyError: name = key if "}" in name: name = "{" + name _names[key] = name return name if sys.version_info[0:2] >= (3, 2): _undefined_entity_code = expat.errors.codes[ expat.errors.XML_ERROR_UNDEFINED_ENTITY] # type: int else: _codes = {expat.ErrorString(i): i for i in range(0x100)} # type: Dict[str, int] _undefined_entity_code = _codes[expat.errors.XML_ERROR_UNDEFINED_ENTITY] class XMLParser(object): """ An XML parser with support for XHTML DTDs and all Python-supported encodings This implements the API defined by xml.etree.ElementTree.XMLParser, but supports XHTML DTDs (therefore allowing XHTML entities) and supports all encodings Python does, rather than just those supported by expat. """ def __init__(self, encoding=None): # type: (Optional[str]) -> None
def endElement(self, tag):
    """Pop the innermost open tag and verify that `tag` closes it.

    NOTE(review): presumably a method of the `sax_callbacks` handler
    class instantiated below; the class header is outside this excerpt.
    """
    last_tag = self.el_stack.pop()
    if tag != last_tag:
        # Format the message; the values used to be passed as extra
        # Exception arguments and were never substituted.
        raise Exception(
            'Error: mismatched tags (last_tage: %s, closing_tag: %s)'
            % (last_tag, tag))


# Parse every .mak file below the current directory and report the ones
# that fail, ignoring failures caused only by an undefined entity.
error_files = []
for mak_file in rec_glob('.', '*.mak'):
    with open(mak_file, 'r') as f:
        buf = f.read()
    handler = sax_callbacks()
    try:
        ctxt = expat.ParserCreate()
        handler.startDocument()
        ctxt.StartElementHandler = handler.startElement
        ctxt.EndElementHandler = handler.endElement
        ctxt.Parse(buf, 1)
        handler.endDocument()
    except Exception as e:
        # Only expat errors carry `.code`; handler exceptions (e.g. the
        # mismatched-tag error above) do not, and must still be reported.
        code = getattr(e, 'code', None)
        if (code is None or
                expat.ErrorString(code) != expat.errors.XML_ERROR_UNDEFINED_ENTITY):
            print("Parsing " + mak_file + " failed")
            print(e)
# Read the XML, recursively inserting any included XMLs.  The second
# result describes the origin of each line of the final XML (to
# facilitate future error detection).
FINAL_XML, LINES_ORIGIN = include.handle_includes(INFILENAME)

# The version ID is a hash of the XML defining the interface; the text is
# encoded in UTF-8 first, to avoid problems with different locales.
wb.GLB.VER_ID = zlib.crc32(FINAL_XML.encode("utf-8"))

# Get the root element, and find the corresponding block.
try:
    EL_ROOT = et.fromstring(FINAL_XML)
except et.ParseError as perr:
    # Report the parsing error with the offending line and a column marker.
    ROW, COL = perr.position
    print("".join([
        "Parsing error ", str(perr.code),
        "(", pe.ErrorString(perr.code), ") in column ", str(COL),
        " of the line ", str(ROW), " of the concatenated XML:",
    ]))
    print(FINAL_XML.split("\n")[ROW - 1])
    print("-" * COL + "|")
    print("The erroneous line was produced from the following sources:")
    ERR_SRC = include.find_error(LINES_ORIGIN, ROW)
    for src in ERR_SRC:
        print("file: " + src[0] + ", line:" + str(src[1]))
    sys.exit(1)

TOP_NAME = EL_ROOT.attrib["top"]
# The number of masters defaults to 1 when the attribute is absent.
N_MASTERS = ex.exprval(EL_ROOT.attrib["masters"]) if "masters" in EL_ROOT.attrib else 1
# Find constants and feed them into the expressions module
def __report_error(self):
    """Report the parser's current error code to the error handler as fatal."""
    message = expat.ErrorString(self.parser.ErrorCode)
    self.err_handler.fatalError(
        saxlib.SAXParseException(message, None, self))
def _report(e, lines):
    """Format expat error `e` with the source lines surrounding it.

    Up to two lines before and one line after the failing line are passed
    to `_decorate` along with the error column and message.
    """
    # Clamp at 0: for errors on line 1 or 2 a negative start would make
    # the slice count from the END of `lines` and select the wrong lines.
    start = max(e.lineno - 3, 0)
    errlines = lines[start:e.lineno + 2]
    # NOTE(review): the 2nd and 3rd arguments are presumably the number of
    # the first displayed line and the 1-based position of the failing
    # line within `errlines` -- confirm against `_decorate`.  For
    # e.lineno >= 3 they equal the previous values (e.lineno - 2 and 3).
    return _decorate(errlines, start + 1, e.lineno - start, e.offset,
                     expat.ErrorString(e.code))
def lint_with_text(self, request, text):
    """Lint the given XML content with expat.

    Returns a `koLintResults` holding at most one error result, anchored
    on the nearest non-empty line at or before the line expat reported.
    Returns None when no such line exists.
    """
    text = eollib.convertToEOLFormat(text, eollib.EOL_LF)
    text = text.encode(request.encoding.python_encoding_name)
    parser = expat.ParserCreate()
    results = koLintResults()

    try:
        # We need to remove the BOM on UTF-8 data to prevent expat from
        # crashing. We should check to see if this is still necessary
        # with every new Python version (we are at 2.0 now).
        utf8bom = u'\ufeff'.encode('utf-8')
        if text.startswith(utf8bom):
            parser.Parse(text[len(utf8bom):], 1)
        else:
            parser.Parse(text, 1)
    except expat.error:
        result = KoLintResult()
        result.description = "XML Error: %s" % expat.ErrorString(
            parser.ErrorCode)

        # find an existing, non-empty line on which to display result
        # XXX This logic should be in the LintDisplayer (should
        #     rearchitect a la Trent's lintx)
        text = eollib.convertToEOLFormat(request.content, eollib.EOL_LF)
        lines = text.split("\n")
        lineStart = parser.ErrorLineNumber
        retreated = 0
        # Walk back until lineStart names an existing, non-empty line.
        # The `> 0` test comes first so lines[-1] is never probed when the
        # search runs off the top of the file.
        while lineStart > 0 and not (lineStart <= len(lines)
                                     and lines[lineStart - 1]):
            lineStart -= 1
            retreated = 1
        if lineStart == 0:
            log.warn(
                "Could not find a suitable line on which "
                "to display lint result for line %d.",
                parser.ErrorLineNumber)
            return None
        if retreated:
            result.description += " (error is on line %d, but displayed "\
                                  "here)" % parser.ErrorLineNumber
        result.lineStart = result.lineEnd = lineStart
        result.columnStart = 1
        result.columnEnd = len(lines[result.lineEnd - 1]) + 1
        result.severity = result.SEV_ERROR
        results.addResult(result)
    return results
"""