Beispiel #1
0
def _parse_nfo(nfo_path: str,
               nfo_data: Optional[List[str]] = None) -> minidom.Document:
    # nfo files can contain XML or a URL to seed the XBMC metadata scrapers
    # It's also possible to have both (a URL after the XML metadata)
    # pyTivo only parses the XML metadata, but we'll try to stip the URL
    # from mixed XML/URL files.  Returns `None` when XML can't be parsed.
    if nfo_data is None:
        with open(nfo_path, "r") as nfo_fh:
            nfo_data = [line.strip() for line in nfo_fh]
    xmldoc = None
    try:
        xmldoc = minidom.parseString(os.linesep.join(nfo_data))
    except expat.ExpatError as err:
        if expat.ErrorString(err.code) == expat.errors.XML_ERROR_INVALID_TOKEN:
            # might be a URL outside the xml
            while len(nfo_data) > err.lineno:
                if len(nfo_data[-1]) == 0:
                    nfo_data.pop()
                else:
                    break
            if len(nfo_data) == err.lineno:
                # last non-blank line contains the error
                nfo_data.pop()
                return _parse_nfo(nfo_path, nfo_data)
    return xmldoc
Beispiel #2
0
class ConfigXml():
    """Loads a configuration from an XML file using an expat parser.

    The element callbacks (``onElemStart``, ``onElemData``, ``onElemEnd``)
    are referenced by ``parse`` but are not defined in this part of the file.
    """

    def __init__(self):
        self.m_curData = ""
        self.m_config = None
        # Reset at the start of every parse; initialised here as well so the
        # attribute always exists.
        self.m_stack = []

    def parse(self, pathname=None):
        """Parse ``pathname`` and return the resulting configuration.

        ``pathname`` defaults to ``ConfigDlg.PathNameDft``.  The
        configuration starts out as ``ConfigDlg.ConfigDft``; if the file
        cannot be opened that default is returned as is.  An expat parse
        error is printed and whatever configuration exists at that point is
        returned.
        """
        if pathname is None:
            pathname = ConfigDlg.PathNameDft
        # NOTE(review): this binds the shared default object itself, not a
        # copy -- confirm the element handlers do not mutate it in place.
        self.m_config = ConfigDlg.ConfigDft
        try:
            fp = open(pathname, 'r')
        except IOError:
            return self.m_config
        try:
            parser = expat.ParserCreate()
            parser.returns_unicode = False
            parser.StartElementHandler = self.onElemStart
            parser.CharacterDataHandler = self.onElemData
            parser.EndElementHandler = self.onElemEnd
            self.m_stack = []
            parser.ParseFile(fp)
        except expat.ExpatError as e:
            print("%s: %s" % (pathname, expat.ErrorString(e.code)))
        finally:
            # Close the file even when a handler raises something that is
            # not an ExpatError.
            fp.close()
        return self.m_config
Beispiel #3
0
class StylesheetReader(_ReaderBase):
    """Reader that parses a stylesheet from a URI or stream with expat.

    Parser and state management (``initParser``, ``initState``,
    ``killParser``, ``releaseNode``, ...) come from ``_ReaderBase``, which is
    defined outside this part of the file.
    """

    def __init__(self, force8Bit=0):
        _ReaderBase.__init__(self)
        self.force8Bit = force8Bit
        # URI of the stylesheet being read; set by fromUri() and cleared
        # again once fromStream() has finished parsing.
        self._ssheetUri = ''

    def fromUri(self, uri, baseUri='', ownerDoc=None, stripElements=None):
        """Record the resolved stylesheet URI, then defer to the base reader."""
        self._ssheetUri = urllib.basejoin(baseUri, uri)
        return _ReaderBase.fromUri(self, uri, baseUri, ownerDoc,
                                   stripElements)

    def fromStream(self,
                   stream,
                   baseUri='',
                   ownerDoc=None,
                   stripElements=None):
        """Parse a stylesheet from ``stream``.

        Returns the document element after its ``setup()`` has run when the
        parse produced a document node, otherwise the root node itself.

        Raises:
            XsltException: re-raised unchanged if raised while parsing, or
                raised with ``Error.STYLESHEET_PARSE_ERROR`` when the parser
                reports failure.
            FtException: when another exception occurs during parsing and
                the parser has an error code set.
        """
        if not xslt.g_registered:
            xslt.Register()
        self.initParser()
        self.initState(ownerDoc, baseUri)
        p = self.parser
        try:
            success = self.parser.ParseFile(stream)
        except XsltException:
            raise
        except Exception:
            # Release every node built so far before reporting the failure.
            # NOTE(review): this path neither resets _ssheetUri nor calls
            # killParser() -- confirm that is intended.
            for s in self._nodeStack:
                self.releaseNode(s)
            if p.ErrorCode:
                raise FtException(XML_PARSE_ERROR, p.ErrorLineNumber,
                                  p.ErrorColumnNumber,
                                  expat.ErrorString(p.ErrorCode))
            else:
                raise
        self._ssheetUri = ''
        self.killParser()
        if not success:
            self.releaseNode(self._rootNode)
            self.releaseNode(self._ownerDoc)
            # ``p`` still refers to the parser object after killParser().
            raise XsltException(Error.STYLESHEET_PARSE_ERROR, baseUri,
                                p.ErrorLineNumber, p.ErrorColumnNumber,
                                expat.ErrorString(p.ErrorCode))
        self._completeTextNode()

        root = self._rootNode or self._ownerDoc
        if root.nodeType == Node.DOCUMENT_NODE:
            sheet = root.documentElement
            try:
                sheet.setup()
            except:
                # Clean up on any failure (including interrupts), then let
                # the original exception propagate.
                sheet.reclaim()
                self.releaseNode(root)
                raise
        else:
            sheet = None
        return sheet or root
 def test_parse_again(self):
     """Reusing a finished parser must fail with XML_ERROR_FINISHED."""
     stream = BytesIO(data)
     xml_parser = expat.ParserCreate()
     # First pass consumes the whole document and finishes the parser.
     xml_parser.ParseFile(stream)
     with self.assertRaises(expat.error) as caught:
         xml_parser.ParseFile(stream)
     reported = expat.ErrorString(caught.exception.code)
     self.assertEqual(reported, expat.errors.XML_ERROR_FINISHED)
Beispiel #5
0
 def check(self, lines):
     """Feed ``lines`` to an expat parser and report the first syntax error.

     Every line is followed by an explicit newline so that the line number
     expat reports corresponds to the index in ``lines``.  On a parse error
     ``self._handle_style_error`` is called with the line number, the
     category ``'xml/syntax'``, the value 5 and expat's error text.
     """
     parser = expat.ParserCreate()
     try:
         for line in lines:
             parser.Parse(line)
             parser.Parse('\n')
         # An empty final chunk tells expat the document is complete.
         parser.Parse('', True)
     except expat.ExpatError as error:
         self._handle_style_error(error.lineno, 'xml/syntax', 5,
                                  expat.ErrorString(error.code))
Beispiel #6
0
 def feed(self, data, isFinal = 0):
     """Pass a chunk of data to expat, starting the document on first use.

     An expat error is wrapped in a SAXParseException (with this reader as
     the locator) and handed to the error handler's ``fatalError``.
     """
     if not self._parsing:
         # First chunk: reset the reader and announce the document.
         self.reset()
         self._parsing = 1
         self._cont_handler.startDocument()
     try:
         self._parser.Parse(data, isFinal)
     except expat.error as err:
         message = expat.ErrorString(err.code)
         self._err_handler.fatalError(SAXParseException(message, err, self))
 def test_parse_again(self):
     """Parsing a second document with the same parser raises expat.error."""
     source = BytesIO(data)
     xml_parser = expat.ParserCreate()
     xml_parser.ParseFile(source)
     # Issue 6676: ensure a meaningful exception is raised when attempting
     # to parse more than one XML document per xmlparser instance,
     # a limitation of the Expat library.
     with self.assertRaises(expat.error) as caught:
         xml_parser.ParseFile(source)
     error_text = expat.ErrorString(caught.exception.code)
     self.assertEqual(error_text, expat.errors.XML_ERROR_FINISHED)
Beispiel #8
0
 def parse_file(self, filepath):
     """Parse ``filepath`` with ElementTree into ``self.tree``/``self.root``.

     I/O and XML syntax errors are reported on stderr and swallowed; in that
     case ``self.tree`` and ``self.root`` are left untouched.

     Raises:
         Exception: for any other failure while parsing.
     """
     # Older ElementTree releases let the raw ExpatError escape; newer ones
     # raise ET.ParseError, which exposes ``code`` and ``position``.
     parse_errors = (EP.ExpatError, getattr(ET, 'ParseError', EP.ExpatError))
     try:
         self.tree = ET.parse(filepath)
         self.root = self.tree.getroot()
     except IOError:
         sys.stderr.write('[I/O Error] No such file or directory\n')
     except parse_errors as e:
         lineno, offset = (getattr(e, 'position', None)
                           or (e.lineno, e.offset))
         # %-formatting: lineno and offset are integers, so they cannot be
         # concatenated to the message directly.
         sys.stderr.write('[XML Error] %s: line %s, column %s\n'
                          % (EP.ErrorString(e.code), lineno, offset))
     except Exception:
         raise Exception('[Unknown error] Cannot parse "%s"' % (filepath,))
Beispiel #9
0
    def parse(self, src_data):
        """Parse ``src_data`` into a tree of ``XMLElement`` nodes.

        Each element is stamped with the ``lineno``/``colno`` expat reported
        when its start tag (or comment) was seen.  Comments are kept as
        elements tagged ``XMLElement.comment_tag``.  Non-whitespace content
        that reaches the default handler is reported via
        ``self.error_logger``.

        Returns the root element when its tag is ``monkeyml``.  Otherwise the
        problem (wrong root element or an expat parse failure) is passed to
        ``self.error_logger`` and, provided that call returns, the result is
        ``None``.
        """
        builder = ElementTree.TreeBuilder(XMLElement)
        parser = expat.ParserCreate()

        def on_xml_decl(version, encoding, standalone):
            # Nothing from the XML declaration is needed.
            pass

        def on_start(name, attrs):
            # With ordered_attributes set, attrs is a flat
            # [name, value, name, value, ...] sequence; pair it up.
            pairs = (attrs[i:i+2] for i in range(0, len(attrs), 2))
            elem = builder.start(name, OrderedDict(pairs))
            elem.lineno = parser.CurrentLineNumber
            elem.colno = parser.CurrentColumnNumber

        def on_end(name):
            builder.end(name)

        def on_comment(data):
            # Represent a comment as an ordinary element holding its text.
            comment_tag = XMLElement.comment_tag
            on_start(comment_tag, ())
            builder.data(data)
            on_end(comment_tag)

        def on_default(data):
            if not data.strip():
                return
            self.error_logger('XML contains unexpected data',
                              lineno = parser.CurrentLineNumber,
                              colno = parser.CurrentColumnNumber)

        parser.buffer_text = True
        parser.ordered_attributes = True

        parser.XmlDeclHandler = on_xml_decl
        parser.StartElementHandler = on_start
        parser.EndElementHandler = on_end
        parser.CommentHandler = on_comment
        parser.DefaultHandlerExpand = on_default

        try:
            parser.Parse(src_data, True)
            root = builder.close()
            if root.tag != 'monkeyml':
                self.error_logger('XML does not contain an MWorks experiment '
                                  '(root element is %s, not monkeyml)' % root.tag)
            else:
                return root
        except expat.ExpatError:
            failure = 'Failed to parse XML: %s' % expat.ErrorString(
                parser.ErrorCode)
            self.error_logger(failure,
                              lineno = parser.ErrorLineNumber,
                              colno = parser.ErrorColumnNumber)
        return None
Beispiel #10
0
 def fromStream(self, stream, ownerDoc=None):
     """Parse ``stream`` and return the root node (or the owner document).

     When the parser reports failure, any root node and owner document are
     released and an ``FtDomException`` carrying the error line, column and
     expat message is raised.
     """
     self.initParser()
     self.initState(ownerDoc)
     parsed_ok = self.parser.ParseFile(stream)
     if parsed_ok:
         self._completeTextNode()
         return self._rootNode or self._ownerDoc

     # Failure path: the exception types are imported only when needed.
     from xml.dom.ext import FtDomException
     from xml.dom import XML_PARSE_ERR
     if self._rootNode:
         ReleaseNode(self._rootNode)
     if self._ownerDoc:
         ReleaseNode(self._ownerDoc)
     failed = self.parser
     details = (failed.ErrorLineNumber, failed.ErrorColumnNumber,
                expat.ErrorString(failed.ErrorCode))
     raise FtDomException(XML_PARSE_ERR, details)
Beispiel #11
0
    def feed(self, data, isFinal=0):
        """Pass a chunk of data to expat, starting the document on first use.

        An expat error is wrapped in a SAXParseException (with this reader
        as the locator) and handed to the error handler's ``fatalError``.
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)
Beispiel #12
0
 def fromStream(self,
                stream,
                baseUri='',
                ownerDoc=None,
                stripElements=None):
     """Parse a stylesheet from ``stream``.

     ``XsltException`` raised during parsing propagates unchanged.  For any
     other exception the nodes on ``self._nodeStack`` are released first;
     if the parser has an error code set, an ``FtException`` describing the
     parse error is raised, otherwise the original exception is re-raised.
     """
     if not xslt.g_registered:
         xslt.Register()
     self.initParser()
     self.initState(ownerDoc, baseUri)
     p = self.parser
     try:
         success = self.parser.ParseFile(stream)
     except XsltException:
         raise
     except Exception:
         for s in self._nodeStack:
             self.releaseNode(s)
         if p.ErrorCode:
             raise FtException(XML_PARSE_ERROR, p.ErrorLineNumber,
                               p.ErrorColumnNumber,
                               expat.ErrorString(p.ErrorCode))
         else:
             raise
Beispiel #13
0
def _parse_nfo(nfo_path, nfo_data=None):
    # nfo files can contain XML or a URL to seed the XBMC metadata scrapers
    # It's also possible to have both (a URL after the XML metadata)
    # pyTivo only parses the XML metadata, but we'll try to stip the URL
    # from mixed XML/URL files.  Returns `None` when XML can't be parsed.
    if nfo_data is None:
        nfo_data = [line.strip() for line in file(nfo_path, 'rU')]
    xmldoc = None
    try:
        xmldoc = minidom.parseString(os.linesep.join(nfo_data))
    except expat.ExpatError, err:
        import ipdb
        ipdb.set_trace()
        if expat.ErrorString(err.code) == expat.errors.XML_ERROR_INVALID_TOKEN:
            # might be a URL outside the xml
            while len(nfo_data) > err.lineno:
                if len(nfo_data[-1]) == 0:
                    nfo_data.pop()
                else:
                    break
            if len(nfo_data) == err.lineno:
                # last non-blank line contains the error
                nfo_data.pop()
                return _parse_nfo(nfo_path, nfo_data)
Beispiel #14
0
<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''

# Produce UTF-8 output
parser.returns_unicode = 0
try:
    # The second argument (1) marks `data` as the final chunk of input.
    parser.Parse(data, 1)
except expat.error:
    # Report the error details recorded on the parser object.
    print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
    print '** Line', parser.ErrorLineNumber
    print '** Column', parser.ErrorColumnNumber
    print '** Byte', parser.ErrorByteIndex

# Try the parse again, this time producing Unicode output
# A new parser object is created for this second pass, this time with '!'
# as the namespace separator.
parser = expat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1

# Attach the handler of the same name from `out` for every entry in
# HANDLER_NAMES (both are defined outside this part of the file).
for name in HANDLER_NAMES:
    setattr(parser, name, getattr(out, name))
try:
    parser.Parse(data, 1)
except expat.error:
    print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
    print '** Line', parser.ErrorLineNumber
Beispiel #15
0
# Very simple test - Parse a file and print what happens
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
import pyexpat
from xml.parsers import expat
from test_support import sortdict

class Outputter:
    def StartElementHandler(self, name, attrs):
        print 'Start element:\n\t', repr(name), sortdict(attrs)
    def EndElementHandler(self, name):
        print 'End element:\n\t', repr(name)
    def CharacterDataHandler(self, data):
        data = data.strip()
        if data:
            print 'Character data:'
            print '\t', repr(data)
    def ProcessingInstructionHandler(self, target, data):
        print 'PI:\n\t', repr(target), repr(data)
    def StartNamespaceDeclHandler(self, prefix, uri):
        print 'NS decl:\n\t', repr(prefix), repr(uri)
    def EndNamespaceDeclHandler(self, prefix):
        print 'End of NS decl:\n\t', repr(prefix)
    def StartCdataSectionHandler(self):
        print 'Start of CDATA section'
    def EndCdataSectionHandler(self):
        print 'End of CDATA section'
    def CommentHandler(self, text):
        print 'Comment:\n\t', repr(text)
    def NotationDeclHandler(self, *args):
Beispiel #16
0
        """Parses input file into an ElementTree object, returns the object"""

        try:
            self.inputtree = ET.parse(self.inputfile)

        except IOError, detail:
            self.writeLog(
                'parsing ' + self.inputfile + 'Error parsing input file - ' +
                str(detail), 'error')
            exit()

        except Exception, detail:
            #using expat for getting more detailed information about the exception
            if str(detail.__module__) == 'xml.parsers.expat':
                import xml.parsers.expat as expat
                errorstr = expat.ErrorString(detail.code)
                self.writeLog(
                    'parsing ' + self.inputfile + ': ' +
                    'malformed input file on line ' + str(detail.lineno) +
                    ': ' + errorstr, 'error')

            else:
                self.writeLog(
                    'parsing ' + self.inputfile + ': ' +
                    'Unhandled exception while parsing inputfile \'' +
                    self.inputfile + '\':' + str(detail), 'error')
            exit()

    #----------------------------------------------------------------------
    def checkInput(self):
        """Quality check on elements in input tree. This function will ensure the program will proceed only if the element structure is as described in the sample
Beispiel #17
0
def _fixname(key):
    # type: (Text) -> Text
    """Return the cached, normalised form of ``key``.

    A name containing ``"}"`` gets a leading ``"{"`` prepended; any other
    name is returned unchanged.  Results are memoised in ``_names``.
    """
    if key not in _names:
        # First sighting: compute the normalised form once and remember it.
        _names[key] = "{" + key if "}" in key else key
    return _names[key]


if sys.version_info[0:2] >= (3, 2):
    _undefined_entity_code = expat.errors.codes[expat.errors.XML_ERROR_UNDEFINED_ENTITY]  # type: int
else:
    _codes = {expat.ErrorString(i): i for i in range(0x100)}  # type: Dict[str, int]
    _undefined_entity_code = _codes[expat.errors.XML_ERROR_UNDEFINED_ENTITY]


class XMLParser(object):
    """
    An XML parser with support for XHTML DTDs and all Python-supported encodings

    This implements the API defined by
    xml.etree.ElementTree.XMLParser, but supports XHTML DTDs
    (therefore allowing XHTML entities) and supports all encodings
    Python does, rather than just those supported by expat.
    """
    def __init__(self, encoding=None):
        # type: (Optional[Text]) -> None
        self._parser = expat.ParserCreate(encoding, "}")
Beispiel #18
0
    # type: (str) -> str
    try:
        name = _names[key]
    except KeyError:
        name = key
        if "}" in name:
            name = "{" + name
        _names[key] = name
    return name


if sys.version_info[0:2] >= (3, 2):
    _undefined_entity_code = expat.errors.codes[
        expat.errors.XML_ERROR_UNDEFINED_ENTITY]  # type: int
else:
    _codes = {expat.ErrorString(i): i
              for i in range(0x100)}  # type: Dict[str, int]
    _undefined_entity_code = _codes[expat.errors.XML_ERROR_UNDEFINED_ENTITY]


class XMLParser(object):
    """
    An XML parser with support for XHTML DTDs and all Python-supported encodings

    This implements the API defined by
    xml.etree.ElementTree.XMLParser, but supports XHTML DTDs
    (therefore allowing XHTML entities) and supports all encodings
    Python does, rather than just those supported by expat.
    """
    def __init__(self, encoding=None):
        # type: (Optional[str]) -> None
Beispiel #19
0
    def endElement(self, tag):
        """Pop the innermost open element and check that ``tag`` closes it.

        Raises:
            Exception: when ``tag`` differs from the element on top of
                ``self.el_stack``; the message names both tags.
            IndexError: when ``self.el_stack`` is empty.
        """
        last_tag = self.el_stack.pop()
        if tag != last_tag:
            # Interpolate the tags into the message; passing them as extra
            # Exception arguments would leave the %s placeholders unfilled.
            raise Exception(
                'Error: mismatched tags (last_tag: %s, closing_tag: %s)'
                % (last_tag, tag))


error_files = []
for mak_file in rec_glob('.', '*.mak'):
    # Read the whole file up front; the handle is closed even if the read
    # itself fails.
    with open(mak_file, 'r') as f:
        buf = f.read()

    handler = sax_callbacks()

    try:
        ctxt = expat.ParserCreate()
        handler.startDocument()
        ctxt.StartElementHandler = handler.startElement
        ctxt.EndElementHandler = handler.endElement
        ctxt.Parse(buf, 1)
        handler.endDocument()

    except Exception as e:
        # Only expat errors carry a ``code``; exceptions raised by the
        # handlers do not, and must be reported rather than crash here.
        code = getattr(e, 'code', None)
        undefined_entity = (
            code is not None and
            expat.ErrorString(code) == expat.errors.XML_ERROR_UNDEFINED_ENTITY)
        # Undefined-entity errors are deliberately not reported.
        if not undefined_entity:
            print("Parsing " + mak_file + " failed")
            print(e)
Beispiel #20
0
# Read the XML, recursively inserting any included XML files. Alongside the
# merged text we get a list of objects describing where each line of the
# final XML came from (to facilitate error reporting below).
FINAL_XML, LINES_ORIGIN = include.handle_includes(INFILENAME)

# The version ID is a CRC32 of the XML defining the interface. The text is
# encoded in UTF-8 first, to avoid problems with different locales.
wb.GLB.VER_ID = zlib.crc32(FINAL_XML.encode("utf-8"))

# Parse the merged XML to obtain the root element.
try:
    EL_ROOT = et.fromstring(FINAL_XML)
except et.ParseError as perr:
    # Show the offending line of the merged XML with a marker under the
    # reported column, then list the source locations it came from.
    ROW, COL = perr.position
    print("".join((
        "Parsing error ", str(perr.code),
        "(", pe.ErrorString(perr.code), ") in column ", str(COL),
        " of the line ", str(ROW),
        " of the concatenated XML:",
    )))
    print(FINAL_XML.split("\n")[ROW - 1])
    print("-" * COL + "|")
    print("The erroneous line was produced from the following sources:")
    ERR_SRC = include.find_error(LINES_ORIGIN, ROW)
    for src in ERR_SRC:
        print("".join(("file: ", src[0], ", line:", str(src[1]))))
    sys.exit(1)
TOP_NAME = EL_ROOT.attrib["top"]
# The number of masters defaults to 1 when the attribute is absent.
N_MASTERS = (ex.exprval(EL_ROOT.attrib["masters"])
             if "masters" in EL_ROOT.attrib else 1)
# Find constants and feed them into the expressions module
Beispiel #21
0
 def __report_error(self):
     """Report the parser's current error to the error handler as fatal."""
     message = expat.ErrorString(self.parser.ErrorCode)
     # No underlying exception is attached; this object acts as the locator.
     self.err_handler.fatalError(
         saxlib.SAXParseException(message, None, self))
Beispiel #22
0
def _report(e, lines):
    """Format expat error ``e`` with up to two lines of context either side.

    ``lines`` is the parsed text split into lines; ``e.lineno`` is 1-based.
    """
    # Clamp the window start: for errors on line 1 or 2 an unclamped start
    # would be negative and slice from the end of ``lines`` instead.
    first = max(e.lineno - 3, 0)
    errlines = lines[first:e.lineno + 2]
    # NOTE(review): assumes _decorate takes the 1-based number of the first
    # context line and the 1-based position of the error line within
    # ``errlines``; both match the original values (lineno - 2 and 3)
    # whenever lineno >= 3 -- confirm against _decorate.
    return _decorate(errlines, first + 1, e.lineno - first, e.offset,
                     expat.ErrorString(e.code))
Beispiel #23
0
    def lint_with_text(self, request, text):
        """Lint the given XML content.

        ``text`` is converted to LF line endings, encoded with the request's
        encoding and parsed with expat.

        Returns the lint results: empty when the text parses cleanly,
        otherwise holding a single error-severity result placed on the
        nearest non-empty line at or before the reported error line.
        Returns ``None`` (after logging a warning) when no such line exists.
        """

        text = eollib.convertToEOLFormat(text, eollib.EOL_LF)
        text = text.encode(request.encoding.python_encoding_name)

        parser = expat.ParserCreate()
        results = koLintResults()

        try:
            # We need to remove the BOM on UTF-8 data to prevent expat from
            # crashing. We should check to see if this is still necessary
            # with every new Python version (we are at 2.0 now).
            utf8bom = u'\ufeff'.encode('utf-8')
            if text.startswith(utf8bom):
                parser.Parse(text[len(utf8bom):], 1)
            else:
                parser.Parse(text, 1)

        except expat.error:
            result = KoLintResult()
            result.description = "XML Error: %s" % expat.ErrorString(
                parser.ErrorCode)

            # find an existing, non-empty line on which to display result
            # XXX This logic should be in the LintDisplayer (should
            #     rearchitect a la Trent's lintx)
            text = eollib.convertToEOLFormat(request.content, eollib.EOL_LF)
            lines = text.split("\n")
            lineStart = parser.ErrorLineNumber
            retreated = 0
            while 1:
                if lineStart <= 0:
                    # Checked first: with lineStart == 0 the index
                    # lineStart - 1 would wrap around to the last line.
                    log.warn(
                        "Could not find a suitable line on which "
                        "to display lint result for line %d.",
                        parser.ErrorLineNumber)
                    return None
                elif lineStart <= len(lines) and len(lines[lineStart - 1]):
                    break
                else:
                    # Past the end of the text or on an empty line: step back.
                    lineStart -= 1
                    retreated = 1
            if retreated:
                result.description += " (error is on line %d, but displayed "\
                                      "here)" % parser.ErrorLineNumber
            result.lineStart = result.lineEnd = lineStart

            # Mark the whole display line.
            result.columnStart = 1
            result.columnEnd = len(lines[result.lineEnd - 1]) + 1
            result.severity = result.SEV_ERROR
            results.addResult(result)
        return results
Beispiel #24
0
"""