コード例 #1
0
ファイル: test_parser.py プロジェクト: alabid/html5lib
 def runParserTest(self, innerHTML, input, expected, errors, treeClass):
     """Parse ``input`` and check the serialized tree against ``expected``.

     innerHTML -- when truthy, ``input`` is parsed with ``parseFragment``
                  and this value is passed as its second argument;
                  otherwise ``parse`` is used.
     input     -- the markup text; it is wrapped in ``StringIO.StringIO``.
     expected  -- the expected tree dump, normalised by ``convertExpected``.
     errors    -- sequence of expected error strings; only its length is
                  compared, and only when the module-level
                  ``checkParseErrors`` flag is truthy.
     treeClass -- handed to ``HTMLParser`` as its ``tree`` argument.

     A ``DataLossWarning`` raised by ``parse`` skips the test.  Any other
     exception raised while parsing fails the test with the traceback
     included in the message.
     """
     #XXX - move this out into the setup function
     #concatenate all consecutive character tokens into a single token
     p = html5parser.HTMLParser(tree = treeClass)

     try:
         if innerHTML:
             document = p.parseFragment(StringIO.StringIO(input), innerHTML)
         else:
             try:
                 document = p.parse(StringIO.StringIO(input))
             except constants.DataLossWarning:
                 sys.stderr.write("Test input causes known dataloss, skipping")
                 return
     except Exception:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit still propagate instead of becoming test failures.
         errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
                               "\nTraceback:", traceback.format_exc()])
         self.fail(errorMsg)

     # Attribute lists are sorted on both sides so that attribute order
     # does not affect the comparison.
     output = convertTreeDump(p.tree.testSerializer(document))
     output = attrlist.sub(sortattrs, output)

     expected = convertExpected(expected)
     expected = attrlist.sub(sortattrs, expected)
     errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
                           "\nReceived:", output])
     self.assertEqual(expected, output, errorMsg)

     errStr = ["Line: %i Col: %i %s %s"%(line, col,
                                      constants.E[errorcode], datavars) for
               ((line,col), errorcode, datavars) in p.errors]
     errorMsg2 = "\n".join(["\n\nInput:", input,
                            "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
                            "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
     if checkParseErrors:
         self.assertEqual(len(p.errors), len(errors), errorMsg2)
コード例 #2
0
    def runTest(self, innerHTML, input, expected, errors, treeClass):
        """Parse ``input``, walk the resulting tree and compare with ``expected``.

        innerHTML -- UTF-8 bytes or None; when truthy after decoding, the
                     input is parsed with ``parseFragment``.
        input     -- UTF-8 bytes; wrapped in ``io.BytesIO`` for the parser.
        expected  -- UTF-8 bytes holding the expected token dump.
        errors    -- UTF-8 bytes or None; decoded and split into lines but
                     not used further in this method.
        treeClass -- mapping with ``"builder"`` and ``"walker"`` entries and
                     an optional ``"adapter"`` callable.

        A ``DataLossWarning`` during parsing, or a ``NotImplementedError``
        during walking/comparison, ends the test without failing it.
        """
        if innerHTML is not None:
            innerHTML = str(innerHTML, "utf8")
        expected = str(expected, "utf8")
        if errors is not None:
            errors = str(errors, "utf8")
            errors = errors.split("\n")

        p = html5parser.HTMLParser(tree=treeClass["builder"])
        try:
            if innerHTML:
                document = p.parseFragment(io.BytesIO(input), innerHTML)
            else:
                document = p.parse(io.BytesIO(input))
        except constants.DataLossWarning:
            #Ignore testcases we know we don't pass
            return

        # Without an "adapter" entry the document is passed through unchanged.
        document = treeClass.get("adapter", lambda x: x)(document)
        try:
            output = convertTokens(treeClass["walker"](document))
            output = attrlist.sub(sortattrs, output)
            expected = attrlist.sub(sortattrs, convertExpected(expected))
            # assertEqual: the assertEquals alias is deprecated and was
            # removed in Python 3.12.
            self.assertEqual(
                expected, output, "\n".join([
                    "", "Input:",
                    str(input, "utf8"), "", "Expected:", expected, "",
                    "Received:", output
                ]))
        except NotImplementedError:
            pass  # Amnesty for those that confess...
コード例 #3
0
    def runTest(self, innerHTML, input, expected, errors, treeClass):
        """Run one tree-walker test case.

        The byte-string arguments are decoded as UTF-8, ``input`` is parsed
        with the builder from ``treeClass["builder"]``, the document is
        passed through the optional ``treeClass["adapter"]``, walked with
        ``treeClass["walker"]``, and the converted token dump is compared
        with ``expected`` after sorting attribute lists on both sides.

        innerHTML -- UTF-8 bytes or None; a truthy decoded value selects
                     ``parseFragment`` instead of ``parse``.
        input     -- UTF-8 bytes fed to the parser through ``io.BytesIO``.
        expected  -- UTF-8 bytes with the expected dump.
        errors    -- UTF-8 bytes or None; decoded and split on newlines,
                     then unused in this method.
        treeClass -- mapping providing ``"builder"``, ``"walker"`` and
                     optionally ``"adapter"``.

        ``DataLossWarning`` from the parser and ``NotImplementedError`` from
        the walking/comparison step are both treated as "skip".
        """
        if innerHTML is not None:
            innerHTML = str(innerHTML, "utf8")
        expected = str(expected, "utf8")
        if errors is not None:
            errors = str(errors, "utf8")
            errors = errors.split("\n")

        p = html5parser.HTMLParser(tree = treeClass["builder"])
        try:
            if innerHTML:
                document = p.parseFragment(io.BytesIO(input), innerHTML)
            else:
                document = p.parse(io.BytesIO(input))
        except constants.DataLossWarning:
            #Ignore testcases we know we don't pass
            return

        # Identity function is used when no adapter is configured.
        document = treeClass.get("adapter", lambda x: x)(document)
        try:
            output = convertTokens(treeClass["walker"](document))
            output = attrlist.sub(sortattrs, output)
            expected = attrlist.sub(sortattrs, convertExpected(expected))
            # The deprecated assertEquals alias no longer exists in
            # Python 3.12+; assertEqual is the supported spelling.
            self.assertEqual(expected, output, "\n".join([
                "", "Input:", str(input, "utf8"),
                "", "Expected:", expected,
                "", "Received:", output
            ]))
        except NotImplementedError:
            pass # Amnesty for those that confess...
コード例 #4
0
def runParserTest(innerHTML, input, expected, errors, treeClass,
                  namespaceHTMLElements):
    """Parse ``input`` and assert the tree dump equals ``expected``.

    innerHTML -- when truthy, ``input`` is parsed with ``parseFragment``
                 and this value is its second argument.
    input     -- the markup handed to the parser.
    expected  -- the expected tree dump (normalised by ``convertExpected``).
    errors    -- sequence of expected error strings; only its length is
                 checked, and only when the module-level
                 ``checkParseErrors`` flag is truthy.
    treeClass -- passed to ``HTMLParser`` as ``tree``.
    namespaceHTMLElements -- passed to ``HTMLParser``; when truthy the
                 expected dump is rewritten through ``namespaceExpected``.

    A ``DataLossWarning`` from constructing the parser or from ``parse``
    makes the function return without asserting anything.
    """
    #XXX - move this out into the setup function
    #concatenate all consecutive character tokens into a single token
    try:
        p = html5parser.HTMLParser(tree = treeClass,
                                   namespaceHTMLElements=namespaceHTMLElements)
    except constants.DataLossWarning:
        return

    try:
        if innerHTML:
            document = p.parseFragment(input, innerHTML)
        else:
            try:
                document = p.parse(input)
            except constants.DataLossWarning:
                return
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt / SystemExit
        # are not converted into assertion failures.
        errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                               u"\nTraceback:", traceback.format_exc()])
        assert False, errorMsg.encode("utf8")

    output = convertTreeDump(p.tree.testSerializer(document))

    expected = convertExpected(expected)
    if namespaceHTMLElements:
        expected = namespaceExpected(r"\1<html \2>", expected)

    errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                           u"\nReceived:", output])
    assert expected == output, errorMsg.encode("utf8")

    # Matches "%(name)s" placeholders and captures the name.  Compiled once
    # here rather than on every call of the helper.
    placeholder = re.compile(r"%\((\w*)\)s")

    def datavars_sub(datavars, errorcode):
        """Return a mapping usable with ``constants.E[errorcode] % ...``.

        A dict is returned unchanged.  Any other value is treated as a
        sequence whose items are paired, in order, with the named
        placeholders found in the message template.
        """
        if isinstance(datavars, dict):
            return datavars
        # findall collects every placeholder; a template without any
        # placeholder yields an empty mapping instead of raising.
        names = placeholder.findall(constants.E[errorcode])
        return dict(zip(names, datavars))

    errStr = [u"Line: %i Col: %i %s"%(line, col,
                                      constants.E[errorcode] % datavars_sub(datavars, errorcode)) for
              ((line,col), errorcode, datavars) in p.errors]

    errorMsg2 = u"\n".join([u"\n\nInput:", input,
                            u"\nExpected errors (" + str(len(errors)) + u"):\n" + u"\n".join(errors),
                            u"\nActual errors (" + str(len(p.errors)) + u"):\n" + u"\n".join(errStr)])
    if checkParseErrors:
        assert len(p.errors) == len(errors), errorMsg2.encode("utf-8")
コード例 #5
0
def runParserTest(innerHTML, input, expected, errors, treeClass,
                  namespaceHTMLElements):
    """Parse ``input`` and assert the tree dump equals ``expected``.

    innerHTML -- when truthy, ``input`` is parsed with ``parseFragment``
                 and this value is its second argument.
    input     -- the markup handed to the parser.
    expected  -- the expected tree dump (normalised by ``convertExpected``).
    errors    -- sequence of expected error strings; only its length is
                 checked, and only when the module-level
                 ``checkParseErrors`` flag is truthy.
    treeClass -- passed to ``HTMLParser`` as ``tree``.
    namespaceHTMLElements -- passed to ``HTMLParser``; when truthy the
                 expected dump is rewritten through ``namespaceExpected``.

    A ``DataLossWarning`` from constructing the parser or from ``parse``
    makes the function return without asserting anything.
    """
    #XXX - move this out into the setup function
    #concatenate all consecutive character tokens into a single token
    try:
        p = html5parser.HTMLParser(tree=treeClass,
                                   namespaceHTMLElements=namespaceHTMLElements)
    except constants.DataLossWarning:
        return

    try:
        if innerHTML:
            document = p.parseFragment(input, innerHTML)
        else:
            try:
                document = p.parse(input)
            except constants.DataLossWarning:
                return
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt / SystemExit
        # are not converted into assertion failures.
        errorMsg = u"\n".join([
            u"\n\nInput:", input, u"\nExpected:", expected, u"\nTraceback:",
            traceback.format_exc().decode('utf8')
        ])
        assert False, errorMsg

    output = convertTreeDump(p.tree.testSerializer(document))

    expected = convertExpected(expected)
    if namespaceHTMLElements:
        # Same value as the former ur"..." literal, spelled so that the
        # file also parses on interpreters without the ``ur`` prefix.
        expected = namespaceExpected(u"\\1<html \\2>", expected)

    errorMsg = u"\n".join([
        u"\n\nInput:", input, u"\nExpected:", expected, u"\nReceived:", output
    ])
    assert expected == output, errorMsg

    # NOTE(review): the conditional expression binds more loosely than %,
    # so a non-dict ``datavars`` is rendered as a 1-tuple in place of the
    # formatted message -- confirm whether that is intended.
    errStr = [
        u"Line: %i Col: %i %s" % (line, col, constants.E[errorcode] %
                                  datavars if isinstance(datavars, dict) else
                                  (datavars, ))
        for ((line, col), errorcode, datavars) in p.errors
    ]

    errorMsg2 = u"\n".join([
        u"\n\nInput:", input, u"\nExpected errors (" + unicode(len(errors)) +
        u"):\n" + u"\n".join(errors), u"\nActual errors (" +
        unicode(len(p.errors)) + u"):\n" + u"\n".join(errStr)
    ])
    if checkParseErrors:
        assert len(p.errors) == len(errors), errorMsg2
コード例 #6
0
ファイル: test_parser.py プロジェクト: glebourgeois/Pywemil
    def runParserTest(self, innerHTML, input, expected, errors, treeClass):
        """Parse ``input`` and check the serialized tree against ``expected``.

        innerHTML -- UTF-8 bytes or a falsy value; when truthy it is decoded
                     and ``input`` is parsed with ``parseFragment``.
        input     -- UTF-8 bytes; wrapped in ``io.BytesIO`` for the parser.
        expected  -- UTF-8 bytes with the expected tree dump.
        errors    -- UTF-8 bytes with one expected error per line, or a
                     falsy value meaning "no expected errors".  Only the
                     number of errors is compared, and only when the
                     module-level ``checkParseErrors`` flag is truthy.
        treeClass -- passed to ``HTMLParser`` as ``tree``.

        A ``DataLossWarning`` raised by ``parse`` skips the test.  Any other
        exception raised while parsing fails the test with the traceback
        included in the message.
        """
        #XXX - move this out into the setup function
        #concatenate all consecutive character tokens into a single token
        p = html5parser.HTMLParser(tree = treeClass)

        if innerHTML:
            innerHTML = str(innerHTML, "utf8")

        if errors:
            errors = str(errors, "utf8").split("\n")
        else:
            # None (or empty) would otherwise reach len()/join() below and
            # raise TypeError for None; treat it as zero expected errors.
            errors = []

        expected = str(expected, "utf8")

        try:
            if innerHTML:
                document = p.parseFragment(io.BytesIO(input), innerHTML)
            else:
                try:
                    document = p.parse(io.BytesIO(input))
                except constants.DataLossWarning:
                    sys.stderr.write("Test input causes known dataloss, skipping")
                    return
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate instead of becoming test failures.
            errorMsg = "\n".join(["\n\nInput:", str(input, "utf8"),
                                  "\nExpected:", expected,
                                  "\nTraceback:", traceback.format_exc()])
            self.fail(errorMsg)

        # Attribute lists are sorted on both sides so that attribute order
        # does not affect the comparison.
        output = convertTreeDump(p.tree.testSerializer(document))
        output = attrlist.sub(sortattrs, output)

        expected = convertExpected(expected)
        expected = attrlist.sub(sortattrs, expected)
        errorMsg = "\n".join(["\n\nInput:", str(input, "utf8"),
                              "\nExpected:", expected,
                              "\nReceived:", output])
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected, output, errorMsg)

        errStr = ["Line: %i Col: %i %s %s"%(line, col,
                                         constants.E[errorcode], datavars) for
                  ((line,col), errorcode, datavars) in p.errors]
        errorMsg2 = "\n".join(["\n\nInput:", str(input, "utf8"),
                               "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
                               "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
        if checkParseErrors:
            self.assertEqual(len(p.errors), len(errors), errorMsg2)
コード例 #7
0
    def runParserTest(self, innerHTML, input, expected, errors, treeClass,
        namespaceHTMLElements):
        """Parse ``input`` and check the serialized tree against ``expected``.

        innerHTML -- when truthy, ``input`` is parsed with ``parseFragment``
                     and this value is its second argument.
        input     -- the markup handed to the parser.
        expected  -- the expected tree dump (normalised by
                     ``convertExpected``).
        errors    -- sequence of expected error strings; only its length is
                     compared, and only when the module-level
                     ``checkParseErrors`` flag is truthy.
        treeClass -- passed to ``HTMLParser`` as ``tree``.
        namespaceHTMLElements -- passed to ``HTMLParser``; when truthy the
                     expected dump is rewritten through ``namespaceExpected``.

        A ``DataLossWarning`` from constructing the parser or from ``parse``
        ends the test without failing it.
        """
        #XXX - move this out into the setup function
        #concatenate all consecutive character tokens into a single token
        try:
            p = html5parser.HTMLParser(tree = treeClass,
                                       namespaceHTMLElements=namespaceHTMLElements)
        except constants.DataLossWarning:
            return

        try:
            if innerHTML:
                document = p.parseFragment(input, innerHTML)
            else:
                try:
                    document = p.parse(input)
                except constants.DataLossWarning:
                    return
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate instead of becoming test failures.
            errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                                   u"\nTraceback:", traceback.format_exc()])
            self.fail(errorMsg.encode("utf8"))

        # Attribute lists are sorted on both sides so that attribute order
        # does not affect the comparison.
        output = convertTreeDump(p.tree.testSerializer(document))
        output = attrlist.sub(sortattrs, output)

        expected = convertExpected(expected)
        expected = attrlist.sub(sortattrs, expected)
        if namespaceHTMLElements:
            expected = namespaceExpected(r"\1<html \2>", expected)

        errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                               u"\nReceived:", output])
        self.assertEqual(expected, output, errorMsg.encode("utf8"))

        # NOTE(review): the conditional expression binds more loosely than
        # %, so a non-dict ``datavars`` is rendered as a 1-tuple in place of
        # the formatted message -- confirm whether that is intended.
        errStr = [u"Line: %i Col: %i %s"%(line, col,
                                          constants.E[errorcode] % datavars if isinstance(datavars, dict) else (datavars,)) for
                  ((line,col), errorcode, datavars) in p.errors]

        errorMsg2 = u"\n".join([u"\n\nInput:", input,
                                u"\nExpected errors (" + str(len(errors)) + u"):\n" + u"\n".join(errors),
                                u"\nActual errors (" + str(len(p.errors)) + u"):\n" + u"\n".join(errStr)])
        if checkParseErrors:
            self.assertEqual(len(p.errors), len(errors), errorMsg2.encode("utf-8"))
コード例 #8
0
ファイル: test_treewalkers.py プロジェクト: sudosoup/streams
def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
    """Parse ``input``, walk the tree and assert the dump equals ``expected``.

    ``treeClass`` is a mapping that supplies the ``"builder"`` used by the
    parser, the ``"walker"`` applied to the document and, optionally, an
    ``"adapter"`` callable applied to the document before walking.  A truthy
    ``innerHTML`` selects fragment parsing.  ``errors`` is accepted but not
    used.
    """
    try:
        parser = html5parser.HTMLParser(tree=treeClass["builder"])
        document = (parser.parseFragment(input, innerHTML) if innerHTML
                    else parser.parse(input))
    except constants.DataLossWarning:
        # Input the parser is known to lose data on: nothing to check.
        return

    # Fall back to the identity function when no adapter is configured.
    adapt = treeClass.get("adapter", lambda x: x)
    document = adapt(document)
    try:
        walk = treeClass["walker"]
        received = attrlist.sub(sortattrs, convertTokens(walk(document)))
        wanted = attrlist.sub(sortattrs, convertExpected(expected))
        assert wanted == received, "\n".join([
            "", "Input:", input,
            "", "Expected:", wanted,
            "", "Received:", received,
        ])
    except NotImplementedError:
        # A step that reports itself as unimplemented is not a failure.
        pass
コード例 #9
0
ファイル: test_treewalkers.py プロジェクト: 1974kpkpkp/WebGL
def run_test(innerHTML, input, expected, errors, treeClass):
    """Parse ``input``, walk the tree and assert the dump equals ``expected``.

    ``input`` is wrapped in ``StringIO.StringIO`` before being handed to
    the parser.  ``treeClass`` is a mapping providing ``"builder"`` and
    ``"walker"`` and, optionally, an ``"adapter"`` callable applied to the
    parsed document.  A truthy ``innerHTML`` selects fragment parsing.
    ``errors`` is accepted but not used.
    """
    try:
        parser = html5parser.HTMLParser(tree = treeClass["builder"])
        source = StringIO.StringIO(input)
        if not innerHTML:
            document = parser.parse(source)
        else:
            document = parser.parseFragment(source, innerHTML)
    except constants.DataLossWarning:
        # Input the parser is known to lose data on: nothing to check.
        return

    # Fall back to the identity function when no adapter is configured.
    adapter = treeClass.get("adapter", lambda x: x)
    document = adapter(document)
    try:
        tokens = treeClass["walker"](document)
        actual = attrlist.sub(sortattrs, convertTokens(tokens))
        wanted = attrlist.sub(sortattrs, convertExpected(expected))
        assert wanted == actual, "\n".join(
            ["", "Input:", input, "", "Expected:", wanted, "", "Received:", actual])
    except NotImplementedError:
        # A step that reports itself as unimplemented is not a failure.
        pass