Beispiel #1
0
def buildTestSuite():
    """Attach generated encoding tests to Html5EncodingTestCase and load them.

    For every file returned by ``get_data_files("encoding")`` and every test
    found in it, a method named ``test_<file>_<n>`` (``n`` starting at 1) is
    set on ``Html5EncodingTestCase``.  If ``chardet`` can be imported, an
    additional ``test_chardet`` method is attached; otherwise a notice is
    printed and that test is skipped.

    Returns:
        The suite built by ``unittest.defaultTestLoader`` from this module.
    """
    for filename in get_data_files("encoding"):
        test_name = os.path.basename(filename).replace('.dat', ''). \
            replace('-', '')
        tests = TestData(filename, "data")
        for idx, test in enumerate(tests):
            # The per-test values are bound as argument defaults so that each
            # generated method keeps its own data instead of whatever the
            # loop variable holds when the method finally runs.
            def encodingTest(self, data=test['data'],
                             encoding=test['encoding']):
                p = HTMLParser()
                # The parse result itself is unused; the assertion reads the
                # encoding off the parser's tokenizer stream afterwards.
                p.parse(data, useChardet=False)
                detected = p.tokenizer.stream.charEncoding

                errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
                                (data, repr(encoding.lower()),
                                 repr(detected)))
                self.assertEqual(encoding.lower(), detected[0], errorMessage)
            setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx + 1),
                    encodingTest)

    # Keep the try body down to the import: only a missing chardet should
    # lead to the "skipping" branch.
    try:
        import chardet  # noqa: F401 -- imported only to probe availability
    except ImportError:
        # Single-argument call form prints identically on Python 2 and 3.
        print("chardet not found, skipping chardet tests")
    else:
        def test_chardet(self):
            path = os.path.join(test_dir, "encoding", "chardet",
                                "test_big5.txt")
            # Read raw bytes (no newline translation or implicit decoding)
            # and make sure the handle is closed again.
            with open(path, "rb") as fp:
                data = fp.read()
            encoding = inputstream.HTMLInputStream(data).charEncoding
            self.assertEqual(encoding[0].lower(), "big5")
        setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)

    return unittest.defaultTestLoader.loadTestsFromName(__name__)
Beispiel #2
0
def main(out_path):
    """Run ``run_file`` on every tokenizer ``*.test`` fixture.

    Args:
        out_path: Path handed to ``run_file`` for each fixture; it must
            already exist.

    If ``out_path`` does not exist, a message is written to stderr and the
    process exits with status 1.
    """
    if os.path.exists(out_path):
        for fixture in support.get_data_files('tokenizer', '*.test'):
            run_file(fixture, out_path)
        return

    sys.stderr.write("Path %s does not exist" % out_path)
    sys.exit(1)
def test_serializer():
    """Yield one serializer check per test case in the serializer fixtures.

    Each ``*.test`` file returned by ``get_data_files('serializer', ...)`` is
    parsed as JSON and its ``"tests"`` list is walked in order.

    Yields:
        Tuples ``(runSerializerTest, input, expected, xhtml, options)``.
        ``xhtml`` falls back to ``expected`` when a case has no ``"xhtml"``
        key and is ``None`` for every case from the ``optionaltags`` file;
        ``options`` defaults to an empty dict.
    """
    for filename in get_data_files('serializer', '*.test'):
        # Parse the whole file first so the handle is closed before the
        # generator is suspended at a yield.
        with open(filename) as fp:
            tests = json.load(fp)

        test_name = os.path.basename(filename).replace('.test', '')
        skip_xhtml = test_name == 'optionaltags'

        for test in tests['tests']:
            expected = test["expected"]
            xhtml = None if skip_xhtml else test.get("xhtml", expected)
            yield (runSerializerTest, test["input"], expected, xhtml,
                   test.get("options", {}))
Beispiel #4
0
def test_serializer():
    """Generate serializer checks from the JSON fixtures.

    Reads every ``*.test`` file returned by
    ``get_data_files('serializer', ...)`` and walks its ``"tests"`` list.

    Yields:
        ``(runSerializerTest, input, expected, xhtml, options)`` tuples.
        ``xhtml`` is the case's ``"xhtml"`` value, or ``expected`` when that
        key is absent, or ``None`` for all cases of the ``optionaltags``
        file.  ``options`` is the case's ``"options"`` value or ``{}``.
    """
    for filename in get_data_files('serializer', '*.test'):
        # The JSON is fully loaded here; leaving the ``with`` block before
        # yielding keeps the file from staying open while checks run.
        with open(filename) as fp:
            tests = json.load(fp)

        test_name = os.path.basename(filename).replace('.test', '')

        for test in tests['tests']:
            expected = test["expected"]
            if test_name == 'optionaltags':
                xhtml = None
            else:
                xhtml = test.get("xhtml", expected)
            yield (runSerializerTest, test["input"], expected, xhtml,
                   test.get("options", {}))
def testTokenizer():
    """Yield one tokenizer check per (test case, initial state) pair.

    Each ``*.test`` file returned by ``get_data_files('tokenizer', ...)`` is
    parsed as JSON; files without a top-level ``"tests"`` key are ignored.
    A case lacking ``"initialStates"`` gets ``["Data state"]``.

    Yields:
        Tuples ``(runTokenizerTest, test)`` where ``test`` is a dict with the
        case's fields plus ``"initialState"`` set to
        ``capitalize(<state>)`` for the state being exercised.
    """
    for filename in get_data_files('tokenizer', '*.test'):
        # Load first, then release the handle before any yield.
        with open(filename) as fp:
            tests = json.load(fp)

        if 'tests' not in tests:
            continue

        for test in tests['tests']:
            if 'initialStates' not in test:
                test["initialStates"] = ["Data state"]
            for initialState in test["initialStates"]:
                # Hand out a separate (shallow) dict per state.  Re-yielding
                # the same dict would make every case of this test report
                # the last state whenever the generator is drained before
                # the checks are executed.
                case = dict(test)
                case["initialState"] = capitalize(initialState)
                yield runTokenizerTest, case
Beispiel #6
0
def testTokenizer():
    """Generate tokenizer checks from the JSON fixtures.

    Walks every ``*.test`` file returned by
    ``get_data_files('tokenizer', ...)``.  Files whose JSON has no ``"tests"``
    key produce nothing.  Cases without ``"initialStates"`` default to
    ``["Data state"]``.

    Yields:
        ``(runTokenizerTest, test)`` tuples, one per initial state of each
        case; ``test`` carries ``"initialState"`` set to the result of
        ``capitalize`` on that state.
    """
    for filename in get_data_files('tokenizer', '*.test'):
        # Nothing below needs the open file, so close it before yielding.
        with open(filename) as fp:
            tests = json.load(fp)

        if 'tests' not in tests:
            continue

        for test in tests['tests']:
            if 'initialStates' not in test:
                test["initialStates"] = ["Data state"]
            for initialState in test["initialStates"]:
                # A shallow copy per state keeps already-yielded cases from
                # being overwritten by later iterations of this loop.
                per_state = dict(test, initialState=capitalize(initialState))
                yield runTokenizerTest, per_state
def test_treewalker():
    """Yield one tree-walker check per (tree type, fixture test) pair.

    When the generator is first advanced, a one-line banner listing the
    ``treeTypes`` keys is written to stdout.

    Yields:
        Tuples ``(runTreewalkerTest, innerHTML, input, expected, errors,
        treeCls)`` where ``errors`` is the test's ``"errors"`` field split on
        newlines.
    """
    sys.stdout.write('Testing tree walkers ' + " ".join(treeTypes.keys()) + "\n")

    # The fixture list does not depend on the tree type, so fetch it once.
    # list() guarantees it can be walked again for every tree type.
    files = list(get_data_files('tree-construction'))
    field_names = ("data", "errors", "document-fragment", "document")

    # Only the classes are needed; values() exists on Python 2 and 3 alike
    # (iteritems() is Python 2 only).
    for treeCls in treeTypes.values():
        for filename in files:
            for test in TestData(filename, "data"):
                data, errors, innerHTML, expected = [test[key]
                                                     for key in field_names]
                # NOTE(review): test_parser checks ``errors`` for truthiness
                # before splitting; here the split is unconditional --
                # confirm the field is always a string.
                errors = errors.split("\n")
                yield (runTreewalkerTest, innerHTML, data, expected, errors,
                       treeCls)
Beispiel #8
0
def test_treewalker():
    """Generate tree-walker checks for every tree type and fixture test.

    A banner naming the ``treeTypes`` keys is written to stdout when the
    generator starts running.

    Yields:
        ``(runTreewalkerTest, innerHTML, input, expected, errors, treeCls)``
        tuples; ``errors`` is the ``"errors"`` field split on newlines.
    """
    sys.stdout.write('Testing tree walkers ' + " ".join(treeTypes.keys()) +
                     "\n")

    # Look the fixtures up once rather than once per tree type; the list()
    # copy makes repeated iteration safe whatever the helper returns.
    files = list(get_data_files('tree-construction'))
    wanted = ("data", "errors", "document-fragment", "document")

    # values() is available on both Python 2 and Python 3, unlike
    # iteritems(); the tree names themselves are not used here.
    for treeCls in treeTypes.values():
        for filename in files:
            tests = TestData(filename, "data")
            for test in tests:
                markup, errors, innerHTML, expected = [test[key]
                                                       for key in wanted]
                # NOTE(review): unconditional split assumes ``errors`` is
                # always a string -- confirm against TestData.
                errors = errors.split("\n")
                yield (runTreewalkerTest, innerHTML, markup, expected,
                       errors, treeCls)
def test_parser():
    """Yield one parser check per (test, tree type, namespace flag) triple.

    When the generator is first advanced, a banner naming the ``treeTypes``
    keys is written to stderr.  The test input is printed before each yield.

    Yields:
        Tuples ``(runParserTest, innerHTML, input, expected, errors, treeCls,
        namespaceHTMLElements)``.  A non-empty ``errors`` field is split on
        newlines; an empty one is passed through unchanged.
    """
    sys.stderr.write('Testing tree builders ' + " ".join(treeTypes.keys()) + "\n")
    files = get_data_files('tree-construction')
    # Parenthesised so the comprehension below is valid on Python 3 as well.
    field_names = (u'data', u'errors', u'document-fragment', u'document')

    for filename in files:
        tests = TestData(filename, u"data")

        for test in tests:
            data, errors, innerHTML, expected = [test[key]
                                                 for key in field_names]
            if errors:
                errors = errors.split(u"\n")

            # values() works on Python 2 and 3 (iteritems() is 2-only); the
            # tree names are not needed here.
            for treeCls in treeTypes.values():
                for namespaceHTMLElements in (True, False):
                    # NOTE(review): looks like leftover debugging output --
                    # confirm whether it is still wanted.  The call form
                    # prints the same text on Python 2 and 3.
                    print(data)
                    yield (runParserTest, innerHTML, data, expected, errors,
                           treeCls, namespaceHTMLElements)
Beispiel #10
0
def test_parser():
    """Generate parser checks for every fixture test, tree type and flag.

    A banner listing the ``treeTypes`` keys goes to stderr when the generator
    starts running, and each test's input is printed right before its check
    is yielded.

    Yields:
        ``(runParserTest, innerHTML, input, expected, errors, treeCls,
        namespaceHTMLElements)`` tuples, with ``namespaceHTMLElements`` taking
        ``True`` then ``False``.  ``errors`` is split on newlines only when it
        is non-empty.
    """
    sys.stderr.write('Testing tree builders ' + " ".join(treeTypes.keys()) +
                     "\n")
    files = get_data_files('tree-construction')
    # An explicit tuple: the bare ``for key in a, b, c`` form inside a list
    # comprehension is a syntax error on Python 3.
    wanted = (u'data', u'errors', u'document-fragment', u'document')

    for filename in files:
        for test in TestData(filename, u"data"):
            markup, errors, innerHTML, expected = [test[key]
                                                   for key in wanted]
            if errors:
                errors = errors.split(u"\n")

            # values() instead of the Python-2-only iteritems(); only the
            # classes are used.
            for treeCls in treeTypes.values():
                for namespaceHTMLElements in (True, False):
                    # NOTE(review): presumably a debugging aid -- confirm it
                    # should stay.  Single-argument print() behaves the same
                    # on Python 2 and 3.
                    print(markup)
                    yield (runParserTest, innerHTML, markup, expected, errors,
                           treeCls, namespaceHTMLElements)
def test_parser():
    """Yield parser checks for the non-fragment tree-construction tests.

    The fixture file named ``template`` is skipped, as is every test that has
    a ``document-fragment`` value.

    Yields:
        Tuples ``(runParserTest, input, expected, errors)``; a non-empty
        ``errors`` field is split on newlines first.
    """
    wanted = ('data', 'errors', 'document-fragment', 'document')

    for path in get_data_files('tree-construction'):
        stem = os.path.basename(path).replace(".dat", "")
        if stem in ("template",):
            continue

        for case in TestData(path, "data"):
            markup, errors, fragment, expected = [case[name]
                                                  for name in wanted]
            if errors:
                errors = errors.split("\n")

            # html.parser doesn't provide fragment parsing
            if not fragment:
                yield (runParserTest, markup, expected, errors)