def buildTestSuite():
    for filename in get_data_files("encoding"):
        test_name = os.path.basename(filename).replace('.dat', '').replace('-', '')
        tests = TestData(filename, "data")
        for idx, test in enumerate(tests):
            def encodingTest(self, data=test['data'], encoding=test['encoding']):
                p = HTMLParser()
                t = p.parse(data, useChardet=False)
                errorMessage = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" %
                                (data, repr(encoding.lower()),
                                 repr(p.tokenizer.stream.charEncoding)))
                self.assertEquals(encoding.lower(),
                                  p.tokenizer.stream.charEncoding[0],
                                  errorMessage)
            setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx + 1),
                    encodingTest)

    try:
        import chardet
        def test_chardet(self):
            data = open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt")).read()
            encoding = inputstream.HTMLInputStream(data).charEncoding
            assert encoding[0].lower() == "big5"
        setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
    except ImportError:
        print "chardet not found, skipping chardet tests"

    return unittest.defaultTestLoader.loadTestsFromName(__name__)
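
# A minimal sketch of how the suite built above might be run directly; the
# runner configuration below is an assumption and not part of the original module.
if __name__ == "__main__":
    unittest.TextTestRunner(verbosity=2).run(buildTestSuite())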
def main(out_path):
    if not os.path.exists(out_path):
        sys.stderr.write("Path %s does not exist" % out_path)
        sys.exit(1)

    for filename in support.get_data_files('tokenizer', '*.test'):
        run_file(filename, out_path)
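
# Hypothetical command-line entry point for the driver above; the argument
# handling shown here is assumed, since the original snippet does not include it.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OUT_PATH\n" % sys.argv[0])
        sys.exit(1)
    main(sys.argv[1])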
def test_serializer():
    for filename in get_data_files('serializer', '*.test'):
        with open(filename) as fp:
            tests = json.load(fp)
            test_name = os.path.basename(filename).replace('.test', '')
            for index, test in enumerate(tests['tests']):
                xhtml = test.get("xhtml", test["expected"])
                if test_name == 'optionaltags':
                    xhtml = None
                yield (runSerializerTest, test["input"], test["expected"],
                       xhtml, test.get("options", {}))
def testTokenizer():
    for filename in get_data_files('tokenizer', '*.test'):
        with open(filename) as fp:
            tests = json.load(fp)
            testName = os.path.basename(filename).replace(".test", "")
            if 'tests' in tests:
                for index, test in enumerate(tests['tests']):
                    # Skip tests with a self closing flag
                    skip = False
                    if 'initialStates' not in test:
                        test["initialStates"] = ["Data state"]
                    for initialState in test["initialStates"]:
                        test["initialState"] = capitalize(initialState)
                        yield runTokenizerTest, test
def test_treewalker():
    sys.stdout.write('Testing tree walkers ' + " ".join(treeTypes.keys()) + "\n")

    for treeName, treeCls in treeTypes.iteritems():
        files = get_data_files('tree-construction')
        for filename in files:
            testName = os.path.basename(filename).replace(".dat", "")
            tests = TestData(filename, "data")
            for index, test in enumerate(tests):
                (input, errors, innerHTML, expected) = [test[key] for key in
                                                        ("data", "errors",
                                                         "document-fragment",
                                                         "document")]
                errors = errors.split("\n")
                yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
def test_parser():
    sys.stderr.write('Testing tree builders ' + " ".join(treeTypes.keys()) + "\n")
    files = get_data_files('tree-construction')
    for filename in files:
        testName = os.path.basename(filename).replace(".dat", "")
        tests = TestData(filename, u"data")
        for index, test in enumerate(tests):
            input, errors, innerHTML, expected = [test[key] for key in
                                                  (u'data', u'errors',
                                                   u'document-fragment',
                                                   u'document')]
            if errors:
                errors = errors.split(u"\n")
            for treeName, treeCls in treeTypes.iteritems():
                for namespaceHTMLElements in (True, False):
                    print input
                    yield (runParserTest, innerHTML, input, expected, errors,
                           treeCls, namespaceHTMLElements)
def test_parser():
    files = get_data_files('tree-construction')
    for filename in files:
        testName = os.path.basename(filename).replace(".dat", "")
        if testName in ("template",):
            continue
        tests = TestData(filename, "data")
        for index, test in enumerate(tests):
            input, errors, innerHTML, expected = [test[key] for key in
                                                  ('data', 'errors',
                                                   'document-fragment',
                                                   'document')]
            if errors:
                errors = errors.split("\n")
            if innerHTML:
                # html.parser doesn't provide fragment parsing
                continue
            yield (runParserTest, input, expected, errors)
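
# The test_* generators above follow the nose-style "test generator" protocol:
# each yielded tuple is (test_function, *args).  The stand-in runner below is
# sketched purely to illustrate how such tuples are consumed; it is not part
# of the original suite.
def run_generated_tests(generator):
    for case in generator():
        func, args = case[0], case[1:]
        func(*args)  # e.g. runParserTest(input, expected, errors)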