def test_non_html_ignored(self, workdir):
    # Only HTML/XHTML input is tidied; any other file must be handed
    # back as-is.
    tidy = Tidy()
    text_file = workdir / "sample.txt"
    text_file.write('Sample file.')
    given_path = str(text_file)
    returned_path, _metadata = tidy.process(given_path, {'error': False})
    # The processor returns the very path it was given.
    assert returned_path == given_path
def test_default_xhtml(self, workdir, samples_dir):
    # Make sure that, by default, we get XHTML output from HTML input.
    source = workdir / "src" / "sample.html"
    samples_dir.join("sample1.html").copy(source)
    proc = Tidy()
    resultpath, metadata = proc.process(str(source), {'error': False})
    # Read the result as raw bytes; the context manager closes the file
    # handle deterministically instead of leaving it to the garbage
    # collector.
    with open(resultpath, 'rb') as result_file:
        contents = result_file.read()
    # The XHTML namespace declaration marks the output as XHTML.
    assert b'xmlns="http://www.w3.org/1999/xhtml"' in contents
def test_encoding_utf8(self, workdir, samples_dir):
    # Make sure we get UTF-8 output and no special stuff (i.e. no HTML
    # character entities in place of the real characters).
    source = workdir / "src" / "sample.html"
    samples_dir.join("sample1.html").copy(source)
    proc = Tidy()
    resultpath, metadata = proc.process(str(source), {'error': False})
    # Decode the result as UTF-8; the context manager guarantees the
    # file handle is closed even if reading or decoding fails.
    with codecs.open(resultpath, 'r', encoding='utf-8') as result_file:
        contents = result_file.read()
    # The umlaut must appear as a literal character ...
    assert u'Ü' in contents
    # ... and not as its named HTML entity. (Checking for the literal
    # character here would contradict the assertion above and make the
    # test impossible to pass.)
    assert u'&Uuml;' not in contents