def test_custom_quotes():
    """Check educating with custom quotes, apostrophe, dashes and ellipsis.

    One source document is serialized with two different sets of
    replacement characters, and each result is compared with the
    serialization of a document that already contains the replacements.
    """
    xml = """<html><body> <p>"Hello" --- 'World'</p> <p>That's all, folks...</p> </body></html>"""
    format = Format(educate=True, squotes='{}', dquotes=['<<', '>>'],
                    apostrophe='`', dashes='-~', ellipsis='++')
    # The angle brackets are written as entities in the reference document:
    # a bare "<" inside character data is not well-formed XML.
    xml_strange = """<html><body> <p>&lt;&lt;Hello&gt;&gt; ~ {World}</p> <p>That`s all, folks++</p> </body></html>"""
    assert serialize(xml, format) == serialize(xml_strange)
    format = Format(educate=True, squotes=u'\u201a\u2018',
                    dquotes=u'\u201e\u201c', apostrophe="'",
                    dashes=u'\u2013\u2013', ellipsis=u'\u2014')
    # Numeric character references keep this source pure ASCII:
    # &#8222; is U+201E, &#8220; is U+201C, &#8211; is U+2013,
    # &#8218; is U+201A, &#8216; is U+2018 and &#8212; is U+2014.
    xml_german = """<html><body> <p>&#8222;Hello&#8220; &#8211; &#8218;World&#8216;</p> <p>That's all, folks&#8212;</p> </body></html>"""
    assert serialize(xml, format) == serialize(xml_german)
def test_format_wrap():
    """Check line wrapping with the default and with explicit widths."""
    xml = """<body> It's a long way to Tipperary, It's a long way to go. It's a long way to Tipperary To the sweetest girl I know. </body>"""
    format = Format(wrap=True)
    s = serialize(xml, format)
    assert s.endswith(
        "<body> "
        "It's a long way to Tipperary, It's a long way to go. "
        "It's a long way to\nTipperary To the sweetest girl I know. "
        "</body>")
    format = Format(wrap=80)
    s2 = serialize(xml, format)
    # This result used to be computed and then dropped.
    # NOTE(review): assumes wrap=True means a width of 80 -- confirm
    # against the Format implementation.
    assert s2 == s
    format = Format(wrap=32)
    s = serialize(xml, format)
    assert s.endswith("<body> It's a long way to Tipperary,\n"
        "It's a long way to go. It's a\n"
        "long way to Tipperary To the\n"
        "sweetest girl I know. </body>")
    # wrap the body in another element and combine wrapping with indenting
    xml = '<html>%s</html>' % xml
    format = Format(wrap=32, indent=True, min_level=0)
    s = serialize(xml, format)
    assert s.endswith("\t<body> It's a long way to\n"
        "\t\tTipperary, It's\n\t\ta long way to\n"
        "\t\tgo. It's a long\n\t\tway to Tipperary\n"
        "\t\tTo the sweetest\n\t\tgirl I know.\n"
        "\t</body>\n</html>")
def test_format_strip():
    """Check the strip, lstrip and rstrip options of Format."""
    both = Format(strip=True)
    left = Format(lstrip=True)
    right = Format(rstrip=True)
    left_right = Format(lstrip=True, rstrip=True)
    # strip alone must look the same as lstrip and rstrip combined
    assert repr(both) != repr(left)
    assert repr(both) != repr(right)
    assert repr(both) == repr(left_right)

    stripped = serialize(xml1, both)
    assert stripped.endswith('<p>Hello, World</p>')
    assert serialize(xml1, format=left_right) == stripped
    assert serialize(xml1, left).endswith('<p>Hello, World \n \t </p>')
    assert serialize(xml1, right).endswith('<p> \t \n \t Hello, World</p>')

    # paragraphs holding only whitespace collapse to empty elements
    blanks = '<body><p> \t </p><p> \n </p><p>n n</p><p>\t \t</p></body>'
    for fmt in (both, left, right):
        result = serialize(blanks, fmt, 'xml')
        assert result.endswith('<body><p /><p /><p>n n</p><p /></body>')

    # same result for xml, xhtml and the default output method
    for output_args in (('xml',), ('xhtml',), ()):
        result = serialize(xml1, both, *output_args)
        assert result.endswith('<p>Hello, World</p>')
    assert serialize(xml1, both, 'HTML').endswith('<P>Hello, World</P>')
    assert serialize(xml1, both, 'plain') == 'Hello, World'
def test_format_simple_whitespace():
    """Check the simple_whitespace option on the three sample documents."""
    fmt = Format(simple_whitespace=True)
    assert repr(fmt) != repr(Format())
    expectations = (
        (xml1, '<p>\nHello, World\n</p>'),
        (xml2, '<p> Hello, World </p>'),
        (xml3, '<p>\nHello,\nWorld\n</p>'),
    )
    for source, ending in expectations:
        result = serialize(source, fmt)
        assert result.endswith(ending)
def test_format_with_clean_whitespace():
    """Check Format.clean_whitespace when passed as a positional filter."""
    fmt = Format(Format.clean_whitespace)
    assert repr(fmt) != repr(Format())
    expectations = (
        (xml1, '<p>Hello, World</p>'),
        (xml2, '<p>Hello, World</p>'),
        (xml3, '<p>Hello,\nWorld</p>'),
    )
    for source, ending in expectations:
        result = serialize(source, fmt)
        assert result.endswith(ending)
def test_format_simple_newlines():
    """Check the simple_newlines option and its no_empty_lines spelling."""
    newlines = Format(simple_newlines=True)
    no_empty = Format(no_empty_lines=True)
    assert repr(newlines) != repr(Format())
    # both keywords must produce a format with the same repr
    assert repr(newlines) == repr(no_empty)
    expectations = (
        (xml1, xml1),
        (xml2, xml2),
        (xml3, '<p>\nHello, \n\tWorld\n</p>'),
    )
    for source, ending in expectations:
        result = serialize(source, newlines)
        assert result.endswith(ending)
def test_nbsp():
    """Check that &nbsp; is rendered correctly."""
    # The entity is spelled out here: the output is encoded as ASCII, so it
    # can never end with a raw no-break space character.
    xml = '<p>Dr.&nbsp;Snuggles</p>'
    t = kid.Template(source=xml)
    for output in 'xml', 'html', 'xhtml':
        # without an entity map a numeric character reference is expected
        format = Format()
        r = t.serialize(output=output, format=format, encoding='ascii')
        assert r.endswith(xml.replace('&nbsp;', '&#160;'))
        # with the default entity map the named entity is expected again
        format = Format(entity_map=True)
        r = t.serialize(output=output, format=format, encoding='ascii')
        assert r.endswith(xml)
        # a custom entity map supplies its own replacement text
        format = Format(entity_map={u'\xa0': ' Mooney '})
        r = t.serialize(output=output, format=format, encoding='ascii')
        assert r.endswith(xml.replace('&nbsp;', ' Mooney '))
def test_wrap_lines():
    """Check Format.wrap_lines with various widths and third arguments."""
    wrap_lines = Format(wrap=80).wrap_lines

    # two words
    greeting = 'Hello, World!'
    assert wrap_lines(greeting) == greeting
    assert wrap_lines(greeting, 13) == greeting
    for width in (12, 6, 0):
        assert wrap_lines(greeting, width) == 'Hello,\nWorld!'
    assert wrap_lines(greeting, 13, 1) == 'Hello,\nWorld!'

    # nine words of ten digits, each preceded by a blank
    digits = ' 1234567890'
    text = digits * 9
    assert wrap_lines(text) == (
        digits * 7 + digits.replace(' ', '\n') + digits)
    assert wrap_lines(text, 80) == wrap_lines(text)
    assert wrap_lines(text, 40) != wrap_lines(text)
    assert wrap_lines(text, 20) == digits + 8 * digits.replace(' ', '\n')
    assert wrap_lines(text, 21) != wrap_lines(text, 20)
    assert wrap_lines(text, 11) == wrap_lines(text, 20)
    assert wrap_lines(text, 10) == '\n' + wrap_lines(text, 20).lstrip()
    assert wrap_lines(text, 0) == wrap_lines(text, 10)

    # three words of growing length
    words = 'a ab abc'
    assert wrap_lines(words) == words
    assert wrap_lines(words, 8) == words
    assert wrap_lines(words, 7) == 'a ab\nabc'
    for width in (6, 5, 4):
        assert wrap_lines(words, width) == wrap_lines(words, 7)
    assert wrap_lines(words, 3) == 'a\nab\nabc'
    for width in (2, 1, 0):
        assert wrap_lines(words, width) == wrap_lines(words, 3)
    assert wrap_lines(words, 4, 1) == wrap_lines(words, 3)
    assert wrap_lines(words, 80, 79) == 'a\nab abc'
    assert wrap_lines(words, 80, 80) == '\n' + words
def test_format_custom():
    """Check custom filter functions passed positionally to Format."""
    strip = Format.strip
    xml = """<body> Sometimes you need to be British to understand Monty Python. </body>"""
    drop_monty = lambda text: text.replace('Monty ', '')
    make_dutch = lambda text: text.replace('British', 'Dutch')

    stripped = serialize(xml, Format(strip, drop_monty, make_dutch))
    assert stripped.endswith("<body>Sometimes "
        "you need to be Dutch to understand Python.</body>")
    # these three filters give the same result in reverse order
    reordered = serialize(xml, Format(make_dutch, drop_monty, strip))
    assert reordered == stripped
    # leaving out strip must make a difference
    unstripped = serialize(xml, Format(drop_monty, make_dutch))
    assert unstripped != stripped

    make_python = lambda text: text.replace('Dutch', 'a Python')
    monty_to_a = lambda text: text.replace('Monty', 'a')
    chain = (strip, monty_to_a, drop_monty, make_dutch, make_python)
    result = serialize(xml, Format(*chain))
    assert result.endswith("<body>Sometimes "
        "you need to be a Python to understand a Python.</body>")
    result = serialize(xml, Format(wrap=10, *chain))
    assert result.endswith(
        "<body>Sometimes\nyou need\nto be a\nPython to\n"
        "understand\na Python.</body>")

    # filters combined with the plain output method
    xml = '<b>kId\t</b>'
    capitalize = lambda text: text.capitalize()
    assert serialize(xml, Format(capitalize), 'plain') == 'Kid\t'
    lower = lambda text: text.lower()
    assert serialize(xml, Format(lower, rstrip=True), 'plain') == 'kid'
def serialize(self, stream, encoding=None, fragment=False, format=None):
    """Return everything yielded by self.generate() as a single string.

    The pieces are joined with the empty string.  Unless `fragment` is
    true, the joined text is passed through Format.strip before it is
    returned.
    """
    args = (stream, encoding, fragment, format)
    try:
        joined = ''.join(self.generate(*args))
    except TypeError:
        # workaround for bug 905389 in Python < 2.5:
        # generate again and join a tuple instead of the generator
        joined = ''.join(tuple(self.generate(*args)))
    if fragment:
        return joined
    return Format.strip(joined)
def serialize(self, stream, encoding=None, fragment=False, format=None):
    """Join the output of self.generate() into one string and return it.

    For a complete document (`fragment` is false) the text is run through
    Format.strip first; a fragment is returned exactly as generated.
    """
    generate = self.generate
    try:
        output = ''.join(generate(stream, encoding, fragment, format))
    except TypeError:
        # workaround for bug 905389 in Python < 2.5:
        # run the generator once more and join it as a tuple
        pieces = tuple(generate(stream, encoding, fragment, format))
        output = ''.join(pieces)
    if not fragment:
        output = Format.strip(output)
    return output
def test_noformat_tags():
    """Check that the content of some tags is not formatted."""
    lowercase = Format(lambda s: s.lower())
    template = '<%s>Hello, World!</%s>'
    formatted = 'address div h1 p quote span'.split()
    unformatted = 'code kbd math pre script textarea'.split()
    for tags, is_formatted in ((formatted, True), (unformatted, False)):
        for tag in tags:
            expected = template % (tag, tag)
            result = serialize(expected, lowercase)
            if is_formatted:
                # only here the lower-casing filter should have run
                expected = expected.lower()
            assert result.endswith(expected)
def test_educate_quotes():
    """Check that straight quotes are replaced with curly quotes."""
    educate = Format().educate_quotes
    cases = (
        ("'Hello' \"World\"",
            u"\u2018Hello\u2019 \u201cWorld\u201d"),
        ("'Hello', \"World\"!",
            u"\u2018Hello\u2019, \u201cWorld\u201d!"),
        ("'Hello,' \"World!\"",
            u"\u2018Hello,\u2019 \u201cWorld!\u201d"),
        ("('Hello World')",
            u"(\u2018Hello World\u2019)"),
        ("'(Hello World)'",
            u"\u2018(Hello World)\u2019"),
        ('"Isn\'t this fun?"',
            u"\u201cIsn\u2019t this fun?\u201d"),
        ("The 70's and '80s weren't fun.",
            u"The 70\u2019s and \u201980s weren\u2019t fun."),
    )
    for plain, educated in cases:
        assert educate(plain) == educated
def test_format_strip_lines():
    """Check the strip_lines, lstrip_lines and rstrip_lines options."""
    both = Format(strip_lines=True)
    left = Format(lstrip_lines=True)
    right = Format(rstrip_lines=True)
    left_right = Format(lstrip_lines=True, rstrip_lines=True)
    # strip_lines alone must look the same as the two others combined
    assert repr(both) != repr(left)
    assert repr(both) != repr(right)
    assert repr(both) == repr(left_right)

    stripped = serialize(xml1, both)
    assert stripped.endswith('<p>\nHello, World\n</p>')
    assert serialize(xml1, left_right) == stripped
    assert serialize(xml1, left).endswith('<p>\nHello, World \n</p>')
    assert serialize(xml1, right).endswith('<p>\n \t Hello, World\n</p>')

    # all four formats must agree on lines holding only whitespace
    blanks = '<body><p> \t </p><p> \nn\n\t\n </p><p>\t \t</p></body>'
    for fmt in (both, left_right, left, right):
        result = serialize(blanks, fmt, 'xml')
        assert result.endswith('<body><p /><p>\nn\n\n</p><p /></body>')

    assert serialize(xml1, both, 'plain') == '\nHello, World\n'
def test_intent_lines():
    """Check Format.indent_lines with different indentation strings."""
    indent_lines = Format(indent='\t').indent_lines

    # an empty indentation string
    assert indent_lines(string1, '') == ' \t \nHello, World \n'
    assert indent_lines(string2, '') == string2
    assert indent_lines(string3, '') == '\n\n\nHello, \n\nWorld\n\n'

    # the indentation the format was created with
    assert indent_lines(string1) == ' \t \n\tHello, World \n\t'
    assert indent_lines(string2) == string2
    assert indent_lines(string3) == (
        '\n\t\n\t\n\tHello, \n\t\n\tWorld\n\t\n\t')

    # passing the tab explicitly must not change anything
    for sample in (string1, string2, string3):
        assert indent_lines(sample) == indent_lines(sample, '\t')

    # other indentation strings
    assert indent_lines(string1, '*') == ' \t \n*Hello, World \n*'
    assert indent_lines(string2, '*') == string2
    assert indent_lines(string3, ' ') == (
        '\n \n \n Hello, \n \n World\n \n ')
    assert indent_lines('\nprint "Hello"', ' 10 ') == '\n 10 print "Hello"'
def test_doctype_and_injection():
    """Check doctype output and content-type injection of HTMLSerializer."""
    # strict doctype, upper-cased output, empty head
    html_serializer = HTMLSerializer(encoding='utf-8', transpose=True)
    html_serializer.doctype = doctypes['html-strict']
    html_serializer.inject_type = True
    template = kid.Template("<html><head /></html>")
    template.serializer = html_serializer
    from kid.format import Format
    output = template.serialize(format=Format(no_empty_lines=True))
    output = output.replace('\n', '')
    assert output == (
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'
        ' "http://www.w3.org/TR/html4/strict.dtd">'
        '<HTML><HEAD>'
        '<META CONTENT="text/html; charset=utf-8" HTTP-EQUIV="content-type">'
        '</HEAD></HTML>')

    # no doctype, head that already carries a content-type meta element
    html_serializer = HTMLSerializer(encoding='ascii', transpose=False)
    html_serializer.doctype = None
    html_serializer.inject_type = True
    markup = ('<html><head>'
        '<meta content="Reiner Wahnsinn" name="author"/>'
        '<meta content="nonsense" http-equiv="content-type"/>'
        '<meta content="garbage" name="keywords"/>'
        '<title>Content</title>'
        '</head><body><h1>Hello, World!</h1></body></html>')
    template = kid.Template(markup)
    template.serializer = html_serializer
    output = template.serialize()
    assert output == markup.replace('/>', '>')

    # same head without a content-type meta element: one must be added
    markup = markup.replace('content-type', 'garbage-type')
    template = kid.Template(markup)
    template.serializer = html_serializer
    output = template.serialize()
    output = output.replace('\n', '')
    injected = ('<meta content="text/html; charset=ascii"'
        ' http-equiv="content-type"><title>')
    assert output == markup.replace('/>', '>').replace('<title>', injected)
def test_educate_backticks():
    """Check that backtick-style quotes are replaced with curly quotes."""
    educate = Format().educate_backticks
    plain = "`Hello' ``World\""
    educated = u"\u2018Hello\u2019 \u201cWorld\u201d"
    assert educate(plain) == educated
def generate(self, stream, encoding=None, fragment=False, format=None):
    """Serializes an event stream to bytes of the specified encoding.

    This function yields an encoded string over and over until the
    stream is exhausted.

    stream   -- iterable of (event, item) pairs
    encoding -- output encoding; falls back to self.encoding, then 'utf-8'
    fragment -- if true, the XML declaration and the doctype are left out
                and text pending at the end of the stream is still written
    format   -- resolved with self._get_format(); its decl, doctype and
                entity_map override the serializer's when they are not None

    """
    decl = self.decl
    doctype = self.doctype
    encoding = encoding or self.encoding or 'utf-8'
    entity_map = self.entity_map
    format = self._get_format(format)
    if format:
        if format.decl is not None:
            decl = format.decl
        if format.doctype is not None:
            doctype = format.doctype
        if format.entity_map is not None:
            entity_map = format.entity_map
    if entity_map == True:
        # if True, use default HTML entity map
        entity_map = default_entity_map
    elif entity_map == False:
        entity_map = None
    if isinstance(doctype, basestring):
        # allow doctype strings; look up the doctype that is in effect,
        # which may have been taken from the format and not from self
        doctype = doctypes[doctype]

    escape_cdata = XMLSerializer.escape_cdata
    escape_attrib = XMLSerializer.escape_attrib

    lastev = None
    stream = iter(stream)
    names = NamespaceStack(self.namespaces)
    if not fragment:
        if decl:
            yield '<?xml version="1.0" encoding="%s"?>\n' % encoding
        if doctype is not None:
            yield serialize_doctype(doctype) + '\n'
    text = None
    for ev, item in self.apply_filters(stream, format):
        if ev in (START, END) and item.tag == Fragment:
            continue
        elif ev == TEXT:
            # collect consecutive text events, they are written later
            if text is not None:
                text = u''.join([text, item])
            else:
                text = item
            continue
        if lastev == START:
            # the start tag written last is still open: either close it
            # as an empty element or finish it with ">"
            if ev == END and (not text or not (Format.strip(text)
                    or self.is_formatted(item.tag))) \
                    and self.can_be_empty_element(item.tag):
                yield ' />'
                lastev = END
                text = None
                names.pop()
                continue
            yield ">"
        if text:
            yield escape_cdata(text, encoding, entity_map)
            text = None
        if ev == START:
            if item.tag == Comment:
                yield "<!--%s-->" % item.text.encode(encoding)
                lastev = COMMENT
                continue
            elif item.tag == ProcessingInstruction:
                yield "<?%s?>" % item.text.encode(encoding)
                lastev = PI
                continue
            else:
                # remember the mappings in effect before this element
                current_names = names.current
                names.push(namespaces(item, remove=True))
                qname = names.qname(item.tag, default=True)
                yield "<" + qname.encode(encoding)
                for k, v in item.attrib.items():
                    k = names.qname(k, default=False).encode(encoding)
                    v = escape_attrib(v, encoding)
                    yield ' %s="%s"' % (k, v)
                # declare only namespaces that are new or have changed
                for prefix, uri in names.current.items():
                    if prefix not in current_names \
                            or current_names[prefix] != uri:
                        v = escape_attrib(uri, encoding)
                        if prefix:
                            k = 'xmlns:' + prefix.encode(encoding)
                        else:
                            k = 'xmlns'
                        yield ' %s="%s"' % (k, v)
        elif ev == END and item.tag not in (Comment, ProcessingInstruction):
            qname = names.qname(item.tag, default=True)
            yield "</%s>" % qname.encode(encoding)
            names.pop()
        lastev = ev
    if fragment and text:
        yield escape_cdata(text, encoding, entity_map)
    return
def generate(self, stream, encoding=None, fragment=False, format=None):
    """Serializes an event stream to bytes of the specified encoding.

    This function yields an encoded string over and over until the
    stream is exhausted.

    stream   -- iterable of (event, item) pairs
    encoding -- output encoding; falls back to self.encoding, then 'utf-8'
    fragment -- if true, the XML declaration and the doctype are left out
                and text pending at the end of the stream is still written
    format   -- resolved with self._get_format(); its decl, doctype and
                entity_map override the serializer's when they are not None

    """
    decl = self.decl
    doctype = self.doctype
    encoding = encoding or self.encoding or 'utf-8'
    entity_map = self.entity_map
    format = self._get_format(format)
    if format:
        if format.decl is not None:
            decl = format.decl
        if format.doctype is not None:
            doctype = format.doctype
        if format.entity_map is not None:
            entity_map = format.entity_map
    if entity_map == True:
        # if True, use default HTML entity map
        entity_map = default_entity_map
    elif entity_map == False:
        entity_map = None
    if isinstance(doctype, basestring):
        # allow doctype strings; look up the doctype that is in effect,
        # which may have been taken from the format and not from self
        doctype = doctypes[doctype]

    escape_cdata = XMLSerializer.escape_cdata
    escape_attrib = XMLSerializer.escape_attrib

    lastev = None
    stream = iter(stream)
    names = NamespaceStack(self.namespaces)
    if not fragment:
        if decl:
            yield '<?xml version="1.0" encoding="%s"?>\n' % encoding
        if doctype is not None:
            yield serialize_doctype(doctype) + '\n'
    text = None
    for ev, item in self.apply_filters(stream, format):
        if ev in (START, END) and item.tag == Fragment:
            continue
        elif ev == TEXT:
            # collect consecutive text events, they are written later
            if text is not None:
                text = u''.join([text, item])
            else:
                text = item
            continue
        if lastev == START:
            # the start tag written last is still open: either close it
            # as an empty element or finish it with ">"
            if ev == END and (not text or not (Format.strip(text)
                    or self.is_formatted(item.tag))) \
                    and self.can_be_empty_element(item.tag):
                yield ' />'
                lastev = END
                text = None
                names.pop()
                continue
            yield ">"
        if text:
            yield escape_cdata(text, encoding, entity_map)
            text = None
        if ev == START:
            if item.tag == Comment:
                yield "<!--%s-->" % item.text.encode(encoding)
                lastev = COMMENT
                continue
            elif item.tag == ProcessingInstruction:
                yield "<?%s?>" % item.text.encode(encoding)
                lastev = PI
                continue
            else:
                # remember the mappings in effect before this element
                current_names = names.current
                names.push(namespaces(item, remove=True))
                qname = names.qname(item.tag, default=True)
                yield "<" + qname.encode(encoding)
                for k, v in item.attrib.items():
                    k = names.qname(k, default=False).encode(encoding)
                    v = escape_attrib(v, encoding)
                    yield ' %s="%s"' % (k, v)
                # declare only namespaces that are new or have changed
                for prefix, uri in names.current.items():
                    if prefix not in current_names \
                            or current_names[prefix] != uri:
                        v = escape_attrib(uri, encoding)
                        if prefix:
                            k = 'xmlns:' + prefix.encode(encoding)
                        else:
                            k = 'xmlns'
                        yield ' %s="%s"' % (k, v)
        elif ev == END and item.tag not in (
                Comment, ProcessingInstruction):
            qname = names.qname(item.tag, default=True)
            yield "</%s>" % qname.encode(encoding)
            names.pop()
        lastev = ev
    if fragment and text:
        yield escape_cdata(text, encoding, entity_map)
    return
def test_educate_dashes():
    """Check the replacement of double and triple hyphens."""
    educate = Format().educate_dashes
    cases = (
        ("Hello--World", u"Hello\u2013World"),
        ("Hello---World", u"Hello\u2014World"),
        # four hyphens in a row must be left alone
        ("----", "----"),
    )
    for plain, educated in cases:
        assert educate(plain) == educated
def test_educate_ellipses():
    """Check the replacement of three dots, with or without blanks."""
    educate = Format().educate_ellipses
    cases = (
        ("Hello... World. . .", u"Hello\u2026 World\u2026"),
        # runs of five dots must be left alone
        ("..... --- . . . . .", "..... --- . . . . ."),
    )
    for plain, educated in cases:
        assert educate(plain) == educated
def test_stupefy():
    """Check that typographic characters are turned back into ASCII."""
    to_ascii = Format().stupefy
    educated = u"\u2018Hello\u2019\u2014\u201cWorld\u201d\u2026"
    plain = "'Hello'---\"World\"..."
    assert to_ascii(educated) == plain
def test_format_indent():
    """Check the indent option together with min_level and max_level."""
    xml = ('<html><body><h1>Hello, World</h1><div>'
        '<p>Hello, <b>Kids</b>!</p></div></body></html>')
    # indent=True is expected to indent with tabs
    format = Format(indent=True)
    s = serialize(xml, format)
    assert s.endswith(
        '<html>\n<body>\n\t<h1>Hello, World</h1>\n\t<div>\n'
        '\t\t<p>Hello, <b>Kids</b>!</p>\n\t</div>\n</body>\n</html>')
    format = Format(indent='')
    s = serialize(xml, format)
    assert s.endswith('<html>\n<body>\n<h1>Hello, World</h1>\n<div>\n'
        '<p>Hello, <b>Kids</b>!</p>\n</div>\n</body>\n</html>')
    # Three blanks per level: this result is compared with indent=3 below,
    # so a single blank here could never match.
    format = Format(indent='   ')
    s = serialize(xml, format)
    assert s.endswith(
        '<html>\n<body>\n   <h1>Hello, World</h1>\n   <div>\n'
        '      <p>Hello, <b>Kids</b>!</p>\n   </div>\n</body>\n</html>')
    format = Format(indent=2)
    s2 = serialize(xml, format)
    assert s2 != s
    format = Format(indent=3)
    s3 = serialize(xml, format)
    assert s3 == s
    # limiting the indentation with min_level and max_level
    xml = ('<html><body><h1> Hello </h1></body></html>')
    format = Format(indent=True, min_level=0)
    s = serialize(xml, format)
    assert s.endswith(
        '<html>\n\t<body>\n\t\t<h1> Hello </h1>\n\t</body>\n</html>')
    format = Format(indent=True, min_level=3)
    s = serialize(xml, format)
    assert s.endswith('<html>\n<body>\n<h1> Hello </h1>\n</body>\n</html>')
    format = Format(indent=True, max_level=2)
    s = serialize(xml, format)
    assert s.endswith('<html>\n<body>\n\t<h1> Hello </h1>\n</body>\n</html>')
    format = Format(indent=True, min_level=0, max_level=1)
    s = serialize(xml, format)
    assert s.endswith(
        '<html>\n\t<body>\n\t<h1> Hello </h1>\n\t</body>\n</html>')
    # the content of pre is expected to stay untouched in the default
    # output, but to be indented in the xml and plain output
    xml = '<html><body><pre><div><h1>Hello</h1></div></pre></body></html>'
    format = Format(indent=True)
    s = serialize(xml, format)
    assert s.endswith('<html>\n<body>\n\t<pre><div><h1>Hello'
        '</h1></div></pre>\n</body>\n</html>')
    s = serialize(xml, format, 'xml')
    assert s.endswith('<html>\n<body>\n\t<pre>\n\t\t<div>\n\t\t\t'
        '<h1>Hello</h1>\n\t\t</div>\n\t</pre>\n</body>\n</html>')
    s = serialize(xml, format, 'plain')
    assert '\n\t\t\tHello\n' in s
    # a paragraph with inline elements must come out unchanged
    xml = '<p><em>Hello</em> wonderful <em>World</em>.</p>'
    format = Format(indent=True)
    s = serialize(xml, format)
    assert xml in s
    # elements produced by template directives are indented as well
    xml = ("""<body py:strip="" xmlns:py="http://purl.org/kid/ns#"> <ul><li py:for="s in ('Hello', 'World')" py:content="s" /></ul></body>""" )
    format = Format(indent=True, min_level=0, no_empty_lines=True)
    s = serialize(xml, format)
    assert '<ul>\n\t<li>Hello</li>\n\t<li>World</li>\n</ul>' in s