def test_getXMLSerializer(self):
    # Checks three things: a plain iterable is wrapped in an XMLSerializer,
    # an existing serializer is handed back unchanged (same object), and
    # joining the serializer's chunks yields the expected markup.
    t = utils.getXMLSerializer(self.create_iterable())
    # assertTrue replaces the deprecated failUnless alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    self.assertEqual(
        "<html><head><title>My homepage</title></head>"
        "<body>Hello, world!</body></html>",
        "".join(t2))
def test_getXMLSerializer(self):
    # Checks that a plain iterable is wrapped in an XMLSerializer, that an
    # existing serializer is handed back unchanged (same object), and that
    # both the byte output and the unicode output match the expected markup.
    t = utils.getXMLSerializer(self.create_iterable())
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    # A Python 3 bytes literal may only contain ASCII characters, so the
    # non-ASCII letter is spelled as a numeric character reference.
    # NOTE(review): assumes the default byte output is ASCII-encoded with
    # character references -- confirm against the serializer.
    self.assertEqual(
        b"<html><head><title>My homepage</title></head>"
        b"<body>Hello, w&#246;rld!</body></html>",
        b"".join(t2))
    # Serializing with encoding=unicode yields text chunks with the
    # character kept as-is.
    self.assertEqual(
        u"<html><head><title>My homepage</title></head>"
        u"<body>Hello, wörld!</body></html>",
        u"".join(t2.serialize(encoding=unicode)))
def test_getXMLSerializer(self):
    # Checks that a plain iterable is wrapped in an XMLSerializer, that an
    # existing serializer is handed back unchanged (same object), and that
    # both the byte output and the unicode output match the expected markup.
    t = utils.getXMLSerializer(self.create_iterable())
    # assertTrue replaces the deprecated failUnless alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    # A Python 3 bytes literal may only contain ASCII characters, so the
    # non-ASCII letter is spelled as a numeric character reference.
    # NOTE(review): assumes the default byte output is ASCII-encoded with
    # character references -- confirm against the serializer.
    self.assertEqual(
        b"<html><head><title>My homepage</title></head>"
        b"<body>Hello, w&#246;rld!</body></html>",
        b"".join(t2))
    # Serializing with encoding=unicode yields text chunks with the
    # character kept as-is.
    self.assertEqual(
        u"<html><head><title>My homepage</title></head>"
        u"<body>Hello, wörld!</body></html>",
        u"".join(t2.serialize(encoding=unicode)))
def getHTMLSerializer(iterable, pretty_print=False, encoding=None):
    """Create an XMLSerializer that parses its input with the HTML parser.

    The serializer is built through ``getXMLSerializer`` using
    ``html.HTMLParser`` and ``html.tostring``; ``pretty_print`` and
    ``encoding`` are passed through unchanged. When the parsed document's
    doctype mentions XHTML, serialization is switched to ``etree.tostring``
    unless the document contains a ``<script>`` whose text includes a CDATA
    marker.
    """
    xml_serializer = getXMLSerializer(
        iterable,
        parser=html.HTMLParser,
        serializer=html.tostring,
        pretty_print=pretty_print,
        encoding=encoding,
    )

    doctype = xml_serializer.tree.docinfo.doctype
    if not doctype or 'XHTML' not in doctype:
        # No doctype, or not an XHTML one: keep the HTML string serializer.
        return xml_serializer

    # MONKEYPATCH FIXME: etree.tostring breaks <script/> tags that contain
    # javascript wrapped in CDATA, so etree.tostring is only used when no
    # such script is found.
    #
    # The long story:
    #
    # Some form widgets on Plone still use inline javascript, and some of
    # them contain CDATA. Unfortunately, <![CDATA[]]> parsed with
    # html.HTMLParser breaks when serialized with etree.tostring. Yet
    # <![CDATA[]]> must remain commented within <script/> (e.g.
    # //<![CDATA[ or /* <![CDATA[ */), because Plone delivers text/html,
    # not application/xhtml+xml, which is required to properly handle
    # <![CDATA[]]>.
    cdata_scripts = xml_serializer.tree.xpath(
        "//script[contains(text(), '<![CDATA[')]")
    if not cdata_scripts:
        xml_serializer.serializer = etree.tostring

    return xml_serializer
def test_getHTMLSerializer_doctype_xhtml_serializes_to_xhtml(self):
    # Feeds a document with an XHTML 1.0 Transitional doctype through
    # getHTMLSerializer and checks that the pretty-printed output is XHTML
    # (namespaced <html>, self-closing <img />).
    xhtml_preamble = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
    )
    t = utils.getHTMLSerializer(
        self.create_iterable(preamble=xhtml_preamble,
                             body='<img src="foo.png" />'),
        pretty_print=True)
    # assertTrue replaces the deprecated failUnless alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    # Passing a serializer to getXMLSerializer must return the same object.
    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    self.assertEqual(
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml">\n'
        ' <head>\n'
        ' <meta http-equiv="Content-Type" content="text/html; charset=ASCII" />\n'
        ' <title>My homepage</title>\n'
        ' </head>\n'
        ' <body>Hello, world!<img src="foo.png" /></body>\n'
        '</html>\n',
        "".join(t2))
def test_getHTMLSerializer(self):
    # Feeds a document without a doctype through getHTMLSerializer and
    # checks the pretty-printed HTML output (HTML 4.0 Transitional doctype,
    # <img> without a self-closing slash).
    t = utils.getHTMLSerializer(
        self.create_iterable(body='<img src="foo.png" />'),
        pretty_print=True)
    # assertTrue replaces the deprecated failUnless alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    # Passing a serializer to getXMLSerializer must return the same object.
    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    self.assertEqual(
        '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
        '"http://www.w3.org/TR/REC-html40/loose.dtd">\n'
        '<html>\n'
        '<head><title>My homepage</title></head>\n'
        '<body>Hello, world!<img src="foo.png">\n'
        '</body>\n'
        '</html>\n',
        "".join(t2))
def test_getHTMLSerializer(self):
    # Feeds a document without a doctype through getHTMLSerializer and
    # checks the pretty-printed HTML output (HTML 4.0 Transitional doctype,
    # <img> without a self-closing slash).
    t = utils.getHTMLSerializer(
        self.create_iterable(body='<img src="foo.png" />'),
        pretty_print=True)
    # assertTrue replaces the deprecated failUnless alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    # Passing a serializer to getXMLSerializer must return the same object.
    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    self.assertEqual(
        '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
        '"http://www.w3.org/TR/REC-html40/loose.dtd">\n'
        '<html>\n'
        '<head><title>My homepage</title></head>\n'
        '<body>Hello, world!<img src="foo.png">\n'
        '</body>\n'
        '</html>\n',
        "".join(t2))
def test_getHTMLSerializer_doctype_xhtml_serializes_to_xhtml(self):
    # Feeds a document with an XHTML 1.0 Transitional doctype through
    # getHTMLSerializer and checks that the pretty-printed output is XHTML
    # (namespaced <html>, self-closing <img />).
    xhtml_preamble = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
    )
    t = utils.getHTMLSerializer(
        self.create_iterable(preamble=xhtml_preamble,
                             body='<img src="foo.png" />'),
        pretty_print=True)
    # assertTrue replaces the deprecated failUnless alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    # Passing a serializer to getXMLSerializer must return the same object.
    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    self.assertEqual(
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml">\n'
        ' <head>\n'
        ' <meta http-equiv="Content-Type" content="text/html; charset=ASCII" />\n'
        ' <title>My homepage</title>\n'
        ' </head>\n'
        ' <body>Hello, world!<img src="foo.png" /></body>\n'
        '</html>\n',
        "".join(t2))
def test_replace_doctype_blank(self):
    # Feeds an XHTML 1.0 Transitional document through getHTMLSerializer
    # with doctype="" and checks that neither the byte output nor the
    # unicode output starts with a doctype declaration.
    xhtml_preamble = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
    )
    t = utils.getHTMLSerializer(
        self.create_iterable(preamble=xhtml_preamble,
                             body='<img src="foo.png" />'),
        pretty_print=True,
        doctype="")
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    # Passing a serializer to getXMLSerializer must return the same object.
    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    # The expected byte output declares charset=ASCII, so the non-ASCII
    # letter is spelled as a numeric character reference; a raw non-ASCII
    # character is also a SyntaxError inside a Python 3 bytes literal.
    # NOTE(review): confirm the exact reference form the serializer emits.
    self.assertEqual(
        b'<html xmlns="http://www.w3.org/1999/xhtml">\n'
        b' <head>\n'
        b' <meta http-equiv="Content-Type" content="text/html; charset=ASCII" />\n'
        b' <title>My homepage</title>\n'
        b' </head>\n'
        b' <body>Hello, w&#246;rld!<img src="foo.png" /></body>\n'
        b'</html>',
        b"".join(t2).strip())
    # The unicode serialization declares UTF-8 and keeps the character.
    self.assertEqual(
        u'<html xmlns="http://www.w3.org/1999/xhtml">\n'
        u' <head>\n'
        u' <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n'
        u' <title>My homepage</title>\n'
        u' </head>\n'
        u' <body>Hello, wörld!<img src="foo.png" /></body>\n'
        u'</html>',
        u"".join(t2.serialize(encoding=unicode)).strip())
def test_getHTMLSerializer(self):
    # Feeds a document without a doctype through getHTMLSerializer and
    # checks the pretty-printed HTML output, both as bytes and as unicode
    # (surrounding whitespace is stripped before comparing).
    t = utils.getHTMLSerializer(
        self.create_iterable(body='<img src="foo.png" />'),
        pretty_print=True)
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    # Passing a serializer to getXMLSerializer must return the same object.
    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    # A Python 3 bytes literal may only contain ASCII characters, so the
    # non-ASCII letter is spelled as a numeric character reference.
    # NOTE(review): assumes the default byte output is ASCII-encoded with
    # character references -- confirm against the serializer.
    self.assertEqual(
        b'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
        b'"http://www.w3.org/TR/REC-html40/loose.dtd">\n'
        b'<html>\n'
        b'<head><title>My homepage</title></head>\n'
        b'<body>Hello, w&#246;rld!<img src="foo.png">\n'
        b'</body>\n'
        b'</html>',
        b"".join(t2).strip())
    # Serializing with encoding=unicode keeps the character as-is.
    self.assertEqual(
        u'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
        u'"http://www.w3.org/TR/REC-html40/loose.dtd">\n'
        u'<html>\n'
        u'<head><title>My homepage</title></head>\n'
        u'<body>Hello, wörld!<img src="foo.png">\n'
        u'</body>\n'
        u'</html>',
        u"".join(t2.serialize(encoding=unicode)).strip())
def test_replace_doctype_blank(self):
    # Feeds an XHTML 1.0 Transitional document through getHTMLSerializer
    # with doctype="" and checks that neither the byte output nor the
    # unicode output starts with a doctype declaration.
    xhtml_preamble = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
    )
    t = utils.getHTMLSerializer(
        self.create_iterable(preamble=xhtml_preamble,
                             body='<img src="foo.png" />'),
        pretty_print=True,
        doctype="")
    # assertTrue replaces the deprecated failUnless alias, which was
    # removed from unittest in Python 3.12.
    self.assertTrue(isinstance(t, serializer.XMLSerializer))

    # Passing a serializer to getXMLSerializer must return the same object.
    t2 = utils.getXMLSerializer(t)
    self.assertTrue(t2 is t)

    # The expected byte output declares charset=ASCII, so the non-ASCII
    # letter is spelled as a numeric character reference; a raw non-ASCII
    # character is also a SyntaxError inside a Python 3 bytes literal.
    # NOTE(review): confirm the exact reference form the serializer emits.
    self.assertEqual(
        b'<html xmlns="http://www.w3.org/1999/xhtml">\n'
        b' <head>\n'
        b' <meta http-equiv="Content-Type" content="text/html; charset=ASCII" />\n'
        b' <title>My homepage</title>\n'
        b' </head>\n'
        b' <body>Hello, w&#246;rld!<img src="foo.png" /></body>\n'
        b'</html>\n',
        b"".join(t2))
    # The unicode serialization declares UTF-8 and keeps the character.
    self.assertEqual(
        u'<html xmlns="http://www.w3.org/1999/xhtml">\n'
        u' <head>\n'
        u' <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n'
        u' <title>My homepage</title>\n'
        u' </head>\n'
        u' <body>Hello, wörld!<img src="foo.png" /></body>\n'
        u'</html>\n',
        u"".join(t2.serialize(encoding=unicode)))
def test_length(self):
    # The serializer built from the test iterable must report a length of
    # one, and iterating it must produce exactly one chunk.
    t = utils.getXMLSerializer(self.create_iterable())
    # failUnless was removed from unittest in Python 3.12; assertEqual is
    # used instead so a failure reports the actual length.
    self.assertEqual(len(t), 1)
    self.assertEqual(len(list(t)), 1)
def test_length(self):
    # The serializer built from the test iterable must report a length of
    # one, and iterating it must produce exactly one chunk.
    t = utils.getXMLSerializer(self.create_iterable())
    # assertEqual (rather than assertTrue on a comparison) makes a failure
    # report the actual length instead of just "False is not true".
    self.assertEqual(len(t), 1)
    self.assertEqual(len(list(t)), 1)