def test_null_bytes(self):
    """Check that a NUL byte in the input text is absent from extract().

    The same markup is fed to both the HTML and the XML selector; the
    expected serializations contain ``lala`` with no ``\\x00`` in between.
    """
    markup = '<root>la\x00la</root>'

    html_sel = HtmlXPathSelector(text=markup)
    self.assertEqual(
        html_sel.extract(),
        u'<html><body><root>lala</root></body></html>',
    )

    xml_sel = XmlXPathSelector(text=markup)
    self.assertEqual(xml_sel.extract(), u'<root>lala</root>')
def test_unquote(self):
    """Check extract_unquoted() on the document, on elements and on text().

    Selections of element nodes are expected to yield empty strings, while
    text() selections are expected to yield the text verbatim (bare ``&``
    and the CDATA payload included).
    """
    # Adjacent literals are concatenated into one newline-separated document.
    xmldoc = (
        "<root>\n"
        " lala\n"
        " <node>\n"
        " blabla&more<!--comment-->a<b>test</b>oh\n"
        " <![CDATA[lalalal&ppppp<b>PPPP</b>ppp&la]]>\n"
        " </node>\n"
        " pff\n"
        "</root>"
    )
    root_text = [u"\n lala\n ", u"\n pff\n"]
    all_text = [
        u"\n lala\n ",
        u"\n blabla&more",
        u"a",
        u"test",
        u"oh\n ",
        u"lalalal&ppppp<b>PPPP</b>ppp&la",
        u"\n ",
        u"\n pff\n",
    ]

    selector = XmlXPathSelector(text=xmldoc)

    self.assertEqual(selector.extract_unquoted(), u"")
    self.assertEqual(selector.select("/root").extract_unquoted(), [u""])
    self.assertEqual(selector.select("/root/text()").extract_unquoted(), root_text)
    self.assertEqual(selector.select("//*").extract_unquoted(), [u"", u"", u""])
    self.assertEqual(selector.select("//text()").extract_unquoted(), all_text)
def test_unquote(self):
    """Check extract_unquoted() results for a small XML document.

    The whole-document call is asserted first; the XPath selections are then
    asserted in a fixed order from a table of (xpath, expected) pairs.
    """
    source_lines = [
        '<root>',
        ' lala',
        ' <node>',
        ' blabla&more<!--comment-->a<b>test</b>oh',
        ' <![CDATA[lalalal&ppppp<b>PPPP</b>ppp&la]]>',
        ' </node>',
        ' pff',
        '</root>',
    ]
    xmldoc = '\n'.join(source_lines)
    sel = XmlXPathSelector(text=xmldoc)

    self.assertEqual(sel.extract_unquoted(), u'')

    # Ordered so the assertions run in the same sequence as written out.
    cases = (
        ('/root', [u'']),
        ('/root/text()', [u'\n lala\n ', u'\n pff\n']),
        ('//*', [u'', u'', u'']),
        ('//text()', [
            u'\n lala\n ',
            u'\n blabla&more',
            u'a',
            u'test',
            u'oh\n ',
            u'lalalal&ppppp<b>PPPP</b>ppp&la',
            u'\n ',
            u'\n pff\n',
        ]),
    )
    for xpath, expected in cases:
        self.assertEqual(sel.select(xpath).extract_unquoted(), expected)
def test_unquote(self):
    """Check what extract_unquoted() returns for elements and text nodes.

    Each result is captured in a local right before it is asserted, so the
    selector calls and assertions happen in the order listed below.
    """
    pieces = (
        '<root>',
        ' lala',
        ' <node>',
        ' blabla&more<!--comment-->a<b>test</b>oh',
        ' <![CDATA[lalalal&ppppp<b>PPPP</b>ppp&la]]>',
        ' </node>',
        ' pff',
        '</root>',
    )
    xmldoc = '\n'.join(pieces)
    doc_sel = XmlXPathSelector(text=xmldoc)

    # Whole document: expected to be an empty string.
    whole = doc_sel.extract_unquoted()
    self.assertEqual(whole, u'')

    # Root element only.
    root_only = doc_sel.select('/root').extract_unquoted()
    self.assertEqual(root_only, [u''])

    # Text nodes that are direct children of the root.
    root_texts = doc_sel.select('/root/text()').extract_unquoted()
    self.assertEqual(root_texts, [u'\n lala\n ', u'\n pff\n'])

    # Every element in the document.
    every_element = doc_sel.select('//*').extract_unquoted()
    self.assertEqual(every_element, [u'', u'', u''])

    # Every text node, CDATA payload included.
    every_text = doc_sel.select('//text()').extract_unquoted()
    self.assertEqual(
        every_text,
        [
            u'\n lala\n ',
            u'\n blabla&more',
            u'a',
            u'test',
            u'oh\n ',
            u'lalalal&ppppp<b>PPPP</b>ppp&la',
            u'\n ',
            u'\n pff\n',
        ],
    )