def test_null_bytes(self):
        """Verify that a NUL byte in the input text does not appear in extract() output.

        The same markup is fed to both the HTML and the XML selector; the
        expected strings contain ``lala`` with the ``\\x00`` removed.
        """
        markup = '<root>la\x00la</root>'

        # HTML selector: the expected output wraps the fragment in <html><body>.
        html_selector = HtmlXPathSelector(text=markup)
        html_output = html_selector.extract()
        self.assertEqual(html_output,
                         u'<html><body><root>lala</root></body></html>')

        # XML selector: the expected output is the fragment itself.
        xml_selector = XmlXPathSelector(text=markup)
        xml_output = xml_selector.extract()
        self.assertEqual(xml_output, u'<root>lala</root>')
예제 #2
0
    def test_null_bytes(self):
        """Check that extract() output omits a NUL byte present in the source text.

        Runs the same input through the HTML selector first and the XML
        selector second, comparing each result with its expected markup.
        """
        raw_text = '<root>la\x00la</root>'
        cases = (
            (HtmlXPathSelector, u'<html><body><root>lala</root></body></html>'),
            (XmlXPathSelector, u'<root>lala</root>'),
        )
        for selector_cls, expected in cases:
            extracted = selector_cls(text=raw_text).extract()
            self.assertEqual(extracted, expected)
    def test_unquote(self):
        """Exercise extract_unquoted() on XML with an entity, a comment and CDATA.

        Per the expectations below, element selections yield empty strings,
        while text-node selections yield their text: ``&amp;`` outside CDATA
        comes back as ``&``, the comment contributes nothing, and the CDATA
        content is returned verbatim.
        """
        doc_lines = [
            "<root>",
            "  lala",
            "  <node>",
            "    blabla&amp;more<!--comment-->a<b>test</b>oh",
            "    <![CDATA[lalalal&ppppp<b>PPPP</b>ppp&amp;la]]>",
            "  </node>",
            "  pff",
            "</root>",
        ]
        selector = XmlXPathSelector(text="\n".join(doc_lines))

        # The selector itself and element nodes unquote to empty strings.
        self.assertEqual(selector.extract_unquoted(), u"")

        root_nodes = selector.select("/root")
        self.assertEqual(root_nodes.extract_unquoted(), [u""])

        # Only the text nodes that are direct children of <root>.
        root_text_nodes = selector.select("/root/text()")
        self.assertEqual(
            root_text_nodes.extract_unquoted(),
            [u"\n  lala\n  ", u"\n  pff\n"],
        )

        every_element = selector.select("//*")
        self.assertEqual(every_element.extract_unquoted(), [u"", u"", u""])

        # Every text node in the document, in document order.
        every_text_node = selector.select("//text()")
        self.assertEqual(
            every_text_node.extract_unquoted(),
            [
                u"\n  lala\n  ",
                u"\n    blabla&more",
                u"a",
                u"test",
                u"oh\n    ",
                u"lalalal&ppppp<b>PPPP</b>ppp&amp;la",
                u"\n  ",
                u"\n  pff\n",
            ],
        )
예제 #4
0
    def test_unquote(self):
        """Check extract_unquoted() results for element and text-node selections.

        The fixture mixes plain text, an ``&amp;`` entity, a comment, a nested
        element and a CDATA section. The assertions expect empty strings for
        elements and the literal text for text nodes.
        """
        source_lines = (
            '<root>',
            '  lala',
            '  <node>',
            '    blabla&amp;more<!--comment-->a<b>test</b>oh',
            '    <![CDATA[lalalal&ppppp<b>PPPP</b>ppp&amp;la]]>',
            '  </node>',
            '  pff',
            '</root>',
        )
        xml_text = '\n'.join(source_lines)
        sel = XmlXPathSelector(text=xml_text)

        # Whole document, then the root element: both expected to be empty.
        self.assertEqual(sel.extract_unquoted(), u'')
        self.assertEqual(sel.select('/root').extract_unquoted(), [u''])

        # Text nodes directly under <root>.
        direct_text = sel.select('/root/text()').extract_unquoted()
        self.assertEqual(direct_text, [u'\n  lala\n  ', u'\n  pff\n'])

        # All three elements (<root>, <node>, <b>) are expected to be empty.
        all_elements = sel.select('//*').extract_unquoted()
        self.assertEqual(all_elements, [u'', u'', u''])

        # All text nodes; the CDATA payload is expected unchanged.
        all_text = sel.select('//text()').extract_unquoted()
        self.assertEqual(all_text, [
            u'\n  lala\n  ',
            u'\n    blabla&more',
            u'a',
            u'test',
            u'oh\n    ',
            u'lalalal&ppppp<b>PPPP</b>ppp&amp;la',
            u'\n  ',
            u'\n  pff\n',
        ])
    def test_unquote(self):
        """Assert what extract_unquoted() returns for several XPath selections.

        Expected values: empty strings for the selector itself and for element
        nodes; raw text for text nodes, with ``&amp;`` shown as ``&`` outside
        CDATA and the CDATA body kept exactly as written.
        """
        pieces = [
            '<root>',
            '  lala',
            '  <node>',
            '    blabla&amp;more<!--comment-->a<b>test</b>oh',
            '    <![CDATA[lalalal&ppppp<b>PPPP</b>ppp&amp;la]]>',
            '  </node>',
            '  pff',
            '</root>',
        ]
        document = XmlXPathSelector(text='\n'.join(pieces))

        self.assertEqual(document.extract_unquoted(), u'')

        root_selection = document.select('/root')
        self.assertEqual(root_selection.extract_unquoted(), [u''])

        # Text nodes that are immediate children of <root>.
        root_text_selection = document.select('/root/text()')
        self.assertEqual(root_text_selection.extract_unquoted(),
                         [u'\n  lala\n  ', u'\n  pff\n'])

        element_selection = document.select('//*')
        self.assertEqual(element_selection.extract_unquoted(),
                         [u'', u'', u''])

        # Every text node in document order, including the CDATA section.
        text_selection = document.select('//text()')
        self.assertEqual(text_selection.extract_unquoted(), [
            u'\n  lala\n  ',
            u'\n    blabla&more',
            u'a',
            u'test',
            u'oh\n    ',
            u'lalalal&ppppp<b>PPPP</b>ppp&amp;la',
            u'\n  ',
            u'\n  pff\n',
        ])