Example #1
0
 def test_callback(self):
     # Exercise the ``callback`` option of String on a two-<span> snippet.
     markup = '<span>1</span><span>2</span>'

     # Without a callback the matched texts come back as strings.
     raw_values = String(css='span').parse(markup)
     self.assertListEqual(raw_values, ['1', '2'])

     # With callback=int the same matches are expected as integers.
     int_values = String(css='span', callback=int).parse(markup)
     self.assertListEqual(int_values, [1, 2])

     # quant=1 is expected to give a single converted value, not a list.
     first_only = String(
         css='span:first-child', callback=int, quant=1)
     self.assertEqual(first_only.parse(markup), 1)

     # A selector that matches nothing is expected to give an empty list.
     no_match = String(css='div', callback=int).parse(markup)
     self.assertListEqual(no_match, [])
Example #2
0
    def test_basic(self):
        # Prefix with one named String child, selected first through CSS and
        # then through XPath; both variants must produce the same dict.
        expected = {'name': ['Mike', 'John']}

        # css
        css_child = String(name='name', css='span', quant=2)
        css_result = Prefix(css='li', children=[css_child]).parse(self.html)
        self.assertDictEqual(css_result, expected)

        # xpath
        xpath_child = String(name='name', xpath='span', quant=2)
        xpath_result = Prefix(
            xpath='//li', children=[xpath_child]).parse(self.html)
        self.assertDictEqual(xpath_result, expected)
Example #3
0
 def test_callback(self):
     # The Prefix callback receives the parsed dict of children; here it
     # picks out the 'name' entry, so the final value is a plain list.
     name_child = String(name='name', css='span', quant=2)
     parser = Prefix(
         xpath='//li',
         callback=lambda parsed: parsed['name'],
         children=[name_child],
     )
     names = parser.parse(self.html)
     self.assertListEqual(names, ['Mike', 'John'])
Example #4
0
 def test_callback(self):
     # The Group callback picks the 'name' entry out of a parsed dict; with
     # quant=2 the expected overall result is a list of two names.
     name_child = String(name='name', css='span', quant=1)
     parser = Group(
         css='li',
         quant=2,
         callback=lambda parsed: parsed['name'],
         children=[name_child],
     )
     names = parser.parse(self.html)
     self.assertListEqual(names, ['Mike', 'John'])
Example #5
0
    def test_basic(self):
        # Check the value String extracts for each ``attr`` setting.
        markup = '<span data-val="rocks">Hello <b>world</b>!</span>'

        def extract(**options):
            # Build a named single-match parser and return its 'val' entry.
            parser = String(name='val', css='span', quant=1, **options)
            return parser.parse(markup)['val']

        # (extra String options, expected extracted value)
        cases = [
            ({}, 'Hello !'),  # by default extract _text
            ({'attr': '_text'}, 'Hello !'),
            ({'attr': '_all_text'}, 'Hello world!'),
            ({'attr': 'data-val'}, 'rocks'),
            ({'attr': 'data-invalid'}, ''),  # absent attribute
        ]
        for options, expected in cases:
            self.assertEqual(extract(**options), expected)
Example #6
0
 def test_build(self):
     # Verify that building an invalid parser tree raises ParserError with
     # the expected message.
     #
     # ``assertRaisesRegexp`` is a deprecated alias that was removed in
     # Python 3.12, while ``assertRaisesRegex`` is missing on Python 2.
     # Resolve whichever one this interpreter provides so the test runs on
     # both.
     assert_raises_regex = (
         getattr(self, 'assertRaisesRegex', None) or self.assertRaisesRegexp)

     # missing children for Group / Prefix parsers
     assert_raises_regex(
         ParserError, r'You must specify "children" for Prefix parser',
         Prefix)
     assert_raises_regex(
         ParserError, r'You must specify "children" for Group parser',
         Group)

     # missing name of children elements (directly or in a nested parser)
     unnamed_child_builders = (
         lambda: Prefix(children=[String()]),
         lambda: Group(children=[String()]),
         lambda: Prefix(children=[Prefix(children=[String()])]),
         lambda: Prefix(children=[Group(name='x', children=[String()])]),
     )
     for build in unnamed_child_builders:
         assert_raises_regex(
             ParserError,
             r'Children elements inherited from BaseNamedParser',
             build)
Example #7
0
    def test_basic(self):
        # Group with a required String child and an optional Url child.
        # Every selector variant below must produce the same structure.
        expected = {
            'val': [
                {'name': 'Mike', 'link': None},
                {'name': 'John', 'link': 'http://example.com/test'},
            ]
        }
        base_url = 'http://example.com/'

        # (selector for the 'name' child, selector for the 'link' child)
        child_selectors = [
            # css
            ({'css': 'span'}, {'css': 'a'}),
            # xpath
            ({'xpath': 'span'}, {'xpath': 'a'}),
            ({'xpath': 'descendant::span'}, {'xpath': 'descendant::a'}),
        ]
        for name_selector, link_selector in child_selectors:
            children = [
                String(name='name', quant=1, **name_selector),
                Url(name='link', quant='?', **link_selector),
            ]
            group = Group(name='val', css='li', quant=2, children=children)
            parsed = group.parse(self.html, url=base_url)
            self.assertDictEqual(parsed, expected)