def test_callback(self):
    """Check how ``callback`` affects the values returned by ``String``."""
    markup = '<span>1</span><span>2</span>'

    # Without a callback the extracted values stay strings.
    raw_values = String(css='span').parse(markup)
    self.assertListEqual(raw_values, ['1', '2'])

    # With ``callback=int`` every extracted value comes back as an int.
    int_values = String(css='span', callback=int).parse(markup)
    self.assertListEqual(int_values, [1, 2])

    # With ``quant=1`` the result is a single converted value, not a list.
    single_value = String(
        css='span:first-child', callback=int, quant=1).parse(markup)
    self.assertEqual(single_value, 1)

    # A selector that matches nothing yields an empty list.
    no_values = String(css='div', callback=int).parse(markup)
    self.assertListEqual(no_values, [])
def test_basic(self):
    """Check that ``Prefix`` returns its children's values directly.

    The same expectation is verified once with css selectors and once
    with xpath selectors.
    """
    selector_variants = (
        # css
        ({'css': 'li'}, {'css': 'span'}),
        # xpath
        ({'xpath': '//li'}, {'xpath': 'span'}),
    )
    for prefix_selector, child_selector in selector_variants:
        name_child = String(name='name', quant=2, **child_selector)
        parser = Prefix(children=[name_child], **prefix_selector)
        result = parser.parse(self.html)
        self.assertDictEqual(result, {'name': ['Mike', 'John']})
def test_callback(self):
    """Check that ``Prefix`` returns whatever its ``callback`` returns.

    The callback is handed the parsed children (indexable by child name)
    and here reduces them to just the list of names.
    """
    def pick_name(parsed):
        return parsed['name']

    name_child = String(name='name', css='span', quant=2)
    parser = Prefix(xpath='//li', callback=pick_name, children=[name_child])
    result = parser.parse(self.html)
    self.assertListEqual(result, ['Mike', 'John'])
def test_callback(self):
    """Check that ``Group`` applies ``callback`` to its parsed children.

    With two ``li`` groups and a callback that picks the ``name`` entry,
    the overall result is the list of both names.
    """
    def pick_name(parsed):
        return parsed['name']

    name_child = String(name='name', css='span', quant=1)
    parser = Group(
        css='li', quant=2, callback=pick_name, children=[name_child])
    result = parser.parse(self.html)
    self.assertListEqual(result, ['Mike', 'John'])
def test_basic(self):
    """Check which part of an element ``String`` extracts for each ``attr``."""
    html = '<span data-val="rocks">Hello <b>world</b>!</span>'

    # Each case pairs the extra keyword arguments with the expected value.
    cases = (
        # by default extract _text
        ({}, 'Hello !'),
        ({'attr': '_text'}, 'Hello !'),
        # _all_text also includes the text of nested elements
        ({'attr': '_all_text'}, 'Hello world!'),
        # an existing attribute yields its value
        ({'attr': 'data-val'}, 'rocks'),
        # a missing attribute yields an empty string
        ({'attr': 'data-invalid'}, ''),
    )
    for extra_kwargs, expected in cases:
        parser = String(name='val', css='span', quant=1, **extra_kwargs)
        parsed = parser.parse(html)
        self.assertEqual(parsed['val'], expected)
def test_build(self):
    """Check that invalid parser definitions raise ``ParserError``.

    Covers two kinds of mistakes: constructing ``Prefix`` / ``Group``
    without ``children``, and passing unnamed children (directly or
    through a nested ``Prefix`` / ``Group``).
    """
    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12. Prefer ``assertRaisesRegex`` and only fall back to the
    # old spelling where the new one does not exist (Python 2.7).
    assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
    if assert_raises_regex is None:
        assert_raises_regex = self.assertRaisesRegexp

    # missing children for Group / Prefix parsers
    assert_raises_regex(
        ParserError,
        r'You must specify "children" for Prefix parser',
        Prefix)
    assert_raises_regex(
        ParserError,
        r'You must specify "children" for Group parser',
        Group)

    # missing name of children elements
    assert_raises_regex(
        ParserError,
        r'Children elements inherited from BaseNamedParser',
        lambda: Prefix(children=[String()]))
    assert_raises_regex(
        ParserError,
        r'Children elements inherited from BaseNamedParser',
        lambda: Group(children=[String()]))
    # the unnamed child is nested one level deeper, inside another parser
    assert_raises_regex(
        ParserError,
        r'Children elements inherited from BaseNamedParser',
        lambda: Prefix(children=[Prefix(children=[String()])]))
    assert_raises_regex(
        ParserError,
        r'Children elements inherited from BaseNamedParser',
        lambda: Prefix(children=[Group(name='x', children=[String()])]))
def test_basic(self):
    """Check that ``Group`` returns one dict of child values per element.

    Every variant selects the groups with ``css='li'``; only the child
    selectors differ. Links are parsed against the base url passed to
    ``parse``, and a group without a link yields ``None``.
    """
    extracted = {
        'val': [
            {'name': 'Mike', 'link': None},
            {'name': 'John', 'link': 'http://example.com/test'},
        ]
    }

    child_selector_variants = (
        # css
        ({'css': 'span'}, {'css': 'a'}),
        # xpath
        ({'xpath': 'span'}, {'xpath': 'a'}),
        ({'xpath': 'descendant::span'}, {'xpath': 'descendant::a'}),
    )
    for name_selector, link_selector in child_selector_variants:
        parser = Group(name='val', css='li', quant=2, children=[
            String(name='name', quant=1, **name_selector),
            Url(name='link', quant='?', **link_selector),
        ])
        val = parser.parse(self.html, url='http://example.com/')
        self.assertDictEqual(val, extracted)