class AmazonEntry(scrapper.Item):
    """Item whose fields are pulled out of a page with XPath selectors.

    Judging by the selectors and the class name this targets an Amazon
    product page (NOTE(review): confirm against the crawler that uses it).
    Every callback below is written to accept three positional arguments:
    ``(value, content, response)``.
    """

    # Text of the element with id "productTitle"; stripped of surrounding
    # whitespace, or None when the extracted value is falsy.
    title = scrapper.Field(
        "//*[@id='productTitle']/text()",
        lambda value, content, response: None if not value else value.strip(),
    )

    # Text of the <span class="a-color-price">; same strip-or-None handling.
    # The second and third callback arguments are deliberately ignored.
    price = scrapper.Field(
        "//span[@class='a-color-price']/text()",
        lambda value, _, __: None if not value else value.strip(),
    )

    # Raw "data-a-dynamic-image" attribute of the main image, post-processed
    # by get_image (defined outside this block).
    img = scrapper.Field(
        '//div[@id="imgTagWrapperId"]/img/@data-a-dynamic-image',
        get_image,
    )
class TestCrawlerClass(scrapper.Item):
    """Item with four XPath-selected fields, all rooted at <div class="wrap">."""

    # Plain fields: no callback, the selected value is used unchanged.
    title = scrapper.Field('//div[@class="wrap"]/h1/text()')
    author = scrapper.Field('//div[@class="wrap"]/a/text()')

    # href of the author link with the 'emialto:' prefix removed.
    # NOTE(review): 'emialto:' looks like a misspelling of 'mailto:' -- confirm
    # against the HTML fixture before changing it; kept verbatim here.
    author_email = scrapper.Field(
        '//div[@class="wrap"]/a/@href',
        lambda value, content, response: value.replace('emialto:', ''),
    )

    # Text of the nested content <div>, stripped of surrounding whitespace.
    # Unlike a guarded callback, this one does not handle a missing value.
    content = scrapper.Field(
        '//div[@class="wrap"]/div[@class="content"]/text()',
        lambda value, content, response: value.strip(),
    )
def test_basic_initialisation(self):
    """A Field built from a selector alone has no callback and no value."""
    field = scrapper.Field('h1')

    self.assertEqual(field.selector, 'h1')
    # assertIsNone is an identity check: it cannot be satisfied by an object
    # whose __eq__ happens to compare equal to None, and it reports a clearer
    # failure message than assertEqual(x, None).
    self.assertIsNone(field.callback)
    self.assertIsNone(field._value)
    self.assertIsNone(field.__get__(None))
class TestCrawlerClass(scrapper.Item):
    """Minimal item: a single field selecting the text of every <h1>."""

    title = scrapper.Field(
        '//h1/text()',
    )
def test_repr(self):
    """repr() of a callback-less Field shows the selector followed by None."""
    field = scrapper.Field('//h1/text()')
    expected = "Field('//h1/text()', None)"

    self.assertEqual(expected, repr(field))
def test_returned_value_must_be_exact(self):
    """An int stored in ``_value`` must come back from ``__get__`` as an int."""
    field = scrapper.Field('//h1/text()')
    field._value = 1234

    # First read: the textual form of the returned value.
    as_text = str(field.__get__(None))
    self.assertEqual(as_text, '1234')

    # Second read: the exact type of the returned value.
    returned_type = type(field.__get__(None))
    self.assertEqual(returned_type, int)
def test_raises_exception(self):
    """Constructing a Field with no arguments must raise ValueError."""
    # Callable form of assertRaises: invokes scrapper.Field() with no
    # arguments and checks the exception type.
    self.assertRaises(ValueError, scrapper.Field)
class TestClassItem(scrapper.Item):
    """Minimal item exposing one field, ``name``, taken from <h1> text."""

    name = scrapper.Field(
        '//h1/text()',
    )
class WykopItem(scrapper.Item):
    """Item with a title and a link read from the same heading anchor.

    Both selectors target the <a> inside the <h2> of a <div> whose class
    attribute contains "lcontrast".
    """

    # Anchor text; whitespace-stripped, or None when the value is falsy.
    title = scrapper.Field(
        '//div[contains(@class, "lcontrast")]/h2/a/text()',
        lambda value, content, response: None if not value else value.strip(),
    )

    # Anchor href, used as extracted (no callback).
    link = scrapper.Field(
        '//div[contains(@class, "lcontrast")]/h2/a/@href',
    )
class RedditEntry(scrapper.Item):
    """Item with a title and a link read from the anchor in <p class="title">."""

    # Anchor text; whitespace-stripped, or None when the value is falsy.
    title = scrapper.Field(
        '//p[@class="title"]/a/text()',
        lambda value, content, response: None if not value else value.strip(),
    )

    # Anchor href, used as extracted (no callback).
    link = scrapper.Field(
        '//p[@class="title"]/a/@href',
    )
class ImgurEntry(scrapper.Item):
    """Item with a link and a description, both using relative XPaths.

    The selectors start with ``.//``, so they are evaluated relative to a
    context node rather than the document root.
    """

    # href of the <a class="image-list-link"> element.
    link = scrapper.Field(
        './/a[@class="image-list-link"]/@href',
    )

    # Text of the <p> inside <div class="hover">.
    description = scrapper.Field(
        './/div[@class="hover"]/p/text()',
    )