def test_get_xpath(self):
    """Exercise ``get_xpath`` with one query, processors, ``re`` and a list."""
    loader = TstItemLoader(response=self.response)
    paragraph_query = '//p/text()'

    # A single query alone yields the list of extracted values.
    self.assertEqual(loader.get_xpath(paragraph_query), [u'paragraph'])

    # Extra positional processors are applied to the extracted values.
    first_match = loader.get_xpath(paragraph_query, TakeFirst())
    self.assertEqual(first_match, u'paragraph')

    # With ``re`` given, only the matching part of the value is expected.
    regex_match = loader.get_xpath(paragraph_query, TakeFirst(), re='pa')
    self.assertEqual(regex_match, u'pa')

    # A list of queries returns the values collected for all of them.
    combined = loader.get_xpath([paragraph_query, '//div/text()'])
    self.assertEqual(combined, [u'paragraph', 'marta'])
def test_get_css(self):
    """Exercise ``get_css`` with one selector, processors, ``re`` and lists."""
    loader = TstItemLoader(response=self.response)
    paragraph_selector = 'p::text'

    # A single selector alone yields the list of extracted values.
    self.assertEqual(loader.get_css(paragraph_selector), [u'paragraph'])

    # Extra positional processors are applied to the extracted values.
    first_match = loader.get_css(paragraph_selector, TakeFirst())
    self.assertEqual(first_match, u'paragraph')

    # With ``re`` given, only the matching part of the value is expected.
    regex_match = loader.get_css(paragraph_selector, TakeFirst(), re='pa')
    self.assertEqual(regex_match, u'pa')

    # A list of selectors returns the values collected for all of them.
    texts = loader.get_css([paragraph_selector, 'div::text'])
    self.assertEqual(texts, [u'paragraph', 'marta'])

    # Attribute selectors can be combined in a list as well.
    links = loader.get_css(['a::attr(href)', 'img::attr(src)'])
    self.assertEqual(links, [u'http://www.scrapy.org', u'/images/logo.png'])
def test_replace_css_multi_fields(self):
    """Check ``add_css``/``replace_css`` with ``None`` as the field name.

    Each call passes a processor that returns a one-key dict, and the
    assertions then read the output of the field named by that key.
    """
    def as_name(x):
        return {'name': x}

    def as_url(x):
        return {'url': x}

    loader = TstItemLoader(response=self.response)

    # NOTE(review): the expected values are capitalised; presumably the
    # loader's ``name`` processors do that -- confirm in TstItemLoader.
    loader.add_css(None, 'div::text', TakeFirst(), as_name)
    self.assertEqual(loader.get_output_value('name'), [u'Marta'])

    # After replacing, only the newly extracted value is expected.
    loader.replace_css(None, 'p::text', TakeFirst(), as_name)
    self.assertEqual(loader.get_output_value('name'), [u'Paragraph'])

    loader.add_css(None, 'a::attr(href)', TakeFirst(), as_url)
    self.assertEqual(
        loader.get_output_value('url'), [u'http://www.scrapy.org'])

    loader.replace_css(None, 'img::attr(src)', TakeFirst(), as_url)
    self.assertEqual(loader.get_output_value('url'), [u'/images/logo.png'])
def test_get_value(self):
    """Check ``get_value`` with processors and ``re``, then add/replace."""
    loader = NameItemLoader()
    prefixed = [u'name:foo', u'name:bar']
    pattern = u'name:(.*)$'

    # Processors given after the value are applied to it.
    upper_first = loader.get_value(
        [u'foo', u'bar'], TakeFirst(), six.text_type.upper)
    self.assertEqual(u'FOO', upper_first)

    # ``re`` alone: the captured group of every element is expected.
    captured = loader.get_value(prefixed, re=pattern)
    self.assertEqual([u'foo', u'bar'], captured)

    # ``re`` combined with a processor.
    first_captured = loader.get_value(prefixed, TakeFirst(), re=pattern)
    self.assertEqual(u'foo', first_captured)

    # add_value takes the same processor/``re`` arguments.
    loader.add_value('name', prefixed, TakeFirst(), re=pattern)
    self.assertEqual([u'foo'], loader.get_collected_values('name'))

    # replace_value leaves only the new value collected for the field.
    loader.replace_value('name', u'name:bar', re=pattern)
    self.assertEqual([u'bar'], loader.get_collected_values('name'))
class MyLoader(ItemLoader):
    """Item loader declaring output processors for name, price and sku."""

    # Indexes the first element directly rather than using TakeFirst():
    # "take first which allows empty values".
    name_out = Compose(lambda values: values[0])

    # TakeFirst() combined with float().
    price_out = Compose(TakeFirst(), float)

    # TakeFirst() combined with validate_sku, which is defined outside
    # this block.
    sku_out = Compose(TakeFirst(), validate_sku)
def test_replace_xpath_multi_fields(self):
    """Check ``add_xpath``/``replace_xpath`` with ``None`` as the field name.

    Each call passes a processor that returns a one-key dict, and the
    assertions then read the output of the field named by that key.
    """
    def as_name(x):
        return {'name': x}

    loader = TstItemLoader(response=self.response)

    # NOTE(review): the expected values are capitalised; presumably the
    # loader's ``name`` processors do that -- confirm in TstItemLoader.
    loader.add_xpath(None, '//div/text()', TakeFirst(), as_name)
    self.assertEqual(loader.get_output_value('name'), [u'Marta'])

    # After replacing, only the newly extracted value is expected.
    loader.replace_xpath(None, '//p/text()', TakeFirst(), as_name)
    self.assertEqual(loader.get_output_value('name'), [u'Paragraph'])
def test_take_first(self):
    """Check which element ``TakeFirst`` picks from lists with empty values."""
    take_first = TakeFirst()

    # (input values, expected result): None and '' are passed over, while
    # 0 is returned as a real value.
    cases = (
        ([None, '', 'hello', 'world'], 'hello'),
        ([None, '', 0, 'hello', 'world'], 0),
    )
    for candidates, expected in cases:
        self.assertEqual(take_first(candidates), expected)
class NameFirstItemLoader(NameItemLoader):
    """``NameItemLoader`` variant using ``TakeFirst()`` as ``name_in``."""

    # Overrides/sets the ``name_in`` class attribute with TakeFirst().
    name_in = TakeFirst()
class MyLoader(ItemLoader):
    """Item loader whose ``img_url`` output is built from its ``url`` output."""

    url_out = TakeFirst()

    def img_url_out(self, values):
        """Return the ``url`` output value followed by ``values[0]``.

        An empty string is used as the prefix when the ``url`` output
        value is falsy (for example ``None``).
        """
        prefix = self.get_output_value('url')
        if not prefix:
            prefix = ''
        return prefix + values[0]
old_locale = localelib.getlocale(localelib.LC_TIME) localelib.setlocale(localelib.LC_TIME, locale) time_s = time.strptime(value, format) dt = datetime.datetime(*time_s[0:5]) # 1900 is the default year from strptime, means no year parsed if dt.year == 1900: dt = dt.replace(year=datetime.datetime.utcnow().year) if locale: localelib.setlocale(localelib.LC_TIME, old_locale) return dt def to_date(value, format, locale=None): return to_datetime(value, format, locale).date() def to_time(value, format): time_s = time.strptime(value, format) return datetime.time(time_s[3], time_s[4]) # defaults default_input_processor = MapCompose(replace_br, remove_tags, unquote_markup, replace_escape, strip, clean_spaces) default_output_processor = TakeFirst()