Example #1
0
    def test_get_xpath(self):
        # Exercise get_xpath() with a single expression, with a processor,
        # with a regex, and with a list of expressions.
        loader = TstItemLoader(response=self.response)
        p_text = '//p/text()'

        # A bare expression is expected to come back as a list of matches.
        self.assertEqual(loader.get_xpath(p_text), [u'paragraph'])

        # With TakeFirst() the expected result is a single value.
        first_match = loader.get_xpath(p_text, TakeFirst())
        self.assertEqual(first_match, u'paragraph')

        # With ``re`` the expected result is only the matched part.
        first_regex_match = loader.get_xpath(p_text, TakeFirst(), re='pa')
        self.assertEqual(first_regex_match, u'pa')

        # A list of expressions is expected to give one combined list.
        combined = loader.get_xpath([p_text, '//div/text()'])
        self.assertEqual(combined, [u'paragraph', 'marta'])
Example #2
0
    def test_get_css(self):
        # Exercise get_css() with a single selector, with a processor,
        # with a regex, and with lists of selectors.
        loader = TstItemLoader(response=self.response)
        p_text = 'p::text'

        # A bare selector is expected to come back as a list of matches.
        self.assertEqual(loader.get_css(p_text), [u'paragraph'])

        # With TakeFirst() the expected result is a single value.
        first_match = loader.get_css(p_text, TakeFirst())
        self.assertEqual(first_match, u'paragraph')

        # With ``re`` the expected result is only the matched part.
        first_regex_match = loader.get_css(p_text, TakeFirst(), re='pa')
        self.assertEqual(first_regex_match, u'pa')

        # Lists of selectors are expected to give one combined list, for
        # text selectors as well as attribute selectors.
        texts = loader.get_css([p_text, 'div::text'])
        self.assertEqual(texts, [u'paragraph', 'marta'])

        attributes = loader.get_css(['a::attr(href)', 'img::attr(src)'])
        self.assertEqual(attributes,
                         [u'http://www.scrapy.org', u'/images/logo.png'])
Example #3
0
    def test_replace_css_multi_fields(self):
        # The field name is None throughout: the last processor returns a
        # dict, and the assertions read the field named by its key.
        def as_name(x):
            return {'name': x}

        def as_url(x):
            return {'url': x}

        loader = TstItemLoader(response=self.response)

        # 'name': add from the <div>, then replace with the <p> text.
        loader.add_css(None, 'div::text', TakeFirst(), as_name)
        self.assertEqual(loader.get_output_value('name'), [u'Marta'])

        loader.replace_css(None, 'p::text', TakeFirst(), as_name)
        self.assertEqual(loader.get_output_value('name'), [u'Paragraph'])

        # 'url': add the link href, then replace with the image src.
        loader.add_css(None, 'a::attr(href)', TakeFirst(), as_url)
        self.assertEqual(loader.get_output_value('url'),
                         [u'http://www.scrapy.org'])

        loader.replace_css(None, 'img::attr(src)', TakeFirst(), as_url)
        self.assertEqual(loader.get_output_value('url'),
                         [u'/images/logo.png'])
Example #4
0
    def test_get_value(self):
        # Exercise get_value() with processors and ``re``, then check that
        # add_value() and replace_value() accept the same arguments.
        loader = NameItemLoader()
        name_pattern = u'name:(.*)$'

        # Processors are given positionally after the value.
        upper_first = loader.get_value(
            [u'foo', u'bar'], TakeFirst(), six.text_type.upper)
        self.assertEqual(u'FOO', upper_first)

        # ``re`` alone: the expected result is the captured group of
        # every input value.
        captured = loader.get_value([u'name:foo', u'name:bar'],
                                    re=name_pattern)
        self.assertEqual([u'foo', u'bar'], captured)

        # ``re`` together with TakeFirst(): a single captured value.
        first_captured = loader.get_value([u'name:foo', u'name:bar'],
                                          TakeFirst(),
                                          re=name_pattern)
        self.assertEqual(u'foo', first_captured)

        # The same arguments through add_value(): one collected value.
        loader.add_value('name', [u'name:foo', u'name:bar'],
                         TakeFirst(),
                         re=name_pattern)
        self.assertEqual([u'foo'], loader.get_collected_values('name'))

        # replace_value() swaps out what was collected before.
        loader.replace_value('name', u'name:bar', re=name_pattern)
        self.assertEqual([u'bar'], loader.get_collected_values('name'))
Example #5
0
 class MyLoader(ItemLoader):
     """Loader declaring output processors for name, price and sku."""

     # name: index the first element directly instead of using TakeFirst(),
     # so that an empty first value is still returned.
     name_out = Compose(lambda values: values[0])

     # price: first value, converted with float().
     price_out = Compose(TakeFirst(), float)

     # sku: first value, passed through validate_sku.
     sku_out = Compose(TakeFirst(), validate_sku)
Example #6
0
 def test_replace_xpath_multi_fields(self):
     # The field name is None throughout: the last processor returns a
     # dict, and the assertions read the field named by its key.
     def as_name(x):
         return {'name': x}

     loader = TstItemLoader(response=self.response)

     # Add from the <div> text first ...
     loader.add_xpath(None, '//div/text()', TakeFirst(), as_name)
     self.assertEqual(loader.get_output_value('name'), [u'Marta'])

     # ... then replace it with the <p> text.
     loader.replace_xpath(None, '//p/text()', TakeFirst(), as_name)
     self.assertEqual(loader.get_output_value('name'), [u'Paragraph'])
Example #7
0
 def test_take_first(self):
     # Pins which leading values TakeFirst passes over: in these inputs
     # None and '' are not returned, while 0 is.
     take_first = TakeFirst()

     with_strings = [None, '', 'hello', 'world']
     self.assertEqual(take_first(with_strings), 'hello')

     with_zero = [None, '', 0, 'hello', 'world']
     self.assertEqual(take_first(with_zero), 0)
Example #8
0
 class NameFirstItemLoader(NameItemLoader):
     """NameItemLoader variant that sets ``name_in`` to a TakeFirst()."""

     # Processor attribute for the 'name' field (``<field>_in`` naming).
     name_in = TakeFirst()
Example #9
0
        class MyLoader(ItemLoader):
            """Loader whose img_url output is built on top of the url output."""

            url_out = TakeFirst()

            def img_url_out(self, values):
                """Return the 'url' output value followed by ``values[0]``.

                A falsy 'url' output value is treated as the empty string.
                """
                base_url = self.get_output_value('url')
                if not base_url:
                    base_url = ''
                return base_url + values[0]
Example #10
0
        old_locale = localelib.getlocale(localelib.LC_TIME)
        localelib.setlocale(localelib.LC_TIME, locale)

    time_s = time.strptime(value, format)
    dt = datetime.datetime(*time_s[0:5])
    # 1900 is the default year from strptime, means no year parsed
    if dt.year == 1900:
        dt = dt.replace(year=datetime.datetime.utcnow().year)

    if locale:
        localelib.setlocale(localelib.LC_TIME, old_locale)

    return dt


def to_date(value, format, locale=None):
    """Parse ``value`` with to_datetime() and return only its date part.

    ``format`` and ``locale`` are forwarded to to_datetime() unchanged.
    """
    parsed = to_datetime(value, format, locale)
    return parsed.date()


def to_time(value, format):
    """Parse ``value`` with ``time.strptime`` and return a ``datetime.time``.

    Only the hour and minute of the parsed value are carried over; seconds
    are left at zero even when ``format`` parses them.
    """
    parsed = time.strptime(value, format)
    return datetime.time(hour=parsed.tm_hour, minute=parsed.tm_min)


# Module-level default processors.

# Input side: the callables below are combined with MapCompose in the
# order listed.
default_input_processor = MapCompose(
    replace_br,
    remove_tags,
    unquote_markup,
    replace_escape,
    strip,
    clean_spaces
)

# Output side: a single TakeFirst() instance.
default_output_processor = TakeFirst()