Esempio n. 1
0
    def parse(spider: StaticSpider, response: StaticResponse) -> None:
        """Save each quote/author pair found in the response, then follow the page link.

        Quote texts are selected with an XPath query and author texts with a CSS
        query; both are stripped of surrounding whitespace and paired positionally.
        If the two lists differ in length, the surplus entries are ignored.

        :param spider: spider used to persist every extracted item.
        :param response: response whose content is queried and which follows the next link.
        """
        raw_quotes = response.xpath('//blockquote/p/text()').getall()
        cleaned_quotes = [text.strip() for text in raw_quotes]
        raw_authors = response.css('blockquote footer::text').getall()
        cleaned_authors = [name.strip() for name in raw_authors]

        for text, name in zip(cleaned_quotes, cleaned_authors):
            item = {'quote': text, 'author': name}
            spider.save_item(item)

        # Only the second anchor whose href mentions "page" is considered.
        next_page = response.xpath('//a[2][contains(@href, "page")]/@href').get()
        if next_page is None:
            return
        response.follow(next_page)
Esempio n. 2
0
    def test_should_save_content_to_backup_file(self, tmp_path, capsys):
        """Saved items must be readable from the backup file and pass through the processor."""

        def processor(item):
            # Leaves the item untouched; the printed marker proves it was invoked.
            print("I'm a processor")
            return item

        backup_path = str((tmp_path / 'backup.mp').resolve())
        expected_items = [{'fruit': 'pineapple'}, {'fruit': 'apple'}]
        config = Configuration(backup_filename=backup_path, item_processors=[processor])
        static_spider = StaticSpider(
            urls=['https://foo.com'],
            parse=lambda _spider, _response: None,
            config=config,
        )

        for fruit in expected_items:
            static_spider.save_item(fruit)
        captured = capsys.readouterr()

        assert expected_items == list(read_mp(backup_path))
        assert "I'm a processor" in captured.out
Esempio n. 3
0
    def test_should_call_item_processors_and_reject_item_if_one_processor_returns_none(self, capsys, mocker):
        """An item is logged as rejected when one processor in the chain returns None."""
        debug_mock = mocker.patch('logging.Logger.debug')
        item_to_save = {'banana': True}

        def passthrough_processor(item):
            # Runs first; the printed marker proves the chain was entered.
            print("I'm a processor")
            return item

        def rejecting_processor(item):
            # Returning None is what signals that the item must be dropped.
            return None if 'banana' in item else item

        processors = [passthrough_processor, rejecting_processor]
        config = Configuration(item_processors=processors)
        static_spider = StaticSpider(
            urls=['http://foo.com'],
            parse=lambda _spider, _response: None,
            config=config,
        )
        static_spider.save_item(item_to_save)

        debug_mock.assert_any_call('item %s was rejected', item_to_save)
        captured = capsys.readouterr()
        assert "I'm a processor" in captured.out