Beispiel #1
0
    def parse(self, response):
        """Record the page's email address in Redis and follow every link.

        Args:
            response: The downloaded page; it is queried with CSS selectors
                and used to resolve relative links.

        Yields:
            scrapy.Request: One request per ``<a href>`` on the page, built
            from the href resolved against the response URL. No explicit
            callback is passed.
        """
        email = response.css('.user span::text').extract_first()
        # Only store a value when the selector produced non-empty text.
        if email:
            redis_connection.sadd('email', email)

        for href in response.css("a::attr('href')"):
            url = response.urljoin(href.extract())
            try:
                request = scrapy.Request(url)
            except ValueError:
                # Request rejects URLs it considers malformed (for example
                # ``mailto:`` or ``javascript:`` hrefs). Skip the link rather
                # than letting one bad href abort the generator and drop the
                # remaining links on the page.
                continue
            yield request
Beispiel #2
0
    def parse_item(self, response):
        """Extract the page's email address, store it in Redis, and emit it.

        The address is added to the Redis set keyed by ``self.name`` and also
        returned as an item for further processing.

        Args:
            response: The downloaded page; it is queried with a CSS selector.

        Returns:
            EmailCrawlerItem with its ``email`` field set, or ``None`` when
            the page contains no matching email text.
        """
        email = response.css('.user span::text').extract_first()

        # Nothing matched (or the text was empty): there is no value to store
        # and no meaningful item to emit, so do not pass None on to Redis.
        if not email:
            return None

        # Add to redis
        redis_connection.sadd(self.name, email)

        # Return to process
        item = EmailCrawlerItem()
        item['email'] = email

        return item