Beispiel #1
0
    def parse(self, response):
        """Scrape every ``div.sku`` deal on a listing page tagged 'jumia'.

        For each deal the title, image URL and discount are copied into a
        ``ScrapyAppItem``. A ``ScrapyItem`` row is saved when no row with the
        same ``deal_title`` exists yet, and a request for the deal's link is
        yielded with the item passed as the request's ``meta``.

        Args:
            response: The listing-page response whose deals are read.

        Yields:
            scrapy.Request: One request per deal, with
            ``self.parse_details`` as its callback.
        """
        for deal in response.css('div.sku'):
            item = ScrapyAppItem()
            url = deal.css('a.link::attr(href)').extract_first()

            title = deal.css('a.link h2.title span.name::text').extract_first()
            image_url = deal.css('a.link div.image-wrapper img::attr(data-src)'
                                 ).extract_first()
            percentage = deal.css(
                'a.link div.price-container span.sale-flag-percent::text'
            ).extract_first()
            item['web_source'] = 'jumia'
            item['deal_title'] = title
            item['deal_image_url'] = image_url
            if percentage:
                # Keep the text before the first "%", then drop everything up
                # to and including the first "-" (presumably the flag reads
                # like "-25%" -- confirm against the live markup).
                number = percentage.split("%", -1)[0]
                _, dash, after_dash = number.partition("-")
                # Indexing the result of split("-") raised IndexError when
                # the flag had no "-", which ended the generator and skipped
                # the remaining deals; fall back to the undashed text instead.
                item['deal_percentage'] = after_dash if dash else number
            else:
                item['deal_percentage'] = '0'
            # Only insert a row when no stored deal has this title yet.
            obj = ScrapyItem.objects.filter(
                deal_title=item['deal_title']).first()
            if not obj:
                scrapy_item = ScrapyItem()
                scrapy_item.web_source = item['web_source']
                scrapy_item.deal_title = item['deal_title']
                scrapy_item.deal_image_url = item['deal_image_url']
                scrapy_item.deal_percentage = item['deal_percentage']
                scrapy_item.save()

            # The extracted href may be relative; resolve it against the page.
            url = response.urljoin(url)
            yield scrapy.Request(url=url,
                                 callback=self.parse_details,
                                 meta=item)
Beispiel #2
0
    def parse(self, response):
        """Scrape every ``div.block-grid-large`` deal on a page tagged 'souq'.

        Each deal's title, image URL and discount are stored in a
        ``ScrapyAppItem``. A ``ScrapyItem`` row is saved when no row with the
        same ``deal_title`` is found, and a request for the deal's link is
        yielded with the item passed as the request's ``meta``.

        Args:
            response: The listing-page response whose deals are read.

        Yields:
            scrapy.Request: One request per deal, with
            ``self.parse_details`` as its callback.
        """
        persisted_fields = (
            'web_source',
            'deal_title',
            'deal_image_url',
            'deal_percentage',
        )
        for block in response.css('div.block-grid-large'):
            item = ScrapyAppItem()

            link_sel = block.css('div.img-bucket a.img-link::attr(href)')
            title_sel = block.css(
                'div ul li.title-row h6 span a::attr(title)')
            image_sel = block.css(
                'div.img-bucket a.img-link img::attr(data-src)')
            discount_sel = block.css(
                'div.img-bucket a.img-link div.discounts-box span.discount::text'
            )

            href = link_sel.extract_first()
            discount = discount_sel.extract_first()

            item['web_source'] = 'souq'
            item['deal_title'] = title_sel.extract_first()
            item['deal_image_url'] = image_sel.extract_first()
            # Keep only the text before the first "%"; default to '0' when
            # the deal shows no discount text.
            item['deal_percentage'] = (
                discount.split("%", -1)[0] if discount else '0'
            )

            # Insert a row only when no stored deal carries this title.
            if not ScrapyItem.objects.filter(
                    deal_title=item['deal_title']).first():
                record = ScrapyItem()
                for field in persisted_fields:
                    setattr(record, field, item[field])
                record.save()

            yield scrapy.Request(url=response.urljoin(href),
                                 callback=self.parse_details,
                                 meta=item)
Beispiel #3
0
 def process_item(self, item, spider):
     """Save *item* as a ``ScrapyItem`` row unless its title is already stored.

     Args:
         item: Mapping-like item providing ``web_source``, ``deal_title``,
             ``deal_image_url`` and ``deal_percentage``.
         spider: Not used by this method.

     Returns:
         The same ``item`` that was passed in, whether or not a row was saved.
     """
     already_stored = ScrapyItem.objects.filter(
         deal_title=item['deal_title']).first()
     if already_stored:
         return item

     record = ScrapyItem()
     record.unique_id = self.unique_id
     for field in ('web_source', 'deal_title',
                   'deal_image_url', 'deal_percentage'):
         setattr(record, field, item[field])
     record.save()
     return item