def parse(self, response):
    """Build one ``ComercioNew`` item from an article page and yield it.

    A single ``ItemLoader`` is reused for the whole page: before each group
    of fields its ``selector`` is re-pointed at the page fragment that holds
    those fields, so every CSS query below is relative to that fragment.

    Args:
        response: the downloaded article page.

    Yields:
        The loaded ``ComercioNew`` item. ``TakeFirst`` is the default output
        processor for every field.
    """
    info_loader = ItemLoader(
        item=ComercioNew(), selector=response.css('div.date'))
    info_loader.default_output_processor = TakeFirst()
    info_loader.add_css('Date', 'div::text')

    info_loader.selector = response.css('div.title')
    info_loader.add_css('Title', 'h1::text')

    info_loader.selector = response.css('div.social-nav')
    info_loader.add_css('Views', 'div.pageviews::text')

    # Reaction counters are matched to their field names by position.
    reactions_names = [
        'Indignado', 'Triste', 'Indiferente', 'Sorprendido', 'Contento'
    ]
    reactions_selector = response.css('div.rating>div.score')
    # zip() stops at the shorter sequence, so a page exposing fewer than
    # five score nodes no longer raises IndexError (which discarded the
    # whole item); the missing reactions are simply left unset.
    for reaction_name, score_node in zip(reactions_names, reactions_selector):
        info_loader.selector = score_node
        info_loader.add_css(reaction_name, '.number::text')

    info_loader.selector = response.css('div.right-col>div.info')
    info_loader.add_css('Editor', 'div.signature>div::text')

    # Category and Tag are both read from the breadcrumbs fragment.
    info_loader.selector = response.css('div.breadcrumbs')
    info_loader.add_css('Category', 'a::text')
    info_loader.add_css('Tag', 'a.highlighted::text')

    yield info_loader.load_item()
Esempio n. 2
0
    def parse_product(self, response):
        """Populate the item carried in ``response.meta["item"]`` from a product page.

        Header fields are read relative to the main row of the page, the size
        options through a nested loader, and the detail fields relative to the
        tab-content container, whose accordion panels are addressed by their
        ``data-section`` number.

        Args:
            response: the product page; ``response.meta["item"]`` must hold
                the item to fill.

        Returns:
            The loaded item.
        """
        main_row = response.selector.xpath(
            '//section[@id="main"]/div[@class="row"]')
        loader = ItemLoader(item=response.meta["item"], selector=main_row)

        header_fields = (
            ('detail_name', './/h4[@class="name_detail"]/text()'),
            ('brand', './/div[@class="product_manufacturer->name"]/text()'),
            ('description',
             './/div[@class="product-description-short-detail" and '
             '@itemprop="description"]/p/descendant-or-self::*/text() '),
        )
        for field_name, query in header_fields:
            loader.add_xpath(field_name, query)

        # The nested loader is bound to the <option> nodes at creation time.
        # Price is not collected in this callback.
        loader.nested_xpath('//select[@id="group_1"]/option').add_xpath(
            'size_format', './/text()')

        # From here on, queries are relative to the tab-content container.
        loader.selector = response.selector.xpath(
            '//div[@class="tabs"]/div[@class="tab-content" and @id="tab-content"]'
        )

        # Every accordion panel shares this wrapper; only the section number
        # and the path below the panel differ.
        accordion = (
            './/div[@class="elementor-accordion-content elementor-clearfix"'
            ' and @data-section="{section}"]{tail}')
        accordion_fields = (
            ('detail_description', 1, '/ol/descendant-or-self::*/text()'),
            ('detail_ingredients', 2, '/p/text()'),
            ('nutritional_facts', 3, '/descendant-or-self::*/text()'),
            ('feed_guide', 4, '/p/descendant-or-self::*/text()'),
            ('feed_guide_img_url', 4, '//img/@src'),
        )
        for field_name, section, tail in accordion_fields:
            loader.add_xpath(
                field_name, accordion.format(section=section, tail=tail))

        # The data sheet is a <dl>: <dt> holds the keys, <dd> the values.
        data_sheet = './/dl[@class="data-sheet"]'
        loader.add_xpath('extra_information_keys',
                         data_sheet + '/dt[@class="name"]/text()')
        loader.add_xpath('extra_information_values',
                         data_sheet + '/dd[@class="value"]/text()')

        self.log(f'finished parsing product page {response.url}')

        return loader.load_item()
Esempio n. 3
0
    def parse_product(self, response):
        """Populate the item carried in ``response.meta["item"]`` from a product page.

        The loader's selector is moved three times: first the ``row`` section
        (name, brand, description), then the accordion panels (description,
        ingredients, nutritional facts), and finally the review blocks.

        Args:
            response: the product page; ``response.meta["item"]`` must hold
                the item to fill.

        Returns:
            The loaded item.
        """
        page = response.selector
        loader = ItemLoader(
            item=response.meta["item"],
            selector=page.xpath('//section[@class="row"]'),
        )

        loader.add_xpath('detail_name', './/h1[@itemprop="name"]/text()')
        loader.add_xpath('brand', './/h5[@itemprop="brand"]/text()')
        loader.add_xpath(
            'description',
            './/div[@class="col-xs-12 col-sm-12 col-md-12 col-lg-12"]/p/text()')

        # One <option> per SKU: the price sits in an attribute, the size in
        # the option text.
        sku_loader = loader.nested_xpath('//select[@id="__sku"]/option')
        sku_loader.add_xpath('price', './/@data-priceformat')
        sku_loader.add_xpath('size_format', './/text()')

        # Accordion panels.
        loader.selector = page.xpath(
            '//div[@id="accordion"]/div[@class="panel panel-default"]')
        all_text = '/div/descendant-or-self::*/text()'
        loader.add_xpath('detail_description',
                         './/div[@id="collapseOne"]' + all_text)
        loader.add_xpath('detail_ingredients',
                         './/div[@id="collapseTwo"]' + all_text)
        loader.add_xpath('nutritional_facts',
                         './/div[@id="collapseThree"]' + all_text)
        loader.add_xpath('nutritional_facts_img_url',
                         './/*[@id="collapseThree"]/div/p/img/@src')

        # Customer reviews.
        review_blocks = page.xpath('//*[@id="review"]/div/div/div')
        loader.selector = review_blocks
        loader.add_xpath('customer_review_header',
                         './/h3[@class="panel-title"]/text()')
        # One rating string per review block: its label texts joined together.
        loader.add_value(
            'customer_review_rating',
            [''.join(block.xpath('.//label/text()').getall())
             for block in review_blocks],
        )
        loader.add_xpath(
            'customer_review',
            './/blockquote[@class="blockquote-reverse"]/p/text()')

        self.log(f'finished parsing product page {response.url}')
        return loader.load_item()
Esempio n. 4
0
 def parse_product_item_meta(self, response):
     """Add ``detail_description`` to the item in ``response.meta["item"]``.

     The text is collected from every ``div`` below the element whose id is
     ``collapseOne``. The loaded item is logged between BEGIN/END markers
     and then returned.

     Args:
         response: the product page; ``response.meta["item"]`` must hold
             the item to fill.

     Returns:
         The loaded item.
     """
     # The expression previously ended in "/", which is not a valid XPath
     # location path and made Selector.xpath() raise before anything ran.
     selector = response.selector.xpath('//*[@id="collapseOne"]')
     loader = ItemLoader(item=response.meta["item"], selector=selector)
     loader.add_xpath('detail_description',
                      './/div/descendant-or-self::*/text()')
     # Load once and reuse the result for both the log line and the return.
     item = loader.load_item()
     self.log('>>>>>>>>>>>>>>>>>>> BEGIN >>>>>>>>>>>>>>>>>>>')
     self.log(item)
     self.log('>>>>>>>>>>>>>>>>>>> END >>>>>>>>>>>>>>>>>>>')
     self.log('parsing product item meta')
     return item
    def parse_category(self, response):
        """Follow category pagination and collect fields for each product card.

        Yields a request for the next listing page when the page advertises
        one through ``<link rel="next">``. For every product card a ``Product``
        loader is filled with the category, name, brand and price; when the
        card has a sale paragraph, the struck-through price and the sale price
        are read from it instead.

        Args:
            response: a category listing page.

        Yields:
            ``scrapy.Request`` for the next page, handled by this same method.
        """
        next_href = response.css('link[rel="next"]::attr(href)').extract_first()
        if next_href:
            yield scrapy.Request(
                response.urljoin(next_href), callback=self.parse_category)

        product_cards = response.css(
            'div[class^="product-grid-item grid__item xlarge-up--one-quarter one-half"]'
        )
        for card in product_cards:
            product_loader = ItemLoader(item=Product(), response=response)
            product_loader.add_value('category', self.get_category(response))

            text_block = card.css('a > div[class="product-text"]')
            if text_block:
                # Name and brand are read relative to the text block.
                product_loader.selector = text_block
                product_loader.add_css('name', 'p.title *::text')
                product_loader.add_css('brand', 'h2 *::text')

                sale_block = card.css('p[class*="sale"]')
                if sale_block:
                    # On sale: the original price is the struck-through <s>.
                    product_loader.selector = sale_block
                    product_loader.add_css('price', 's *::text')
                    product_loader.add_css(
                        'sale_price', 'span[itemprop="price"] *::text')
                else:
                    product_loader.add_css('price', 'p.price > span *::text')
            # NOTE(review): nothing visible here calls load_item() on the
            # loader or yields it - confirm whether the rest of the loop body
            # is missing.
Esempio n. 6
0
 def parse(self, response):
     """Yield one follow-up request per article found in the response.

     Namespaces are removed from the response selector first. Each node
     matched by ``xpath.ARTICLE_ITEM`` then gets its own ``ArticleItem``
     loader, which is handed together with the article link to
     ``self._create_article_request``.

     Args:
         response: the response listing the articles.

     Yields:
         Whatever ``self._create_article_request`` returns for each article.
     """
     response.selector.remove_namespaces()

     for node in response.xpath(xpath.ARTICLE_ITEM):
         # These values are extracted by hand because they need a fallback
         # or post-processing before being stored.
         link = node.xpath(xpath.ARTICLE_LINK).extract_first()
         author = node.xpath(xpath.ARTICLE_AUTHOR).extract_first()
         if author is None:
             # The primary author path matched nothing; try the alternative.
             author = node.xpath(
                 xpath.ARTICLE_AUTHOR_ALTERNATIVE).extract_first()
         categories = " | ".join(
             node.xpath(xpath.ARTICLE_CATEGORIES).getall())

         loader = ItemLoader(item=ArticleItem(), response=response)
         # Make every add_xpath() below relative to this article node.
         loader.selector = node

         loader.add_xpath('title', xpath.ARTICLE_TITLE)
         loader.add_value('link', link)
         loader.add_xpath('description', xpath.ARTICLE_DESCRIPTION)
         loader.add_value('author', author)
         loader.add_xpath('publication_date', xpath.ARTICLE_PUBLICATION_DATE)
         loader.add_value('categories', categories)
         loader.add_xpath('image_url', xpath.ARTICLE_IMAGE_URL)
         loader.add_xpath('credit', xpath.ARTICLE_CREDIT)
         loader.add_xpath('guid', xpath.ARTICLE_GUID)

         yield self._create_article_request(link, loader)