Python SainsburysItem Examples

Programming Language: Python

Namespace/Package Name: sainsburys.items

Class/Type: SainsburysItem

Examples at hotexamples.com: 3

Python SainsburysItem - 3 examples found. These are the top-rated real-world Python examples of sainsburys.items.SainsburysItem extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SainsburysItem(3)

Frequently Used Methods

SainsburysItem (3)

Example #1

Show file

File: basic.py Project: Arasu378/sainsburys

 def parse_product_detail(self, response):
     """Build a SainsburysItem from a product detail page and yield it.

     Always-populated fields (an IndexError is raised if the page lacks
     the corresponding node): ``url``, ``product_name``, ``product_image``,
     ``price_per_unit``, ``item_code`` and ``product_origin`` (the latter is
     an empty string when no "Country of Origin" section exists).

     Optional fields, set only when found on the page: ``unit``,
     ``rating``, ``product_reviews`` and ``nutritions``.

     ``reviews_pattern`` and ``item_code_pattern`` are regex objects
     defined elsewhere in the module (not visible in this block).
     """
     item = SainsburysItem()
     item['url'] = response.url
     item['product_name'] = response.xpath('//h1/text()').extract()[0].strip()
     item['product_image'] = response.urljoin(
         response.xpath('//div[@id="productImageHolder"]/img/@src').extract()[0])
     item['price_per_unit'] = response.xpath(
         '//div[@class="pricing"]/p[@class="pricePerUnit"]/text()').extract()[0].strip()

     # The original query targeted a non-existent <dic> element, so the unit
     # was never captured. Select the span anywhere below the pricing div and
     # take its text rather than its serialized markup.
     units = response.xpath(
         '//div[@class="pricing"]//span[@class="pricePerUnitUnit"]/text()').extract()
     if units:
         item['unit'] = units[0].strip()

     ratings = response.xpath('//label[@class="numberOfReviews"]/img/@alt').extract()
     if ratings:
         item['rating'] = ratings[0]

     review_labels = response.xpath('//label[@class="numberOfReviews"]').extract()
     if review_labels:
         review_counts = reviews_pattern.findall(review_labels[0])
         if review_counts:
             item['product_reviews'] = review_counts[0]

     item['item_code'] = item_code_pattern.findall(
         response.xpath('//p[@class="itemCode"]/text()').extract()[0].strip())[0]

     nutritions = {}
     for row in response.xpath('//table[@class="nutritionTable"]/tr'):
         cells = row.xpath('./td[1]/text()').extract()
         if not cells:
             # Rows without a data cell (e.g. a header row) carry no value;
             # skip them instead of raising IndexError.
             continue
         headers = row.xpath('./th/text()').extract()
         # A value row with no header text is recorded under 'Energy kcal'.
         label = headers[0] if headers else 'Energy kcal'
         nutritions[label] = cells[0]
     if nutritions:
         # Assign once after the loop; the key stays unset when no values
         # were collected.
         item['nutritions'] = nutritions

     item['product_origin'] = ' '.join(response.xpath(
         './/h3[@class="productDataItemHeader" and text()="Country of Origin"]'
         '/following-sibling::div[1]/p/text()').extract())
     yield item

Example #2

Show file

 def parse1(self, response):
     """Yield one image item per product in the grid, then follow pagination.

     For every product tile an item is loaded with ``image_name`` (the
     whitespace-normalized link text passed through ``self.cleanup``) and,
     when the tile has an ``<img src>``, ``image_urls`` (made absolute
     against the response URL). If a "next" pagination link exists, a
     SplashRequest for it is yielded with this method as the callback.
     """
     for product in response.xpath(
             "//ul[@class='productLister gridView']/li[@class='gridItem']/div[contains(@class,'product ')]"
     ):
         loader = ItemLoader(item=SainsburysItem(), selector=product)
         img_src = product.xpath(".//div/div/h3/a/img/@src").get()
         raw_name = product.xpath(
             "normalize-space(.//div/div/h3/a/text()[1])").get()
         name = self.cleanup(raw_name)
         if img_src:
             # urljoin(None) would return the page URL itself, which must
             # not be recorded as an image URL.
             loader.add_value('image_urls', response.urljoin(img_src))
         loader.add_value('image_name', name)
         yield loader.load_item()

     next_href = response.xpath(
         "//div[@class='pagination']/ul/li[@class='next']/a/@href").get()
     if next_href:
         # Resolve against the current page so a relative href still yields
         # a valid request URL; absolute hrefs pass through unchanged.
         yield SplashRequest(url=response.urljoin(next_href),
                             callback=self.parse1,
                             endpoint="execute",
                             args={
                                 'timeout': 1800,
                                 'lua_source': self.script
                             })

Example #3

Show file

    def parse_product_details_bs(self, response):
        """Parse a product page with BeautifulSoup and yield a SainsburysItem.

        Every field is optional and is set only when its node is found:
        ``product_name``, ``price_per_unit``, ``unit``, ``rating``,
        ``product_reviews``, ``item_code``, ``nutritions`` and
        ``product_origin``.

        Rating and review count come from the ``numberOfReviews`` label when
        present; otherwise from ``itemprop`` microdata (``ratingValue`` /
        ``reviewCount``).

        ``reviews_pattern`` and ``item_code_pattern`` are regex objects
        defined elsewhere in the module (not visible in this block).
        """
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(response.text, 'lxml')
        item = SainsburysItem()

        h1 = soup.find('h1')
        if h1:
            item['product_name'] = h1.text.strip()

        pricing = soup.find('div', class_='pricing')
        if pricing:
            price = pricing.find('p', class_='pricePerUnit')
            unit = pricing.find('span', class_='pricePerUnitUnit')
            if price:
                item['price_per_unit'] = price.text.strip()
            if unit:
                item['unit'] = unit.text.strip()

        label = soup.find('label', class_='numberOfReviews')
        if label:
            img = label.find('img', alt=True)
            if img:
                item['rating'] = img['alt'].strip()
            review_counts = reviews_pattern.findall(label.text.strip())
            if review_counts:
                item['product_reviews'] = review_counts[0]
        else:
            rating = soup.find('span', {'itemprop': 'ratingValue'})
            if rating:
                item['rating'] = rating.text.strip()
            review_meta = soup.find('meta', {'itemprop': 'reviewCount'})
            # Tag.has_attr is the attribute-presence check. The previous
            # `.hasattr(...)` resolved to a child-tag lookup returning None,
            # so calling it raised TypeError.
            if review_meta and review_meta.has_attr('content'):
                item['product_reviews'] = review_meta['content'].strip()

        item_code = soup.find('p', class_='itemCode')
        if item_code:
            item_codes = item_code_pattern.findall(item_code.text.strip())
            if item_codes:
                item['item_code'] = item_codes[0]

        table = soup.find('table', class_='nutritionTable')
        if table:
            nutritions = {}
            rows = table.find_all('tr')
            # The first row is skipped (presumably the table header).
            for tr in rows[1:]:
                td = tr.find('td')
                if not td:
                    # No value cell in this row; nothing to record.
                    continue
                th = tr.find('th', class_='rowHeader')
                if not th:
                    # A value row without a row header is stored under
                    # 'Energy kcal'.
                    nutritions['Energy kcal'] = td.text
                else:
                    nutritions[th.text.replace('-', '').strip()] = td.text
            item['nutritions'] = nutritions

        product_origin_header = soup.find('h3',
                                          class_='productDataItemHeader',
                                          text='Country of Origin')
        if product_origin_header:
            # The origin text lives in the next sibling div of class
            # 'productText'.
            product_text = product_origin_header.find_next_sibling(
                'div', class_='productText')
            if product_text:
                origin_info = [p.text.strip()
                               for p in product_text.find_all('p')]
                item['product_origin'] = '; '.join(origin_info)

        yield item