コード例 #1
0
    def _scrape_product_links(self, response):
        """Yield (request-or-url, ProductItem) pairs for every product found.

        In retailer mode the body is a JSON list of dicts carrying a
        'product_link' key and each link becomes a dont_filter Request;
        otherwise product links are read from the HTML listing page.
        """
        if self.retailer_id:
            product_links = [entry['product_link']
                             for entry in json.loads(response.body)]
            for product_link in product_links:
                request_meta = response.meta
                request_meta['fire'] = True
                # meta['dont_redirect'] = True
                # meta['handle_httpstatus_list'] = ['301']
                # stopping 301 redirects
                yield Request(url=product_link,
                              meta=request_meta,
                              dont_filter=True), ProductItem()
        else:
            raw_hrefs = response.xpath(
                '//div[@class="productWrapper"]'
                '//div[@class="productInfo2"]//a[@class="productHdr"]/@href'
            ).extract()
            for href in raw_hrefs:
                yield response.urljoin(href), ProductItem()
コード例 #2
0
ファイル: staples.py プロジェクト: MetaLocatorOrg/scrapy
 def _scrape_product_links(self, response):
     """Yield (url, ProductItem) pairs for every product in the listing.

     Retailer mode reads a JSON link feed; otherwise the new tile layout
     is tried first, falling back to the standard product-link anchors.
     """
     if self.retailer_id:
         # Retailer feed: body is a JSON list of {'product_link': ...} dicts.
         urls = [
             self._add_akamai(
                 urlparse.urljoin(response.url, entry['product_link']))
             for entry in json.loads(response.body)
         ]
         for url in urls:
             yield url, ProductItem()
     else:
         new_tile_layout = response.xpath(
             '//div[@class="stp--new-product-tile-container desktop"]')
         if new_tile_layout:
             # New tiles expose the SKU as the container id.
             skus = response.xpath(
                 '//div[@class="stp--new-product-tile-container desktop"]/div[@class="tile-container"]/@id'
             ).extract()
             for sku in skus:
                 yield self.PRODUCT_URL.format(sku=sku), ProductItem()
         else:
             hrefs = response.xpath(
                 '//a[@class="standard-type__product_link"]/@href').extract()
             for href in hrefs:
                 yield self._add_akamai(href), ProductItem()
コード例 #3
0
    def start_requests(self):
        """Generate Requests from the SEARCH_URL and the search terms.

        Also emits direct product requests for self.product_url and each
        '||||'-separated url in self.products_url, when configured.
        """
        for term in self.searchterms:
            encoded_term = urllib.quote_plus(term.encode('utf-8'))
            yield Request(
                self.url_formatter.format(
                    self.SEARCH_URL,
                    search_term=encoded_term,
                ),
                meta={'search_term': term, 'remaining': self.quantity},
            )

        if self.product_url:
            single = ProductItem()
            single['is_single_result'] = True
            single['link'] = self.product_url
            single['search_term'] = ''
            yield Request(self.product_url,
                          self._parse_single_product,
                          meta={'product': single})

        if self.products_url:
            for url in self.products_url.split('||||'):
                item = ProductItem()
                item['link'] = url
                item['search_term'] = ''
                yield Request(url,
                              self._parse_single_product,
                              meta={'product': item})
コード例 #4
0
ファイル: connection.py プロジェクト: mksingh202/scrapy
 def _scrape_product_links(self, response):
     """Yield (absolute url, ProductItem) pairs for each product link."""
     if self.retailer_id:
         # Retailer feed: JSON list of {'product_link': ...} entries.
         raw_links = [entry['product_link']
                      for entry in json.loads(response.body)]
     else:
         raw_links = response.xpath(
             '//div[@class="product-name-list"]/a/@href').extract()
     # Both sources may hold relative paths; resolve against the page url.
     for raw in raw_links:
         yield urlparse.urljoin(response.url, raw), ProductItem()
コード例 #5
0
 def _scrape_product_links(self, response):
     """Yield (url, ProductItem) pairs for every product in the results."""
     if self.retailer_id:
         # Retailer feed: JSON list of {'product_link': ...} entries,
         # yielded exactly as given (no urljoin in this branch).
         urls = [entry['product_link']
                 for entry in json.loads(response.body)]
     else:
         hrefs = response.css(
             'div[data-selenium="itemDetail"] a[data-selenium="itemHeadingLink"]::attr(href)').extract()
         urls = [response.urljoin(href) for href in hrefs]
     for url in urls:
         yield url, ProductItem()
コード例 #6
0
ファイル: zones.py プロジェクト: mksingh202/scrapy
 def _scrape_product_links(self, response):
     """Yield (url, ProductItem) pairs for every product found."""
     if self.retailer_id:
         # Retailer mode: fetch the JSON link feed from the API directly.
         feed = requests.get(
             self.API_URL.format(retailer_id=self.retailer_id)).json()
         urls = [entry['product_link'] for entry in feed]
     else:
         urls = response.xpath('//div[contains(@class, "serp-results")]/div[@class="product"]'
                               '/a[@class="title"]/@href').extract()
     for url in urls:
         yield url, ProductItem()
 def _scrape_product_links(self, response):
     """Yield (POST Request, ProductItem) pairs built from a JSON response.

     The response body is decoded as UTF-8 JSON (bad bytes ignored).  Two
     payload shapes are handled: a search result carrying a "shown" count
     and a "nugsProducts" list, or (when "shown" is absent) a flat list of
     entries with a "product_link".  Each product becomes a POST request to
     self.product_api whose body is built by get_product_payload.
     """
     self.logger.info("Start parsing products response")
     try:
         json_response = json.loads(response.body.decode("utf-8", "ignore"))
     except TypeError as e:
         # NOTE(review): e.message is Python 2 only — breaks on Python 3.
         self.logger.error(e.message + "Json respone cannot be parsed")
     except Exception as e:
         self.logger.error(e.message)
     else:
         try:
             num_products = int(json_response["shown"])
         except:
             # No "shown" key: payload is a flat list of
             # {'product_link': ...} entries (retailer-feed shape).
             if json_response:
                 for item in json_response:
                     # The MFR part number is embedded in the product url.
                     mfr_part_id = self.get_mfr_part_num_from_url(
                         item["product_link"])
                     payload = json.dumps(
                         self.get_product_payload(json_response,
                                                  mfr_part_id))
                     meta = response.meta
                     meta['fire'] = True
                     product_request = scrapy.Request(
                         url=self.product_api,
                         method='POST',
                         body=payload,
                         meta=meta,
                         headers={'Content-Type': 'application/json'},
                         callback=self.parse,
                         dont_filter=True)
                     yield product_request, ProductItem()
         else:
             # Search-result shape: iterate the first num_products entries.
             for i in range(num_products):
                 mfr_part_id = json_response["nugsProducts"][i][
                     "manufacturerPartNumber"]
                 payload = json.dumps(
                     self.get_product_payload(json_response, mfr_part_id))
                 meta = response.meta
                 meta['fire'] = True
                 product_request = scrapy.Request(
                     url=self.product_api,
                     method='POST',
                     body=payload,
                     dont_filter=True,
                     headers={'Content-Type': 'application/json'},
                     meta=meta,
                     callback=self.parse,
                 )
                 yield product_request, ProductItem()
コード例 #8
0
ファイル: very.py プロジェクト: fatica/scrapy
 def _scrape_product_links(self, response):
     """Yield (url, ProductItem) pairs for each product result."""
     if self.retailer_id:
         # Retailer mode: pull the JSON link feed straight from the API.
         feed = requests.get(
             self.API_URL.format(retailer_id=self.retailer_id)).json()
         urls = [entry['product_link'] for entry in feed]
     else:
         urls = response.xpath(
             "//a[@class='productMainImage']/@href").extract()
     for url in urls:
         yield url, ProductItem()
コード例 #9
0
    def _scrape_product_links(self, response):
        """Yield (absolute url, ProductItem) for every product link found.

        Tries several listing layouts in order, then appends the links
        from the retailer JSON feed when running in retailer mode.
        """
        hrefs = response.xpath(
            '//a[contains(@property, "url")]/@href').extract()
        if not hrefs:
            hrefs = response.xpath(
                './/div[@class="product-info"]'
                '/a[contains(@class, "product-title")]/@href').extract()
        if not hrefs:
            hrefs = response.xpath(
                '//a[@class="product-title scTrack pfm"]/@href').extract()

        collected = list(hrefs)
        if self.retailer_id:
            # Retailer feed: JSON list of {'product_link': ...} entries.
            collected.extend(entry['product_link']
                             for entry in json.loads(response.body))

        for raw in collected:
            yield urlparse.urljoin(response.url, raw), ProductItem()
コード例 #10
0
ファイル: zones.py プロジェクト: singhmaneesh/scrapy
    def _scrape_product_links(self, response):
        """Yield (url, ProductItem) for every product on the results page."""
        hrefs = response.xpath(
            '//div[contains(@class, "serp-results")]/div[@class="product"]'
            '/a[@class="title"]/@href').extract()
        for href in hrefs:
            yield href, ProductItem()
コード例 #11
0
 def _scrape_product_links(self, response):
     """Yield (url, ProductItem) pairs for each product result."""
     if self.retailer_id:
         # Retailer mode: pull the JSON link feed straight from the API.
         feed = requests.get(
             self.API_URL.format(retailer_id=self.retailer_id)).json()
         urls = [entry['product_link'] for entry in feed]
     else:
         urls = response.xpath(
             "//header[@class='productTitle']/a/@href").extract()
     for url in urls:
         yield url, ProductItem()
 def _get_products(self, response):
     """Handle searches that redirect straight to a single product page."""
     if "officedepot.com/a/products" in response.url:
         # Search redirected to a product page: emit a marker item only.
         yield ProductItem(search_redirected_to_product=True)
     else:
         # Normal listing: defer to the base class implementation.
         for req_or_prod in super(OfficedepotProductsSpider,
                                  self)._get_products(response):
             yield req_or_prod
コード例 #13
0
ファイル: cdw.py プロジェクト: mksingh202/scrapy
 def _scrape_product_links(self, response):
     """Yield (absolute url, ProductItem) for each product link."""
     if self.retailer_id:
         # Retailer mode: fetch the JSON link feed from the API.
         feed = requests.get(
             self.API_URL.format(retailer_id=self.retailer_id)).json()
         raw_links = [entry['product_link'] for entry in feed]
     else:
         raw_links = response.xpath(
             '//div[@class="search-results"]'
             '//a[@class="search-result-product-url"]/@href').extract()
     # Both sources may hold relative paths; resolve against the page url.
     for raw in raw_links:
         yield urlparse.urljoin(response.url, raw), ProductItem()
コード例 #14
0
    def _scrape_product_links(self, response):
        """Yield (url, ProductItem) pairs parsed from the JSON response."""
        data = json.loads(response.body)
        if self.retailer_id:
            # Retailer feed: flat list of {'product_link': ...} entries.
            urls = [entry['product_link'] for entry in data]
        else:
            # Search response: record the total, then read result links.
            self.total_matches = data['total']
            urls = [result['link'] for result in data['results']]
        for url in urls:
            yield url, ProductItem()
コード例 #15
0
ファイル: officedepot.py プロジェクト: mksingh202/scrapy
    def _scrape_product_links(self, response):
        """Yield (url, ProductItem) pairs for every product link found."""
        if self.retailer_id:
            # Retailer feed: keep only links that point at officedepot.
            urls = [entry['product_link']
                    for entry in json.loads(response.body)
                    if 'officedepot' in entry['product_link']]
        else:
            urls = response.xpath(
                '//div[contains(@class, "descriptionFull")]//a[contains(@class, "med_txt")]/@href'
            ).extract() or response.css(
                '.desc_text a::attr("href")').extract()
        for url in urls:
            yield url, ProductItem()
コード例 #16
0
ファイル: en_gb_agros.py プロジェクト: MetaLocatorOrg/scrapy
    def parse_product(self, response):
        """Build and return a ProductItem scraped from an Argos product page.

        Each field is filled from the matching _parse_* helper; stock
        status is hard-wired to CALL_FOR_AVAILABILITY and condition to 1
        (new), with the locale fixed at en-GB.
        """

        product = ProductItem()

        # Parse name
        name = self._parse_name(response)
        product['name'] = name

        # Parse brand
        brand = self._parse_brand(response)
        product['brand'] = brand

        # Parse image
        image = self._parse_image(response)
        product['image'] = image

        product['link'] = response.url

        # Parse model
        model = self._parse_model(response)
        product['model'] = model

        # Stock status is never scraped for this site.
        product['productstockstatus'] = self.STOCK_STATUS[
            'CALL_FOR_AVAILABILITY']

        product['ean'] = self._parse_ean(response)

        # Parse categories
        categories = self._parse_categories(response)
        product['categories'] = categories

        sku = self._parse_sku(response)
        product['sku'] = sku

        # Parse currencycode
        product['currencycode'] = self._parse_currency_code(response)

        # Set locale
        product['locale'] = 'en-GB'

        # Parse price
        price = self._parse_price(response)
        product['price'] = price

        # Parse gallery
        gallery = self._parse_gallery(response)
        product['gallery'] = gallery

        # Parse features
        features = self._parse_features(response)
        product['features'] = features

        # Parse condition (1 = new)
        product['condition'] = 1

        return product
コード例 #17
0
 def _set_product_meta(self, response):
     """Attach a single-result ProductItem to response.meta and return
     the response.

     Used for direct product lookups, so every counter (total matches,
     per-page counts, ranking) is fixed at 1.
     """
     item = ProductItem()
     item['site'] = self.site_name
     item['search_term'] = response.meta['search_term']
     for counter in ('total_matches', 'results_per_page',
                     'scraped_results_per_page', 'ranking'):
         item[counter] = 1
     response.meta['product'] = item
     return response
コード例 #18
0
ファイル: hp.py プロジェクト: mksingh202/scrapy
    def _scrape_product_links(self, response):
        """Yield (url, ProductItem) pairs for every product link found."""
        if self.retailer_id:
            # Retailer feed: JSON list of {'product_link': ...} dicts,
            # yielded exactly as given (no urljoin in this branch).
            urls = [entry['product_link']
                    for entry in json.loads(response.body)]
        else:
            hrefs = response.xpath(
                '//div[@class="productWrapper"]'
                '//div[@class="productInfo2"]//a[@class="productHdr"]/@href'
            ).extract()
            urls = [response.urljoin(href) for href in hrefs]
        for url in urls:
            yield url, ProductItem()
 def parse_product(self, response):
     """Parse a JSON product response into the ProductItem from meta.

     Decodes the body as UTF-8 JSON (ignoring bad bytes) and delegates
     to parse_product_item; on a decode failure the error is logged and
     the method implicitly returns None.
     """
     meta = response.meta.copy()
     product = meta.get('product', ProductItem())
     try:
         json_response = json.loads(response.body.decode("utf-8", "ignore"))
     except TypeError as e:
         # NOTE(review): e.message is Python 2 only — breaks on Python 3.
         self.logger.error(e.message + "Json respone cannot be parsed")
     except Exception as e:
         self.logger.error(e.message)
     else:
         return self.parse_product_item(json_response, product)
コード例 #20
0
ファイル: en_gb_agros.py プロジェクト: MetaLocatorOrg/scrapy
 def _scrape_product_links(self, response):
     """Yield product Requests (retailer mode) or (url, item) pairs.

     Retailer mode fetches a JSON link feed from the API and wraps each
     link in a dont_filter Request with redirects disabled.
     """
     if self.retailer_id:
         feed = requests.get(
             self.API_URL.format(retailer_id=self.retailer_id)).json()
         product_links = [entry['product_link'] for entry in feed]
         for raw in product_links:
             url = urlparse.urljoin(response.url, raw)
             request_meta = response.meta
             request_meta['fire'] = True
             request_meta['dont_redirect'] = True
             # stopping 301 redirects
             yield Request(url=url, meta=request_meta,
                           dont_filter=True), ProductItem()
     else:
         hrefs = response.css(
             'div.product-list a.ac-product-link::attr(href)').extract()
         for href in hrefs:
             yield urlparse.urljoin(response.url, href), ProductItem()
コード例 #21
0
ファイル: staples.py プロジェクト: singhmaneesh/scrapy
    def _scrape_product_links(self, response):
        """Yield (absolute url, ProductItem) for each product link found.

        Tries three listing layouts in order of preference.
        """
        hrefs = response.xpath(
            '//a[contains(@property, "url")]/@href').extract()
        if not hrefs:
            hrefs = response.xpath(
                './/div[@class="product-info"]'
                '/a[contains(@class, "product-title")]/@href').extract()
        if not hrefs:
            hrefs = response.xpath(
                '//a[@class="product-title scTrack pfm"]/@href').extract()
        for href in hrefs:
            yield urlparse.urljoin(response.url, href), ProductItem()
コード例 #22
0
ファイル: cdw.py プロジェクト: mksingh202/scrapy
    def parse_product(self, response):
        """Build and return a ProductItem from a CDW product page.

        Each field comes from the matching _parse_* helper; currency,
        locale and condition are fixed (USD / en-US / 1 = new), and the
        sale price mirrors the regular price.
        """
        product = ProductItem()

        # Parse name
        name = self._parse_name(response)
        product['name'] = name

        # Parse brand
        brand = self._parse_brand(response)
        product['brand'] = brand

        # Parse image
        image = self._parse_image(response)
        product['image'] = image

        product['link'] = response.url

        # Parse model
        model = self._parse_model(response)
        product['model'] = model

        # Parse categories
        categories = self._parse_categories(response)
        product['categories'] = categories

        # Parse unspec
        unspec = self._parse_unspec(response)
        product['unspec'] = unspec

        # Parse currencycode
        product['currencycode'] = 'USD'

        # Set locale
        product['locale'] = 'en-US'

        # Parse price
        price = self._parse_price(response)
        product['price'] = price

        # Sale price mirrors the regular price on this site.
        product['saleprice'] = price

        # Parse in_store
        in_store = self._parse_instore(response)
        product['instore'] = in_store

        # Parse ship to store
        ship_to_store = self._parse_shiptostore(response)
        product['shiptostore'] = ship_to_store

        # Parse shipping phrase
        shipping_phrase = self._parse_shippingphrase(response)
        product['shippingphrase'] = shipping_phrase

        # Parse stock status
        stock_status = self._parse_stock_status(response)
        product['productstockstatus'] = stock_status

        # Parse gallery
        gallery = self._parse_gallery(response)
        product['gallery'] = gallery

        # Parse features

        features = self._parse_features(response)
        product['features'] = features

        # Parse condition (1 = new)
        product['condition'] = 1

        return product
コード例 #23
0
ファイル: officedepot.py プロジェクト: mksingh202/scrapy
    def parse_product(self, response):
        """Fill and return the ProductItem for an Office Depot product page.

        Reuses the item passed in response.meta when present.  Fields set
        via cond_set/cond_set_value keep any value already on the item;
        plain assignments always overwrite.
        """
        meta = response.meta
        product = meta.get('product', ProductItem())

        # Parse locate
        locale = 'en_US'
        cond_set_value(product, 'locale', locale)

        # Parse name
        name = self.parse_name(response)
        cond_set(product, 'name', name, conv=string.strip)

        # Parse image
        image = self.parse_image(response)
        cond_set(product, 'image', image)

        # Parse brand
        brand = self.parse_brand(response)
        cond_set_value(product, 'brand', brand)

        # Parse sku
        sku = self.parse_sku(response)
        cond_set_value(product, 'sku', sku)

        # Parse price
        price = self.parse_price(response)
        cond_set_value(product, 'price', price)

        # Sale price mirrors the regular price on this site.
        product['saleprice'] = price

        # Parse model
        model = self._parse_model(response)
        cond_set_value(product, 'model', model)

        # Parse gallery
        gallery = self._parse_gallery(response)
        product['gallery'] = gallery

        # Parse stock status
        oos = self._parse_product_stock_status(response)
        cond_set_value(product, 'productstockstatus', oos)

        # Parse categories
        categories = self._parse_categories(response)
        cond_set_value(product, 'categories', categories)

        # Parse manufacturer
        manufacturer = self._parse_manufacturer(response)
        cond_set_value(product,
                       'manufacturer',
                       manufacturer,
                       conv=string.strip)

        # Parse shipping phrase
        shipping_phrase = self._parse_shippingphrase(response)
        product['shippingphrase'] = shipping_phrase

        # Parse ship to store
        ship_to_store = self._parse_shiptostore(response)
        product['shiptostore'] = ship_to_store

        # Parse retailer_key
        retailer_key = self._parse_retailer_key(response)
        product['retailer_key'] = retailer_key

        # Parse features
        features = self._parse_features(response)
        product['features'] = features

        return product
コード例 #24
0
ファイル: connection.py プロジェクト: singhmaneesh/scrapy
    def parse_product(self, response):
        """Build and return a ProductItem from a Connection product page.

        Also records the page url in self.link.  Currency, locale and
        condition are fixed (USD / en-US / 1 = new); ean and gallery are
        not scraped for this site and are set to None.
        """
        product = ProductItem()

        # Track every product url visited on this spider instance.
        self.link.append(response.url)

        # Parse name
        name = self._parse_name(response)
        product['name'] = name

        # Parse brand
        brand = self._parse_brand(response)
        product['brand'] = brand

        # Parse image
        image = self._parse_image(response)
        product['image'] = image

        # Parse link
        product['link'] = response.url

        # Parse model
        model = self._parse_model(response)
        product['model'] = model

        # Parse upc
        upc = self._parse_upc(response)
        product['upc'] = upc

        # EAN is not available on this site.
        product['ean'] = None

        # Parse currencycode
        product['currencycode'] = 'USD'

        # Set locale
        product['locale'] = 'en-US'

        # Parse price
        price = self._parse_price(response)
        product['price'] = price

        # Sale price mirrors the regular price on this site.
        product['saleprice'] = price

        # Parse sku
        sku = self._parse_sku(response)
        product['sku'] = sku

        # Parse retailer_key
        retailer_key = self._parse_retailer_key(response)
        product['retailer_key'] = retailer_key

        # Parse in_store
        in_store = self._parse_instore(response)
        product['instore'] = in_store

        # Parse ship to store
        ship_to_store = self._parse_shiptostore(response)
        product['shiptostore'] = ship_to_store

        # Parse shipping phrase
        shipping_phrase = self._parse_shippingphrase(response)
        product['shippingphrase'] = shipping_phrase

        # Parse stock status
        stock_status = self._parse_stock_status(response)
        product['productstockstatus'] = stock_status

        # Gallery is not scraped for this site.
        product['gallery'] = None

        # Parse features

        features = self._parse_features(response)
        product['features'] = features

        # Parse condition (1 = new)
        product['condition'] = 1

        return product
コード例 #25
0
ファイル: very.py プロジェクト: fatica/scrapy
    def parse_product(self, response):
        """Build and return a ProductItem from a Very product page.

        Each field comes from the matching _parse_* helper; the MPN
        mirrors the model, currency/locale are fixed (GBP / en-UK) and
        condition is 1 (new).
        """
        product = ProductItem()
        # Parse name
        name = self._parse_name(response)
        product['name'] = name

        # Parse brand
        brand = self._parse_brand(response)
        product['brand'] = brand

        # Parse image
        image = self._parse_image(response)
        product['image'] = image
        product['link'] = response.url

        # Parse model; the MPN reuses the same value on this site.
        model = self._parse_model(response)
        product['model'] = model
        product['mpn'] = model

        ean = self._parse_ean(response)
        product['ean'] = ean

        # Parse categories
        categories = self._parse_categories(response)
        product['categories'] = categories

        # Parse unspec DOUBT DOUBT
        # unspec = self._parse_unspec(response)
        # product['unspec'] = unspec

        # Parse currencycode
        product['currencycode'] = 'GBP'

        # Set locale
        product['locale'] = 'en-UK'

        # Parse price
        price = self._parse_price(response)
        product['price'] = price

        # Parse sku
        sku = self._parse_sku(response)
        product['sku'] = sku

        # Parse retailer_key
        retailer_key = self._parse_retailer_key(response)
        product['retailer_key'] = retailer_key

        # Parse stock status
        stock_status = self._parse_stock_status(response)
        product['productstockstatus'] = stock_status

        # Parse gallery
        gallery = self._parse_gallery(response)
        product['gallery'] = gallery

        # Parse features
        features = self._parse_features(response)
        product['features'] = features

        # Parse condition (1 = new)
        product['condition'] = 1

        return product
コード例 #26
0
ファイル: staples.py プロジェクト: MetaLocatorOrg/scrapy
    def parse_product(self, response):
        """Build and return a ProductItem from a Staples product page.

        Bails out (returns None) on the "Good thing this is not permanent"
        not-found page, retries the same url when the maintenance banner
        is present, then fills the item field by field.
        """
        meta = response.meta.copy()
        product = meta.get('product', ProductItem())

        # Not-found page: drop the product silently.
        if 'Good thing this is not permanent' in response.body_as_unicode():
            # product['not_found'] = True
            return

        maintenance_error = response.xpath(
            './/*[contains(text(), "The site is currently under maintenance.")]'
        )
        if maintenance_error:
            self.log(
                "Website under maintenance error, retrying request: {}".format(
                    response.url), WARNING)
            return Request(response.url,
                           callback=self.parse_product,
                           meta=response.meta,
                           dont_filter=True)

        # NOTE(review): on HTTP 429 this rebinds `response` to a
        # requests.Response, which lacks xpath(); the _parse_* helpers
        # below would need to cope with that — confirm intended.
        if response.status == 429:
            response = requests.get(url=response.url, timeout=5)

        # Parse name
        name = self._parse_name(response)
        product['name'] = name

        # Parse image
        image = self._parse_image(response)
        product['image'] = image

        # Parse model
        model = self._parse_model(response)
        product['model'] = model

        # Parse upc
        # upc = self._parse_upc(response)
        # product['upc'] = upc

        # Parse currencycode
        product['currencycode'] = 'USD'

        # Set locale
        product['locale'] = 'en-US'

        # Parse sku
        sku = self._parse_sku(response)
        product['sku'] = sku

        # Parse manufacturer
        # manufacturer = self._parse_manufacturer(response)
        # product['manufacturer'] = manufacturer

        # Parse categories
        categories = self._parse_categories(response)
        product['categories'] = categories

        # Parse retailer_key
        retailer_key = self._parse_retailer_key(response)
        product['retailer_key'] = retailer_key

        # Parse in_store
        in_store = self._parse_instore(response)
        product['instore'] = in_store

        # Stock-status helper reads the product back from meta.
        response.meta['product'] = product
        oos = self._parse_product_stock_status(response)
        cond_set_value(product, 'productstockstatus', oos)

        # Parse ship to store
        # ship_to_store = self._parse_shiptostore(response)
        # product['shiptostore'] = ship_to_store

        # Parse gallery
        product['gallery'] = self._parse_gallery(response)

        # Parse features
        # features = self._parse_features(response)
        # product['features'] = features

        # Parse condition (1 = new)
        product['condition'] = 1

        # Parse price
        price = self._parse_price(response)
        product['price'] = price
        return product
コード例 #27
0
    def parse_product(self, response):
        """Parse a product page and chase the price through a second request.

        Marks not-found pages, retries during site maintenance, fills the
        item field by field, then builds a PRICE_URL request (callback
        _parse_price) from the embedded JS data; if that request cannot be
        formed the partially-filled product is returned as-is.
        """
        meta = response.meta.copy()
        product = meta.get('product', ProductItem())

        if 'Good thing this is not permanent' in response.body_as_unicode():
            product['not_found'] = True
            return product

        maintenance_error = response.xpath('.//*[contains(text(), "The site is currently under maintenance.")]')
        if maintenance_error:
            self.log("Website under maintenance error, retrying request: {}".format(response.url), WARNING)
            return Request(response.url, callback=self.parse_product, meta=response.meta, dont_filter=True)

        # NOTE(review): on HTTP 429 this rebinds `response` to a
        # requests.Response, which lacks xpath(); confirm the _parse_*
        # helpers tolerate that.
        if response.status == 429:
            response = requests.get(url=response.url, timeout=5)

        # Parse name
        name = self._parse_name(response)
        product['name'] = name

        # Parse image
        image = self._parse_image(response)
        product['image'] = image

        # Parse model
        model = self._parse_model(response)
        product['model'] = model

        # Parse upc
        upc = self._parse_upc(response)
        product['upc'] = upc

        # Parse currencycode
        product['currencycode'] = 'USD'

        # Set locale
        product['locale'] = 'en-US'

        # Parse sku
        sku = self._parse_sku(response)
        product['sku'] = sku

        # Parse manufacturer
        manufacturer = self._parse_manufacturer(response)
        product['manufacturer'] = manufacturer

        # Parse categories
        categories = self._parse_categories(response)
        product['categories'] = categories

        # Parse retailer_key
        retailer_key = self._parse_retailer_key(response)
        product['retailer_key'] = retailer_key

        # Parse in_store
        in_store = self._parse_instore(response)
        product['instore'] = in_store

        # Parse ship to store
        ship_to_store = self._parse_shiptostore(response)
        product['shiptostore'] = ship_to_store

        # Parse gallery
        product['gallery'] = self._parse_gallery(response)

        # Parse features
        features = self._parse_features(response)
        product['features'] = features

        # Parse condition (1 = new)
        product['condition'] = 1

        # Parse price: build the follow-up price request from the page's
        # embedded JS data, substituting our SKU into prod_doc_key.
        js_data = self.parse_js_data(response)
        try:
            if product.get("sku", ""):
                # Replace the last path segment of prod_doc_key with the
                # SKU parsed above.
                prod_doc_key = js_data['prod_doc_key']
                prod_doc_key = prod_doc_key.split("/")[:-1]
                prod_doc_key.append(product.get("sku", ""))
                prod_doc_key = "/".join(prod_doc_key)
            else:
                prod_doc_key = js_data['prod_doc_key']
            return Request(
                url=self.PRICE_URL.format(sku=sku,
                                          metadata__coming_soon_flag=js_data['metadata']['coming_soon_flag'],
                                          metadata__price_in_cart_flag=js_data['metadata']['price_in_cart_flag'],
                                          prod_doc_key=prod_doc_key,
                                          metadata__product_type__id=js_data['metadata']['product_type']['id'],
                                          metadata__preorder_flag=js_data['metadata']['preorder_flag'],
                                          street_date=time.time(),
                                          metadata__channel_availability_for__id=
                                          js_data['metadata']['channel_availability_for']['id'],
                                          metadata__backorder_flag=js_data['metadata']['backorder_flag']),
                dont_filter=True,
                callback=self._parse_price,
                meta=meta,
                headers={"Referer": None, "X-Requested-With": "XMLHttpRequest",
                         'User-Agent': 'Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)'}
            )
        except Exception as e:
            # Missing JS keys: log and fall back to the price-less product.
            self.log("Error while forming request for base product data: {}".format(traceback.format_exc()), WARNING)
            return product
コード例 #28
0
ファイル: en-gb_john_lewis.py プロジェクト: fatica/scrapy
    def parse_product(self, response):
        """Build and return a ProductItem from a John Lewis product page.

        Every field is filled from the matching _parse_* helper; ship to
        store is fixed at 1, condition at 1 (new) and the locale at en-gb.
        """

        product = ProductItem()

        # Parse name
        name = self._parse_name(response)
        product['name'] = name

        # Parse brand
        brand = self._parse_brand(response)
        product['brand'] = brand

        # Parse image
        image = self._parse_image(response)
        product['image'] = image

        product['link'] = response.url

        # Parse model
        model = self._parse_model(response)
        product['model'] = model

        # Parse categories
        categories = self._parse_categories(response)
        product['categories'] = categories

        # Parse sku
        sku = self._parse_sku(response)
        product['sku'] = sku

        # Parse retailer key
        retailer_key = self._parse_retailer_key(response)
        product['retailer_key'] = retailer_key

        # Parse retailer key 2
        retailer_key2 = self._parse_retailer_key2(response)
        product['ean'] = retailer_key2

        # Parse mpn
        # BUG FIX: this previously assigned the MPN to product['sku'],
        # silently overwriting the SKU parsed above. Store it under 'mpn'
        # (the field very.py's parse_product also populates) so both
        # values survive.
        mpn = self._parse_mpn(response)
        product['mpn'] = mpn

        # Parse currencycode
        product['currencycode'] = self._parse_currency_code(response)

        # Set locale
        product['locale'] = 'en-gb'

        # Parse price
        price = self._parse_price(response)
        product['price'] = price

        # Ship to store is always available on this site.
        ship_to_store = 1
        product['shiptostore'] = ship_to_store

        # Parse stock status
        stock_status = self._parse_stock_status(response)
        product['productstockstatus'] = stock_status

        # Parse gallery
        gallery = self._parse_gallery(response)
        product['gallery'] = gallery

        # Parse features
        features = self._parse_features(response)
        product['features'] = features

        # Parse condition (1 = new)
        product['condition'] = 1

        return product
コード例 #29
0
    def parse_product(self, response):
        """Yield a ProductItem populated from a product page.

        Reuses the item passed in response.meta when present.  Currency
        and locale are fixed (USD / en-US); ean is not scraped and is set
        to None.
        """
        product = response.meta.get('product', ProductItem())

        # Parse name
        name = self._parse_name(response)
        product['name'] = name

        # Parse brand
        product['brand'] = self._parse_brand(response)

        # Parse image
        image = self._parse_image(response)
        product['image'] = image

        # Parse link
        link = response.url
        product['link'] = link

        # Parse model
        product['model'] = self._parse_model(response)

        # Parse upc
        product['upc'] = self._parse_upc(response)

        # EAN is not available on this site.
        product['ean'] = None

        # Parse currencycode
        product['currencycode'] = 'USD'

        # Set locale
        product['locale'] = 'en-US'

        # Parse price
        product['price'] = self._parse_price(response)

        # Parse sale price
        product['saleprice'] = self._parse_sale_price(response)

        # Parse sku
        product['sku'] = self._parse_sku(response)

        # Parse retailer_key
        product['retailer_key'] = self._parse_retailer_key(response)

        # Parse in_store
        product['instore'] = self._parse_instore(response)

        # Parse productstockstatus
        product['productstockstatus'] = self._parse_stock_status(response)

        # Parse categories
        product['categories'] = self._parse_categories(response)

        # Parse gallery
        product['gallery'] = self._parse_gallery(response)

        # Parse features
        product['features'] = self._parse_features(response)

        # Parse condition
        product['condition'] = self._parse_condition(response)

        yield product