コード例 #1
0
    def _parse_json(self, response):
        """Build product item(s) from the JSON payload carried by ``response``.

        The first entry of the decoded JSON list is used.  Its ``skus`` list
        supplies the per-SKU ``productNumber``, ``currentPrice`` and
        ``product_size`` values.

        Returns the ``product`` from ``response.meta`` when the SKUs expose
        fewer than two distinct prices; otherwise a list holding one copy of
        the product per SKU.
        """
        product = response.meta['product']

        data = json.loads(response.body_as_unicode())
        prod_data = data[0]
        # A missing or empty "skus" entry previously raised on the [0] lookup.
        skus = prod_data.get('skus') or []
        upc = skus[0].get('productNumber') if skus else None

        prices = set(sku.get('currentPrice') for sku in skus)
        if len(prices) < 2:
            # All SKUs share one price: a single item describes the product.
            product['upc'] = upc
            price = prod_data.get('currentPrice')
            cond_set_value(product, 'price',
                           Price('USD', price) if price else None)
            cond_set_value(product, 'title', prod_data.get('name'))
            return product

        self.log("Product %s PRICES= %s " % (upc, prices), DEBUG)
        products = []
        for skudata in skus:
            new_product = product.copy()
            new_product['upc'] = skudata.get('productNumber')
            price = skudata.get('currentPrice')
            cond_set_value(new_product, 'price',
                           Price('USD', price) if price else None)
            new_product['title'] = prod_data.get('name')
            # Concatenating None raised TypeError; skip the model instead.
            size = skudata.get('product_size')
            if size is not None:
                new_product['model'] = "size:%s" % (size,)

            products.append(new_product)
        return products
コード例 #2
0
    def _get_json_data(self, item):
        """Create a ``SiteProductItem`` from the ``productInfo`` part of ``item``.

        Price: ``priceInfo['salePrice']`` is used when that key exists,
        otherwise ``priceInfo['regularPrice']``.  The last numeric match in
        the chosen text becomes a USD ``Price``; no price is set when the
        text holds no match (this used to raise ``IndexError``).

        Availability flags are derived from the ``displayText`` of each
        ``channelAvailability`` entry, and ``upc`` is copied when present.
        """
        product = SiteProductItem()
        item = item['productInfo']

        price_info = item['priceInfo']
        for key in ('salePrice', 'regularPrice'):
            if key not in price_info:
                continue
            # NOTE(review): the "." is unescaped and a leading "/" may be
            # captured; pattern kept as-is because the intent is unclear.
            amounts = re.findall(r'(/?\d+.\d+)', price_info[key])
            if amounts:
                product['price'] = Price(price=float(amounts[-1]),
                                         priceCurrency='USD')
            # Only the first key that is present is ever consulted.
            break

        messages = item.get('channelAvailability', [])
        for mes in messages:
            if 'displayText' in mes:
                if 'Not sold online' in mes['displayText']:
                    product['is_in_store_only'] = True
                if 'Out of stock online' in mes['displayText']:
                    product['is_out_of_stock'] = True

        upc = item.get('upc')
        cond_set_value(product, 'upc', upc)

        return product
コード例 #3
0
 def parse_price(self, response):
     """Return the ``pd-price`` paragraph text as a USD ``Price``.

     The first matching text node has ``'NOW'`` and ``'$'`` removed and is
     passed through ``self.clear_text``.  A zero USD price is returned when
     the page has no such node.
     """
     texts = response.xpath(
         '//p[contains(@id, "pd-price")]/text()').extract()
     if not texts:
         return Price(price=0.00, priceCurrency="USD")
     raw = texts[0].replace('NOW', '').replace('$', '')
     return Price(price=self.clear_text(raw), priceCurrency="USD")
コード例 #4
0
 def _parse_price(response):
     """Read price and currency from ``itemprop`` markup.

     Returns a ``Price`` built from the first ``priceCurrency`` content
     value and the first ``FLOATING_POINT_RGEX`` match in the ``price``
     text; falls back to a zero USD price when either one is missing.
     """
     currency = response.xpath(
         '*//*[@itemprop="priceCurrency"]/@content').extract()
     price = response.xpath('*//*[@itemprop="price"]/text()').re(
         FLOATING_POINT_RGEX)
     if not (currency and price):
         return Price(price=0, priceCurrency='USD')
     return Price(price=price[0], priceCurrency=currency[0])
コード例 #5
0
ファイル: newegg.py プロジェクト: lifelonglearner127/tmtext
    def parse_marketplace_json(self, response):
        """Collect marketplace sellers and prices, then queue the next request.

        The response body is searched for the fragment following
        ``parentItem":"<seller_id>"``; backslashes are stripped and the
        result is parsed as markup.  Seller names are paired by position
        with the extracted price parts and stored in
        ``product["marketplace"]``.

        Returns ``product`` when the fragment cannot be extracted; otherwise
        the result of ``self.send_next_request`` after a related-products
        request has been appended to ``meta['reqs']``.
        """
        marketplaces = []
        meta = response.meta
        product = meta['product']
        data = response.body_as_unicode()
        seller_id = meta.get('seller_id')
        product_id = meta.get('product_id')
        try:
            data = is_empty(
                re.findall(r'parentItem":"{0}"(.*)?'.format(seller_id),
                           data)).replace('\\', '')
            marketplace = Selector(text=data)
        except Exception:
            # Best effort: without the marketplace fragment return the
            # product as-is.  (Was a bare ``except``, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            return product

        # NOTE(review): ``featured`` is unquoted, so XPath treats it as a
        # child-element test rather than the string 'featured' - confirm
        # the intent before changing it.
        sellers_noline = list(
            set(
                marketplace.xpath(
                    "//tr[contains(@class, featured)]/td/img/@alt").extract()))
        sellers_line = marketplace.xpath(
            "//tr/td[@class='seller']/a[1]/@title").extract()
        new_sellers_line = self.remove_duplicate(sellers_line)
        sellers = sellers_noline + new_sellers_line
        price_int = marketplace.xpath(
            "//ul[contains(@class, 'price')]/li[@class='price-current ']/strong/text()"
        ).extract()
        price_sup = marketplace.xpath(
            "//ul[contains(@class, 'price')]/li[@class='price-current ']/sup/text()"
        ).extract()
        for i, item in enumerate(sellers):
            try:
                price = price_int[i] + price_sup[i]
            except IndexError:
                # Fewer price fragments than sellers.
                price = 0.0
            marketplaces.append({
                "price": Price(price=price or 0.0, priceCurrency="USD"),
                "name": item,
            })

        if marketplaces:
            product["marketplace"] = marketplaces

        reqs = meta.get('reqs', [])
        reqs.append(
            Request(url=self.RELATED_PRODUCTS.format(product_id=product_id,
                                                     seller_id=seller_id),
                    dont_filter=True,
                    callback=self.parse_related_product,
                    meta=meta))

        # ``reqs`` is never empty here because of the append above.
        return self.send_next_request(reqs, response)
コード例 #6
0
    def parse_price(self, response):
        """Return the page price from ``<meta itemprop=...>`` tags.

        Uses the first ``content`` value of each query; falls back to a zero
        USD price when either the price or the currency is missing.
        """
        # NOTE(review): contains(@itemprop, "price") also matches
        # itemprop="priceCurrency"; confirm the price tag always comes first.
        amounts = response.xpath(
            '//meta[contains(@itemprop, "price")]/@content').extract()
        currencies = response.xpath(
            '//meta[contains(@itemprop, "priceCurrency")]/@content').extract()

        if not (amounts and currencies):
            return Price(price=0.00, priceCurrency="USD")
        return Price(price=amounts[0], priceCurrency=currencies[0])
コード例 #7
0
    def _parse_price(self, response):
        """Return the price declared in ``<meta itemprop="price">``.

        The currency comes from ``<meta itemprop="priceCurrency">``.  When
        either value is falsy a zero GBP price is returned instead.
        """
        amount = is_empty(
            response.xpath('//meta[@itemprop="price"]/@content').extract())
        currency = is_empty(
            response.xpath(
                '//meta[@itemprop="priceCurrency"]/@content').extract())

        if not (amount and currency):
            return Price(price=0.00, priceCurrency="GBP")
        return Price(price=amount, priceCurrency=currency)
コード例 #8
0
ファイル: dockers.py プロジェクト: lifelonglearner127/tmtext
    def parse_price(self, response):
        """Return the current ("now") price of ``self.product_id``.

        The amount is taken from the price entries found under
        ``self.js_data['colorid'][self.product_id]['price']`` whose ``il8n``
        field equals ``'now'`` (the last such entry wins).  The currency is
        read from the response body.

        Returns ``None`` when ``self.js_data`` is empty, and a zero USD price
        when no "now" amount or no currency is found.
        """
        if not self.js_data:
            return None

        # Keep the amount separate from the list being iterated: previously
        # the whole list was handed to ``Price`` when no 'now' entry existed.
        amount = None
        for price_data in self.js_data['colorid'][self.product_id]['price']:
            if price_data['il8n'] == 'now':
                amount = price_data['amount']
        currency = is_empty(
            re.findall(r'currency":"(\w+)"', response.body_as_unicode()))

        if amount and currency:
            return Price(price=amount, priceCurrency=currency)
        return Price(price=0.00, priceCurrency="USD")
コード例 #9
0
    def _populate_from_html(self, response, product):
        """Fill ``product`` with fields scraped from the product page.

        Sets title, reseller id (taken from the URL), price (USD) and model,
        then delegates to the related-products and hardcoded-fields helpers.
        """
        _populate_from_open_graph_product(response, product)

        sidebar = '#productDetailsLeftSidebar .inner-container '
        cond_set(product, 'title',
                 response.css(sidebar + 'h1::text').extract(), unicode.strip)
        if not product.get("title"):
            # Alternative page layout.
            fallback_titles = response.xpath(
                "//h1[contains(@class, 'prod_name')]/text()").extract()
            if fallback_titles:
                cond_set(product, 'title', fallback_titles, unicode.strip)

        id_matches = re.findall(r"\/_\/([^?$\s]+)", response.url)
        cond_set_value(product, "reseller_id",
                       id_matches[0] if id_matches else None)

        price_values = response.xpath(
            '//div[@id="productPrice"]'
            '/div[contains(@class, "display_price")]/input/@value |'
            '//div[@id="productPrice"]/span[last()]/text()'
        ).extract()
        if price_values:
            amount = price_values[0].replace("$", "").strip()
            product["price"] = Price(priceCurrency='USD', price=amount)

        style_texts = response.css('#storeStyleNumber::text').extract()
        if style_texts:
            style_match = re.search(r'Store Style #:\xa0(.+)', style_texts[0])
            cond_set_value(product, 'model', style_match,
                           lambda model: model.group(1))

        self._populate_related_products(response, product)
        self._populate_hardcoded_fields(product)
コード例 #10
0
    def parse_price(self, response):
        """Store the EUR price of the page on ``response.meta['product']``.

        The first ``<digits>,<digits>`` match in the price text is converted
        to dot notation.  Returns the string ``'price'`` when a price was
        parsed.  Otherwise a zero EUR price is stored and ``None`` is
        returned.
        """
        meta = response.meta.copy()
        product = meta['product']
        price_sel = response.xpath('//span[@itemprop="price"]/'
                                   'span[@class="price"]/text() | '
                                   '//p[@class="special-price"]/'
                                   'span[@itemprop="price"]/text()')
        if price_sel:
            raw = is_empty(price_sel.extract()).strip()
            amount = is_empty(re.findall(r'\d+,\d+', raw))
            # A price node without a "12,34"-style amount used to raise
            # AttributeError on ``.replace``; fall through to the default.
            if amount:
                product['price'] = Price(priceCurrency="EUR",
                                         price=amount.replace(',', '.'))
                return 'price'

        product['price'] = Price(priceCurrency="EUR", price=float(0))
コード例 #11
0
    def parse_marketplace(self, response):
        """Attach marketplace offers from a JSON response to the product.

        The body is JSON whose ``html`` value is parsed as markup.  Each
        ``div`` whose class contains ``unit`` yields one
        ``{"price": Price(AED), "name": seller}`` entry.  The product is
        returned unchanged when the body is not valid JSON or no units exist.
        """
        product = response.meta['product']

        try:
            payload = json.loads(response.body)
        except ValueError:
            return product

        markup = Selector(text=payload.get("html", ""))

        offers = []
        for unit in markup.xpath("//div[contains(@class, 'unit')]"):
            amount = is_empty(
                unit.xpath(
                    "div/div/div[contains(@class, 'larg-price')]/text()").re(
                        FLOATING_POINT_RGEX))
            seller_name = is_empty(
                unit.xpath("div/div/span/a/text()").extract())
            offer = {
                "price": Price(price=amount, priceCurrency="AED"),
                "name": seller_name,
            }
            offers.append(offer)

        if offers:
            product["marketplace"] = offers

        return product
コード例 #12
0
    def _parse_store_status(self, response):
        """Checking availability in stores and adding store price to product"""
        reqs = response.meta['reqs']
        product = response.meta['product']
        try:
            currency = re.findall('priceCurrency=(.*?),',str(product['price']))[0]
        except:
            currency = 'CAD'
        data = json.loads(response.body_as_unicode())
        for store in data['products'][0]['results']:
            try:
                if store['availability'] != '70': #Not in store status
                    price = store['minCurrentPrice']
                else:
                    price = None
            except KeyError:
                price = None
                continue

            if price:
                product['price'] = Price(priceCurrency=currency, price=str(price))
                break

        if price:
            if product['is_out_of_stock']:
                    product['is_in_store_only'] = True
        else:
            product['is_in_store_only'] = False

        if reqs:
            return self.send_next_request(reqs, response)

        return product
コード例 #13
0
 def _populate_from_html(self, response, product):
     """Fill ``product`` from the product page markup.

     Sets reseller id (from the URL), title, brand, price, image URL and
     description, then derives the model from the title.  A price containing
     ``'Rp'`` is converted into an IDR ``Price``; any other textual price is
     only logged.
     """
     cond_set(product, 'reseller_id',
              re.findall(r'\/sku(\d+)', response.url))
     cond_set(product, 'title',
              response.css('[itemprop=name]::text').extract())
     cond_set(product, 'brand',
              response.css('#ctl00_content_lnkBrand::text').extract())
     cond_set(product, 'price',
              response.css('[itemprop=price]::text').extract())

     raw_price = product.get('price', '')
     if raw_price and not isinstance(raw_price, Price):
         if 'Rp' in raw_price:
             amount = raw_price.lower().replace('rp', '')
             amount = amount.replace(',', '').strip()
             product['price'] = Price(price=amount, priceCurrency='IDR')
         else:
             self.log('Unrecognized currency at %s' % response.url)

     cond_replace(product, 'image_url',
                  response.css('#prodMedia img::attr(src)').extract())

     spec_blocks = response.css('.spesifications').extract()
     spec_html = spec_blocks[0] if spec_blocks else ''
     cond_replace_value(product, 'description',
                        product.get('description', '') + spec_html.strip())
     self._get_model_from_title(product)
コード例 #14
0
    def _parse_price(self, data):
        price = data.get('currentPrice')

        if price:
            price = Price(priceCurrency="GBP", price=price)

        return price
コード例 #15
0
ファイル: tesco.py プロジェクト: lifelonglearner127/tmtext
 def _parse_single_product(self, response):
     """Build a ``SiteProductItem`` from the page's ``productdata`` meta tag.

     The tag content has its last character dropped and ``|`` replaced by
     ``,`` so that it can be decoded as a JSON list, of which the first
     entry is used.  When ``self.product_url`` is set the item is flagged
     as a single result.
     """
     productdata = "[" + is_empty(
         response.xpath('//meta[@name="productdata"]/@content').extract(),
         "")[:-1].replace("|", ",") + "]"
     productdata = is_empty(json.loads(productdata))
     product = SiteProductItem()
     if productdata:
         product["title"] = productdata["name"]
         product["is_out_of_stock"] = not productdata["available"]
         product["url"] = "http://www.tesco.com/groceries/product/details/"\
             "?id=" + str(productdata["productId"])
         regex = r"id=([A-Z0-9\-]+)"
         reseller_id = re.findall(regex, product.get('url', ''))
         reseller_id = reseller_id[0] if reseller_id else None
         cond_set_value(product, "reseller_id", reseller_id)
         try:
             product["price"] = Price(price=productdata["price"],
                                      priceCurrency="GBP")
         except Exception:
             # Best effort: the price stays unset, but the failure is no
             # longer silent (this was a bare ``except: pass``).
             self.log("Could not set price for product %s" %
                      productdata["productId"])
         product["image_url"] = productdata["mediumImage"]
         product["search_term"] = ""
         product["brand"] = is_empty(self.brand_from_title(
             product["title"]))
         product["site"] = is_empty(self.allowed_domains)
     if self.product_url:
         product['is_single_result'] = True
         # NOTE(review): "search_term" is only ever set to "" above, so this
         # truthiness test never removes it - confirm the intended behaviour.
         if product.get("search_term"):
             del product['search_term']
     return product
コード例 #16
0
ファイル: waitrose.py プロジェクト: lifelonglearner127/tmtext
    def _scrape_product_links(self, response):
        """Yield ``(url, product)`` pairs for every listed product.

        Product fields are copied from each entry of ``data['products']``
        according to ``self._PRODUCT_TO_DATA_KEYS`` (values equal to the
        string ``'null'`` are skipped).  The image URL and product URL are
        made absolute, and a textual price is converted to a GBP ``Price``.
        """
        data = WaitroseProductsSpider._get_data(response)
        for product_data in data['products']:
            product = SiteProductItem()

            for product_key, data_key in self._PRODUCT_TO_DATA_KEYS.items():
                value = product_data.get(data_key, 'null')
                if value != 'null':
                    product[product_key] = value

            # The default used to be the truthy string 'None', which produced
            # a bogus image URL for products without an image.
            image_url = product.get('image_url')
            if image_url:
                product['image_url'] = urlparse.urljoin('http://', image_url)

            # This one is not in the mapping since it requires transformation.
            #product['upc'] = int(product_data['productid'])

            if product.get('price', None):
                price = product['price']
                # Pounds are marked with a leading 'p' and pence carry a
                # trailing 'p', so one pattern covers both notations.
                price = price.replace('&pound;', 'p')
                price = re.findall(r'(p? *[\d ,.]+ *p?) *', price)
                price = price[0] if price else ''
                if price.endswith('p'):
                    # NOTE(review): a single-digit pence value such as "5p"
                    # becomes 0.5 here - confirm whether 0.05 is intended.
                    price = '0.' + price.strip()
                if 'p' in price:
                    price = re.sub('[p ,]', '', price)
                    product['price'] = Price(priceCurrency='GBP', price=price)
                else:
                    self.log('Unknown price format at %s' % response)

            if not product.get('url', '').startswith('http'):
                product['url'] = urlparse.urljoin('http://www.waitrose.com',
                                                  product['url'])

            yield product['url'], product
コード例 #17
0
ファイル: amazon.py プロジェクト: gridl/ecommerce_spider
 def _get_price(self, response, product):
     """ Parses and sets the product price, with all possible variations
     :param response: Scrapy's Response obj
     :param product: Scrapy's Item (dict, basically)
     :return: None

     A price text containing ``$`` is reduced to its first numeric run and
     stored as a USD ``Price``.  A text without ``$`` becomes a zero price
     when it contains ``FREE`` or a space, and is logged as an error
     otherwise.
     """
     cond_set(
         product,
         'price',
         response.css('#priceblock_ourprice ::text'
                      ', #unqualifiedBuyBox .a-color-price ::text'
                      ', #priceblock_saleprice ::text'
                      ', #actualPriceValue ::text'
                      ', #buyNewSection .offer-price ::text').extract(),
     )
     if not product.get('price', None):
         cond_set(
             product, 'price',
             response.xpath(
                 '//td/b[@class="priceLarge"]/text() |'
                 '//span[@class="olp-padding-right"]'
                 '/span[@class="a-color-price"]/text() |'
                 '//div[contains(@data-reftag,"atv_dp_bb_est_hd_movie")]'
                 '/button/text() |'
                 '//span[@id="priceblock_saleprice"]/text() |'
                 '//li[@class="swatchElement selected"]'
                 '//span[@class="a-color-price"]/text() |'
                 '//div[contains(@data-reftag,"atv_dp_bb_est_sd_movie")]'
                 '/button/text() |'
                 '//div[@id="mocaBBRegularPrice"]'
                 '/div/text()[normalize-space()]').extract())
     if product.get('price', None):
         if '$' not in product['price']:
             if 'FREE' in product['price'] or ' ' in product['price']:
                 product['price'] = Price(priceCurrency='USD', price='0.00')
             else:
                 self.log('Currency symbol not recognized: %s' %
                          response.url,
                          level=ERROR)
         else:
             # The pattern needs at least two characters, so a text such as
             # "$5" yields no match; that used to raise IndexError.
             amounts = re.findall(r'[\d ,.]+\d', product['price'])
             if amounts:
                 # A match holds only digits, spaces, commas and dots, so
                 # stripping commas and spaces is the only cleanup needed.
                 product['price'] = Price(
                     priceCurrency='USD',
                     price=re.sub('[, ]', '', amounts[0]))
             else:
                 self.log('No numeric price found: %s' % response.url,
                          level=ERROR)
コード例 #18
0
ファイル: soap.py プロジェクト: lifelonglearner127/tmtext
 def _unify_price(self, product):
     price = product.get('price')
     if price is None:
         return
     is_usd = not price.find('$')
     price = price[1:].replace(',', '')
     if is_usd and price.replace('.', '').isdigit():
         product['price'] = Price('USD', price)
コード例 #19
0
ファイル: ocado.py プロジェクト: lifelonglearner127/tmtext
    def parse_product(self, response):
        """Populate the product in ``response.meta`` from the product page.

        Sets title, price (GBP), image URL, description, locale, reseller id
        and brand, then returns the product.
        """
        product = response.meta['product']

        title_parts = response.xpath(
            "//h1[@class='productTitle'][1]//text()").extract()
        if len(title_parts) >= 2:
            # Only the last two text nodes make up the title.
            cond_set_value(product, 'title',
                           self.clear_desc(title_parts[-2:]))

        price_values = response.xpath(
            "//div[@id='bopRight']//meta[@itemprop='price']/@content"
        ).extract()
        cond_set(product, 'price', price_values)

        if product.get('price', None):
            if isinstance(product['price'], str):
                product['price'] = product['price'].decode('utf8')
            raw_price = product['price']
            if u'£' in raw_price:
                amount = raw_price.replace(u'£', '').replace(' ', '')
                amount = amount.replace(',', '').strip()
                product['price'] = Price(priceCurrency='GBP', price=amount)
            else:
                self.log('Unknown currency at %s' % response.url, level=ERROR)

        gallery_links = response.xpath(
            "//ul[@id='galleryImages']/li[1]/a/@href").extract()
        if gallery_links:
            cond_set_value(product, 'image_url',
                           urlparse.urljoin(response.url, gallery_links[0]))

        description_nodes = response.xpath(
            "//div[@id='bopBottom']"
            "//h2[@class='bopSectionHeader' and text()[1]='Product Description'][1]"
            "/following-sibling::*[@class='bopSection']"
            "//text()").extract()
        cond_set_value(product, 'description',
                       self.clear_desc(description_nodes))

        cond_set_value(product, 'locale', "en_GB")

        id_matches = re.findall(r"\/(\d+)", response.url)
        cond_set_value(product, "reseller_id",
                       id_matches[0] if id_matches else None)

        brand_values = response.xpath(
            "string(//div[@id='bopBottom']//*[@itemprop='brand'])").extract()
        cond_set(product, 'brand', brand_values, string.strip)

        return product
コード例 #20
0
ファイル: tesco.py プロジェクト: lifelonglearner127/tmtext
    def _scrape_product_links(self, response):
        """Yield ``(url, item)`` pairs for the products on a listing page.

        For user agents other than ``desktop``/``default`` only the product
        links are yielded, each with an empty item.  Otherwise the
        ``productdata`` meta tag (entries separated by ``|``) is paired by
        position with the product links, and pre-populated items are yielded
        with ``None`` as the URL.

        Raises ``AssertionError`` when a product entry carries no ``name``.
        """
        # To populate the description, fetching the product page is necessary.

        if self.user_agent_key not in ["desktop", "default"]:
            links = response.xpath(
                '//section[contains(@class,"product_listed")]'
                '//div[contains(@class,"product_info")]//a/@href').extract()

            if not links:
                self.log(
                    "[Mobile] Found no product data on: %s" % response.url,
                    ERROR)

            for link in links:
                yield urlparse.urljoin(response.url, link), SiteProductItem()
        else:
            url = response.url

            # This will contain everything except for the URL and description.
            product_jsons = response.xpath(
                '//meta[@name="productdata"]/@content').extract()

            if not product_jsons:
                self.log("Found no product data on: %s" % url, ERROR)

            product_links = response.css(
                ".product > .desc > h2 > a ::attr('href')").extract()
            if not product_links:
                self.log("Found no product links on: %s" % url, ERROR)

            if not product_jsons:
                # Nothing to pair with the links; indexing [0] below used to
                # raise IndexError right after the error was logged.
                return

            for product_json, product_link in zip(product_jsons[0].split('|'),
                                                  product_links):
                prod = SiteProductItem()
                cond_set_value(prod, 'url',
                               urlparse.urljoin(url, product_link))

                product_data = json.loads(product_json)

                cond_set_value(prod, 'price', product_data.get('price'))
                cond_set_value(prod, 'image_url',
                               product_data.get('mediumImage'))

                #prod['upc'] = product_data.get('productId')
                if prod.get('price', None):
                    prod['price'] = Price(price=str(prod['price']).replace(
                        ',', '').strip(),
                                          priceCurrency='GBP')

                try:
                    brand, title = self.brand_from_title(product_data['name'])
                    cond_set_value(prod, 'brand', brand)
                    cond_set_value(prod, 'title', title)
                except KeyError:
                    raise AssertionError(
                        "Did not find title or brand from JS for product: %s" %
                        product_link)

                yield None, prod
コード例 #21
0
ファイル: bestbuy.py プロジェクト: lifelonglearner127/tmtext
 def _unify_price(self, product):
     price = product.get('price')
     if not price:
         return
     price_match = re.search('\$ *([, 0-9]+(?:\.[, 0-9]+)?)', price)
     if price_match:
         price = price_match.group(1)
         price = ''.join(re.split('[ ,]+', price))
     cond_replace_value(product, 'price', Price('USD', price))
コード例 #22
0
ファイル: riteaid.py プロジェクト: lifelonglearner127/tmtext
    def _parse_price(self, response):
        """Return the ``itemprop="price"`` amount as a ``Price``, or ``None``.

        The currency is the first 2-3 word-character match in the
        ``priceCurrency`` content attribute, defaulting to ``'USD'``.
        """
        amounts = response.xpath(
            '//*[@itemprop="price"]/text()').re(r'[\d\.]+')
        currencies = response.xpath(
            '//*[@itemprop="priceCurrency"]/@content').re(r'\w{2,3}')
        if not currencies:
            currencies = ['USD']

        if amounts:
            return Price(price=amounts[0], priceCurrency=currencies[0])
        return None
コード例 #23
0
ファイル: shoprite.py プロジェクト: lifelonglearner127/tmtext
 def _parse_price(product_info):
     """Return the USD ``Price`` described by ``product_info``.

     ``CurrentPrice`` is preferred.  ``RegularPrice`` is used instead when
     the current price has no numeric match or its text contains ``'for'``.
     The amount is ``0.0`` when neither field yields a number.
     """
     current_text = product_info.get('CurrentPrice', '')
     amounts = FLOATING_POINT_RGEX.findall(current_text)
     if 'for' in current_text or not amounts:
         regular_text = product_info.get('RegularPrice', '')
         amounts = FLOATING_POINT_RGEX.findall(regular_text)
     value = float(amounts[0]) if amounts else 0.0
     return Price(price=value, priceCurrency='USD')
コード例 #24
0
    def _parse_price(self, response):
        """Return the special or regular price of the page as a GBP ``Price``.

        The first ``<digits>.<digits>`` match in the price text is used.  The
        amount defaults to ``0.00`` when no price node is found or, unlike
        before, when the text holds no decimal amount (``None`` used to be
        passed to ``Price`` in that case).
        """
        price = is_empty(
            response.xpath(
                '//p[@class="special-price"]/span[@itemprop="price"]/text()'
                ' |//span[@class="regular-price"]/span[@itemprop="price"]/text()'
            ).extract(), 0.00)
        if price:
            price = is_empty(re.findall(r'(\d+\.\d+)', price), 0.00)

        return Price(price=price, priceCurrency='GBP')
コード例 #25
0
    def _parse_price(self, response):
        """Return the page price as a USD ``Price``, or ``None`` if absent.

        Looks at the text of ``class="price"`` elements first, then at the
        ``content`` attribute of ``itemprop="price"`` elements.  Commas are
        removed from the first match.
        """
        amounts = (
            response.xpath('//*[@class="price"]/text()').re(r'[\d\.\,]+')
            or response.xpath('.//*[@itemprop="price"]/@content').re(
                r'[\d\.\,]+')
        )
        if amounts:
            return Price(price=amounts[0].replace(',', ''),
                         priceCurrency='USD')
        return None
コード例 #26
0
 def _parse_price(response):
     """Return the USD ``Price`` shown on the page, or ``None``.

     The amount following ``$`` in the text around the "Dell Price" label
     wins.  Otherwise the first text of the sale-price element is used, then
     that of the retail-price element, with ``$`` removed.
     """
     label_nodes = response.xpath('//*[contains(text(), "Dell Price")]')
     surrounding_text = ''.join(label_nodes.xpath('./..//text()').extract())
     dell_match = re.search(r'\$([\d,]+\.\d+)', surrounding_text)
     if dell_match:
         # NOTE(review): thousands separators are kept in this branch.
         return Price(price=dell_match.group(1), priceCurrency='USD')

     fallback_queries = (
         '//*[contains(@name, "pricing_sale_price")]'
         '[contains(text(), "$")]//text()',
         '//*[contains(@name, "pricing_retail_price")]'
         '[contains(text(), "$")]//text()',
     )
     for query in fallback_queries:
         texts = response.xpath(query).extract()
         if texts:
             return Price(price=texts[0].strip().replace('$', ''),
                          priceCurrency='USD')
     return None
コード例 #27
0
 def _populate_variants(self, response, product, variants):
     variants = response.meta.get('variants', {})
     if variants is None or len(variants) == 0:
         return product
     for k, v in variants.items():
         if 'sizes' in v:
             continue
         url = v['href']
         new_meta = response.meta.copy()
         request = Request(url,
                           callback=self._parse_variants_cb,
                           meta=new_meta)
         return request
     if len(variants) == 1:
         k = list(variants)[0]
         sizes = variants[k]['sizes']
         if len(sizes) == 0:
             return product
     prodlist = []
     for color, v in variants.items():
         image_url = variants[color]['image_url']
         if len(v['sizes']) == 0:
             new_product = product.copy()
             new_product['model'] = color
             new_product['price'] = v['price']
             if not '£' in new_product['price']:
                 self.log('Unknown currency at %s' % response.url)
             else:
                 new_product['price'] = Price(
                     price=new_product['price'].replace(',', '').replace(
                         '£', '').strip(),
                     priceCurrency='GBP')
             new_product['image_url'] = image_url
             stock = v['stock']
             if stock == '0':
                 new_product['is_out_of_stock'] = True
             prodlist.append(new_product)
         else:
             for size, sizeattrs in variants[color]['sizes'].items():
                 price = sizeattrs['price']
                 stock = sizeattrs['stock']
                 # print color, size, price, stock, image_url
                 new_product = product.copy()
                 new_product['model'] = color + ":" + size
                 new_product['price'] = price
                 new_product['image_url'] = image_url
                 if stock == '0':
                     new_product['is_out_of_stock'] = True
                 if 'code' in sizeattrs:
                     try:
                         new_product['upc'] = int(sizeattrs['code'])
                     except ValueError:
                         pass
                 prodlist.append(new_product)
     return prodlist
コード例 #28
0
    def _parse_price(self, response):
        try:
            price = min(
                map((float),
                    re.findall('"finalPrice":"([\d\.]+)"', response.body)))
            return Price(price=price, priceCurrency='USD')

        except:
            import traceback
            print traceback.print_exc()
            return None
コード例 #29
0
ファイル: nextcouk.py プロジェクト: lifelonglearner127/tmtext
    def _parse_price(self, response, item):
        """Store the GBP price of ``item`` on the product in ``response.meta``.

        The first run of digits in the extracted ``.Price`` node is used.
        The product price is set to ``None`` when ``item`` has no such node.
        """
        product = response.meta['product']

        price_nodes = item.css('.Price')
        if not price_nodes:
            product['price'] = None
            return

        markup = is_empty(price_nodes.extract()).strip()
        amount = is_empty(re.findall(r'(\d+)', markup))
        product['price'] = Price(priceCurrency="GBP", price=amount)
コード例 #30
0
    def parse_product(self, response):
        """Fill in product details and request the product's JS data.

        Sets price (USD), title, brand, description and locale on the product
        from ``response.meta``.  Returns ``None`` for out-of-stock redirect
        URLs; otherwise a ``Request`` for ``self.PRODUCT_URL_JS`` whose
        callback is ``self._parse_product_js``.
        """
        product = response.meta['product']
        vid = 1
        if "vid" in response.meta:
            vid = response.meta['vid']
        if 'OutOfStockNoResults' in response.url:
            self.log("Product OutOfStock %s %s" % (response.url, product),
                     DEBUG)
            return

        if not product.get("price"):
            price = is_empty(
                response.xpath(
                    "//span[@id='priceText']/text() |"
                    "//div[@id='tabWindow']/noscript"
                ).extract(),
                ""
            )
            # Reversed so that the last amount in the text is the one used.
            price = is_empty(re.findall(r"\d+\.\d+", price)[::-1])
            if price:
                product["price"] = Price(price=price, priceCurrency="USD")

        title = product.get('title')
        if isinstance(title, str):
            product['title'] = title.decode('utf-8', 'ignore')
            title = product.get('title')
        else:
            title = is_empty(
                response.xpath(
                    "//div[@id='productNameText']/h1/text()").extract())
            if title:
                product["title"] = title

        # The title may be missing altogether; ``title.find`` used to raise
        # AttributeError in that case.
        brindex = title.find("&#153") if title else -1
        if brindex > 1:
            # Text before the trademark entity is taken as the brand.
            brand = title[:brindex]
            cond_set_value(product, 'brand', brand)
        cond_set_value(product, 'brand', self.BRAND)
        cond_set(product, 'description',
                 response.xpath("//div[@id='tabWindow']").extract())
        product['locale'] = "en-US"

        new_meta = response.meta.copy()
        pid = product.get('upc')
        if not pid:
            pid = re.findall(r"pid=(\d+)", response.url)
            if pid:
                pid = pid[0]
        url = self.PRODUCT_URL_JS.format(pid=pid, vid=vid)
        return Request(url,
                       callback=self._parse_product_js,
                       meta=new_meta,
                       priority=100)