# Example 1
class CostcoProductsSpider(BaseProductsSpider):
    """Spider for costco.com search results and product detail pages.

    Prices are rendered client-side, so each product page is re-fetched
    through headless Chromium (Selenium) to read the final DOM.
    """

    name = "costco_products"
    allowed_domains = ["costco.com"]
    start_urls = []

    SEARCH_URL = "http://www.costco.com/CatalogSearch?pageSize=96" \
        "&catalogId=10701&langId=-1&storeId=10301" \
        "&currentPage=1&keyword={search_term}"
    # Max attempts to render a page with selenium before giving up.
    selenium_retries = 5
    DEFAULT_CURRENCY = u'USD'

    # Bazaarvoice endpoint used to pull buyer-review statistics.
    REVIEW_URL = 'http://api.bazaarvoice.com/data/products.json?passkey=bai25xto36hkl5erybga10t99&apiversion=5.5&filter=id:{product_id}&stats=reviews'

    def __init__(self, *args, **kwargs):
        # Helper that parses Bazaarvoice JSON into buyer_reviews.
        self.br = BuyerReviewsBazaarApi(called_class=self)

        super(CostcoProductsSpider, self).__init__(
            site_name=self.allowed_domains[0], *args, **kwargs)

    def _parse_single_product(self, response):
        return self.parse_product(response)

    def parse_product(self, response):
        """Fill response.meta['product'] from the product page.

        Returns the finished product item, or a queued review Request
        whose callback chain will eventually return it.
        """
        prod = response.meta['product']

        meta = response.meta.copy()
        reqs = []
        meta['reqs'] = reqs

        # TODO since response.body is already downloaded by scrapy
        # may try to run it in selenium instead of downloading the page again
        # TODO might as well use that html to extract other data
        selenium_html = None
        for _ in range(self.selenium_retries):
            selenium_html = self._get_page_html_selenium(response.url)
            if selenium_html:
                break

        if selenium_html:
            price = Selector(text=selenium_html).xpath(
                './/*[contains(@class, "your-price")]/span[@class="value"]/text()').extract()
            # BUGFIX: extract() returns a list; pass the first match to Price
            # instead of the whole list, and only when a price was found.
            if price:
                cond_set_value(prod, 'price', Price(priceCurrency=self.DEFAULT_CURRENCY,
                                                    price=price[0]))

        # no longer available
        no_longer_available = response.xpath(
            '//*[@class="server-error" and contains(text(),'
            '"out of stock and cannot be added to your cart at this time")]')
        cond_set_value(prod, 'no_longer_available', 1 if no_longer_available else 0)

        if not no_longer_available and response.xpath('//h1[text()="Product Not Found"]'):
            prod['not_found'] = True
            return prod

        # Model number lives in the first details tab prefixed with "Model".
        model = response.xpath('//div[@id="product-tab1"]//text()').re(
            'Model[\W\w\s]*')
        if len(model) > 0:
            cond_set(prod, 'model', model)
            if 'model' in prod:
                prod['model'] = re.sub(r'Model\W*', '', prod['model'].strip())

        title = response.xpath('//h1[@itemprop="name"]/text()').extract()
        cond_set(prod, 'title', title)

        # Title key must be present even if it is blank
        cond_set_value(prod, 'title', "")

        # Brand: first try the "Brand" line of the specifications tab ...
        tab2 = ''.join(
            response.xpath('//div[@id="product-tab2"]//text()').extract()
        ).strip()
        brand = ''
        for line in tab2.split('\n'):
            if 'Brand' in line.strip():
                brand = line.strip()
        brand = re.sub(r'Brand\W*', '', brand)
        if brand:
            prod['brand'] = brand
        # ... then fall back to any "Brand:" label elsewhere on the page.
        if not prod.get("brand"):
            brand = response.xpath(
                    './/*[contains(text(), "Brand:")]/following-sibling::text()[1]').extract()
            brand = brand[0].strip() if brand else None
            cond_set_value(prod, 'brand', brand)

        # Description: skip the placeholder espot marker when present.
        des = response.xpath('//div[@id="product-tab1"]//text()').extract()
        des = ' '.join(i.strip() for i in des)
        if '[ProductDetailsESpot_Tab1]' in des.strip():
            des = response.xpath("//div[@id='product-tab1']/*[position()>1]//text()").extract()
            des = ' '.join(i.strip() for i in des)
            if des.strip():
                prod['description'] = des.strip()
        elif des:
            prod['description'] = des.strip()

        img_url = response.xpath('//img[@itemprop="image"]/@src').extract()
        cond_set(prod, 'image_url', img_url)

        cond_set_value(prod, 'locale', 'en-US')
        prod['url'] = response.url

        # Categories
        categorie_filters = ['home']
        # Clean and filter categories names from breadcrumb
        categories = list(filter((lambda x: x.lower() not in categorie_filters),
                        map((lambda x: x.strip()), response.xpath('//*[@itemprop="breadcrumb"]//a/text()').extract())))

        category = categories[-1] if categories else None

        cond_set_value(prod, 'categories', categories)
        cond_set_value(prod, 'category', category)

        # Minimum Order Quantity
        # BUGFIX: replaced a bare try/except around the regex search with an
        # explicit match check (the bare except also swallowed real errors).
        moq_match = re.search('Minimum Order Quantity: (\d+)', response.body_as_unicode())
        if moq_match:
            cond_set_value(prod, 'minimum_order_quantity', moq_match.group(1))

        # Shipping cost: try "shipping & handling:" then "shipping and handling:".
        shipping = ''.join(response.xpath(
            '//*[contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ",'
            ' "abcdefghijklmnopqrstuvwxyz"), "shipping & handling:")]'
        ).re('[\d\.\,]+')).strip().replace(',', '')
        if not shipping:
            shipping = ''.join(response.xpath(
            '//*[contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ",'
            ' "abcdefghijklmnopqrstuvwxyz"), "shipping and handling:")]'
        ).re('[\d\.\,]+')).strip().replace(',', '')

        if shipping:
            cond_set_value(prod, 'shipping_cost', Price(priceCurrency=self.DEFAULT_CURRENCY,
                                                        price=shipping))

        # Shipping included if the page says so anywhere, or advertises
        # free shipping in merchandising text.
        shipping_included = ''.join(response.xpath(
            '//*[contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ",'
            ' "abcdefghijklmnopqrstuvwxyz"),"shipping & handling included")]'
        ).extract()).strip().replace(',', '') or \
            response.xpath(
                '//*[@class="merchandisingText" and '
                'contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", '
                '"abcdefghijklmnopqrstuvwxyz"), "free shipping")]') or \
            ''.join(response.xpath(
                '//p[contains(translate(text(), "ABCDEFGHIJKLMNOPQRSTUVWXYZ",'
                ' "abcdefghijklmnopqrstuvwxyz"),"shipping and handling included")]'
            ).extract()).strip().replace(',', '')

        cond_set_value(prod, 'shipping_included', 1 if shipping_included or shipping == "0.00" else 0)

        available_store = re.search('Item may be available in your local warehouse', response.body_as_unicode())
        cond_set_value(prod, 'available_store', 1 if available_store else 0)

        not_available_store = re.search('Not available for purchase on Costco.com', response.body_as_unicode())
        cond_set_value(prod, 'available_online', 0 if not_available_store else 1)

        # Out of stock only when neither online nor in-store availability.
        if str(prod.get('available_online', None)) == '0' and str(prod.get('available_store', None)) == '0':
            prod['is_out_of_stock'] = True

        count_review = response.xpath('//meta[contains(@itemprop, "reviewCount")]/@content').extract()
        # Product id is the numeric segment of the URL, e.g. ".100123456.".
        product_id = re.findall(r'\.(\d+)\.', response.url)
        cond_set_value(prod, 'reseller_id', product_id[0] if product_id else None)

        if product_id and count_review:
            reqs.append(
                Request(
                    url=self.REVIEW_URL.format(product_id=product_id[0], index=0),
                    dont_filter=True,
                    callback=self.parse_buyer_reviews,
                    meta=meta
                ))

        if reqs:
            return self.send_next_request(reqs, response)

        return prod

    def parse_buyer_reviews(self, response):
        """Attach parsed Bazaarvoice reviews, then drain queued requests."""
        meta = response.meta.copy()
        product = response.meta['product']
        reqs = meta.get('reqs', [])

        product['buyer_reviews'] = self.br.parse_buyer_reviews_products_json(response)

        if reqs:
            return self.send_next_request(reqs, response)
        else:
            return product

    def send_next_request(self, reqs, response):
        """
        Helps to handle several requests
        """
        # Pop in place: pending requests may share this list via their meta.
        req = reqs.pop(0)
        new_meta = response.meta.copy()
        if reqs:
            new_meta["reqs"] = reqs

        return req.replace(meta=new_meta)

    def _search_page_error(self, response):
        """Return True (and log) when a search page has no total-match count."""
        if not self._scrape_total_matches(response):
            self.log("Costco: unable to find a match", ERROR)
            return True
        return False

    def _scrape_total_matches(self, response):
        """Extract the total result count, trying three page layouts in turn."""
        count = response.xpath(
            '//*[@id="secondary_content_wrapper"]/div/p/span/text()'
        ).re('(\d+)')
        count = int(count[-1]) if count else None
        if not count:
            count = response.xpath(
                '//*[@id="secondary_content_wrapper"]'
                '//span[contains(text(), "Showing results")]/text()'
            ).extract()
            count = int(count[0].split(' of ')[1].replace('.', '').strip()) if count else None
        if not count:
            count = response.css(".table-cell.results.hidden-xs.hidden-sm.hidden-md>span").re(
                r"Showing\s\d+-\d+\s?of\s?([\d.,]+)")
            count = int(count[0].replace('.', '').replace(',', '')) if count else None
        return count

    def _scrape_product_links(self, response):
        """Yield (url, item) pairs for every product thumbnail on the page."""
        links = response.xpath(
            '//div[contains(@class,"product-list grid")]//a[contains(@class,"thumbnail")]/@href'
        ).extract()
        for link in links:
            yield link, SiteProductItem()

    def _scrape_next_results_page_link(self, response):
        """Return the href of the next pagination entry, or None at the end."""
        links = response.xpath(
            "//*[@class='pagination']"
            "/ul[2]"  # [1] is for the Items Per Page section which has .active.
            "/li[@class='active']"
            "/following-sibling::li[1]"  # [1] is to get just the next sibling.
            "/a/@href"
        ).extract()
        return links[0] if links else None

    def _get_page_html_selenium(self, url):
        """Fetch *url* in headless Chromium and return the rendered HTML.

        Returns None on failure. BUGFIX: the driver and virtual display are
        now always torn down (the original leaked both on any exception).
        """
        display = None
        driver = None
        try:
            display = Display(visible=False)
            display.start()
            driver = self._init_chromium()
            driver.set_page_load_timeout(120)
            driver.set_script_timeout(120)
            socket.setdefaulttimeout(120)
            driver.set_window_size(1280, 768)
            driver.get(url)
            time.sleep(5)  # give client-side scripts time to render the price
            return driver.page_source
        except Exception as e:
            self.log('Exception while getting page html with selenium: {}'.format(e), WARNING)
            self.log('### Traceback: {}'.format(traceback.format_exc()), WARNING)
        finally:
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    pass
            if display is not None:
                try:
                    display.stop()
                except Exception:
                    pass

    def _init_chromium(self, proxy=None, proxy_type=None):
        """Create a Chromium webdriver, optionally routed through a proxy."""
        # TODO use random useragent script here?
        chrome_flags = webdriver.DesiredCapabilities.CHROME  # this is for Chrome?
        chrome_options = webdriver.ChromeOptions()  # this is for Chromium
        if proxy:
            chrome_options.add_argument(
                '--proxy-server=%s' % proxy_type+'://'+proxy)
        # Prefer the system chromedriver, fall back to the local install.
        executable_path = '/usr/sbin/chromedriver'
        if not os.path.exists(executable_path):
            executable_path = '/usr/local/bin/chromedriver'
        # initialize webdriver
        driver = webdriver.Chrome(desired_capabilities=chrome_flags,
                                  chrome_options=chrome_options,
                                  executable_path=executable_path)
        return driver
# Example 2
class PetcoProductsSpider(ProductsSpider):
    """Spider for petco.com search results and product detail pages."""

    name = 'petco_products'
    allowed_domains = ['petco.com']

    SEARCH_URL = ("http://www.petco.com/shop/SearchDisplay?categoryId=&storeId"
                  "=10151&catalogId=10051&langId=-1&sType=SimpleSearch&"
                  "resultCatEntryType=2&showResultsPage=true&searchSource=Q&"
                  "pageView=&beginIndex=0&pageSize=48&fromPageValue=search"
                  "&searchTerm={search_term}")

    # Pagination endpoint used from the second results page onwards.
    SEARCH_URL_2 = ("http://www.petco.com/shop/ProductListingView?searchType="
                    "12&filterTerm=&langId=-1&advancedSearch=&sType=Simple"
                    "Search&resultCatEntryType=2&catalogId=10051&searchTerm="
                    "{search_term}&resultsPerPage=48&emsName=&facet=&category"
                    "Id=&storeId=10151&beginIndex={begin_index}")

    # Bazaarvoice endpoint for buyer-review statistics.
    REVIEW_URL = ("http://api.bazaarvoice.com/data/products.json?"
                  "passkey=dpaqzblnfzrludzy2s7v27ehz&apiversion=5.5"
                  "&filter=id:{product_id}&stats=reviews")

    # AJAX endpoint (POST) that returns price details for a catalog entry.
    PRICE_URL = "http://www.petco.com/shop/GetCatalogEntryDetailsByIDView"

    def __init__(self, *args, **kwargs):
        super(PetcoProductsSpider, self).__init__(*args, **kwargs)
        # Helper that parses Bazaarvoice JSON into buyer_reviews.
        self.br = BuyerReviewsBazaarApi(called_class=self)
        # Product-link count of the last results page (0 => stop paging).
        self.product_last_page = 0

    def parse_buyer_reviews(self, response):
        """Attach parsed Bazaarvoice reviews, then drain queued requests."""
        meta = response.meta.copy()
        product = response.meta['product']
        reqs = meta.get('reqs', [])

        product['buyer_reviews'] = self.br.parse_buyer_reviews_products_json(
            response)

        if reqs:
            return self.send_next_request(reqs, response)
        else:
            return product

    def _total_matches_from_html(self, response):
        """Return the total search-result count shown on the page (0 if none)."""
        total = response.xpath(
            '//*[contains(@id,"searchTotalCount")]/text()').re('\d+')
        return int(total[0].replace(',', '')) if total else 0

    def _scrape_results_per_page(self, response):
        # Fixed page size requested via SEARCH_URL (pageSize=48).
        return 48

    def _scrape_next_results_page_link(self, response):
        """Build the next SEARCH_URL_2 page URL, or None at the end."""
        # End of pagination
        if not self.product_last_page:
            return None

        begin_index = int(re.search('beginIndex=(\d+)', response.url).group(1))
        num_poduct_page = self._scrape_results_per_page(response)
        st = response.meta['search_term']
        url = self.url_formatter.format(
            self.SEARCH_URL_2,
            search_term=urllib.quote_plus(st.encode('utf-8')),
            begin_index=str(begin_index + num_poduct_page))
        return url

    def _scrape_product_links(self, response):
        """Yield (url, item) pairs; also record the page's link count."""
        item_urls = response.xpath(
            '//*[@class="product-display-grid"]'
            '//*[@class="product-name"]/a/@href').extract()

        self.product_last_page = len(item_urls)
        for item_url in item_urls:
            yield item_url, SiteProductItem()

    def _parse_single_product(self, response):
        return self.parse_product(response)

    def _parse_title(self, response):
        title = response.xpath('//h1/text()').extract()
        return title[0].strip() if title else None

    def _parse_categories(self, response):
        categories = response.css('.breadcrumb a::text').extract()
        return categories if categories else None

    def _parse_category(self, response):
        categories = self._parse_categories(response)
        return categories[-1] if categories else None

    def _parse_image_url(self, response):
        image_url = response.xpath(
            '//*[@property="og:image"]/@content').extract()
        return image_url[0] if image_url else None

    def _parse_brand(self, response):
        # Brand link text looks like "by <Brand>".
        brand = response.xpath('//*[@class="product-brand"]/a/text()').re(
            'by.(.*)')

        return brand[0].strip() if brand else None

    def _parse_sku(self, response):
        sku = response.xpath("//input[@id='primarySku']/@value").extract()
        # BUGFIX: the original indexed sku[0] before checking the list was
        # non-empty, raising IndexError on pages without a primarySku input.
        if not sku or len(sku[0]) < 1:
            sku = response.css('.product-sku::text').re(u'SKU:.(\d+)')

        return sku[0] if sku else None

    def _parse_variants(self, response):
        """Build the variants list from the entitledItem_* JSON island."""
        variants = []

        try:
            variants_info = json.loads(
                response.xpath(
                    '//*[contains(@id,"entitledItem_")]/text()').extract()[0])
        except (IndexError, ValueError):
            # BUGFIX: narrowed the bare except — missing island (IndexError)
            # or malformed JSON (ValueError) are the expected failures.
            variants_info = {}

        for attr_value in variants_info:
            attributes = {}
            variant_attribute = attr_value["Attributes"]
            attributes['price'] = attr_value["RepeatDeliveryPrice"]["price"]
            attributes['image_url'] = attr_value["ItemImage"]
            if variant_attribute:
                # Attribute keys are "<name>_<value>" pairs.
                attr_text = attr_value["Attributes"].keys()[0].split('_')
                attributes[attr_text[0]] = attr_text[1]

            variants.append(attributes)

        return variants if variants else None

    def _parse_is_out_of_stock(self, response):
        status = response.xpath(
            '//*[@itemprop="availability" and @content="in_stock"]')
        return not bool(status)

    def _parse_shipping_included(self, response):
        pass

    def _parse_description(self, response):
        description = response.xpath('//*[@id="description"]').extract()

        return ''.join(description).strip() if description else None

    def send_next_request(self, reqs, response):
        """
        Helps to handle several requests
        """
        # Pop in place: pending requests may share this list via their meta.
        req = reqs.pop(0)
        new_meta = response.meta.copy()
        if reqs:
            new_meta["reqs"] = reqs
        return req.replace(meta=new_meta)

    def parse_product(self, response):
        """Fill the product item; queue review and AJAX price requests."""
        reqs = []
        product = response.meta['product']

        # Set locale
        product['locale'] = 'en_US'

        # Parse title
        title = self._parse_title(response)
        cond_set_value(product, 'title', title, conv=string.strip)

        # Parse category
        category = self._parse_category(response)
        cond_set_value(product, 'category', category)

        # Parse categories
        categories = self._parse_categories(response)
        cond_set_value(product, 'categories', categories)

        # Parse description
        description = self._parse_description(response)
        cond_set_value(product, 'description', description)

        # Parse image url
        image_url = self._parse_image_url(response)
        cond_set_value(product, 'image_url', image_url)

        # Parse variants
        variants = self._parse_variants(response)
        cond_set_value(product, 'variants', variants)

        # Parse stock status
        out_of_stock = self._parse_is_out_of_stock(response)
        cond_set_value(product, 'is_out_of_stock', out_of_stock)

        # Sku
        sku = self._parse_sku(response)
        cond_set_value(product, 'sku', sku)

        # Reseller_id
        cond_set_value(product, 'reseller_id', sku)

        # Brand
        brand = self._parse_brand(response)
        cond_set_value(product, 'brand', brand)

        product_id = response.xpath(
            '//*[@id="productPartNo"]/@value').extract()

        if product_id:
            reqs.append(
                Request(url=self.REVIEW_URL.format(product_id=product_id[0],
                                                   index=0),
                        dont_filter=True,
                        callback=self.parse_buyer_reviews,
                        meta=response.meta))

        price_id = response.xpath('//*[contains(@id,"entitledItem_")]/@id').re(
            'entitledItem_(\d+)')

        cat_id = response.xpath('//script/text()').re(
            'productDisplayJS.displayAttributeInfo\("(\d+)","(\d+)"')

        if not cat_id:
            cat_id = response.xpath(
                '//*[@name="firstAvailableSkuCatentryId_avl"]/@value').extract(
                )

        if price_id and cat_id:
            # Price must be fetched with a POST to the WCS AJAX endpoint.
            text = ("storeId=10151&langId=-1&catalogId=10051&"
                    "catalogEntryId={cat}&productId={prod_id}".format(
                        cat=cat_id[0], prod_id=price_id[0]))
            reqs.append(
                Request(self.PRICE_URL,
                        body=text,
                        headers={
                            'Content-Type':
                            'application/x-www-form-urlencoded',
                            'X-Requested-With': 'XMLHttpRequest'
                        },
                        method='POST',
                        meta=response.meta,
                        callback=self._parse_price,
                        dont_filter=True))

        else:
            prices = map(
                float,
                response.xpath('//*[@class="product-price"]//span/text()').re(
                    '\$([\d\.]+)'))
            # BUGFIX: min() of an empty sequence raised ValueError when no
            # price text was present on the page.
            if prices:
                product['price'] = Price(price=min(prices),
                                         priceCurrency="USD")

        if reqs:
            return self.send_next_request(reqs, response)

        return product

    def _parse_price(self, response):
        """Parse the AJAX price payload returned by PRICE_URL."""
        reqs = response.meta.get('reqs', [])
        product = response.meta['product']

        raw_information = re.findall('\{.*\}', response.body,
                                     re.MULTILINE | re.DOTALL)[0]

        # SECURITY BUGFIX: the payload is an untrusted server response and
        # must not be eval()'d. Parse it as JSON, falling back to a safe
        # literal parse for single-quoted/Python-style payloads.
        try:
            product_data = json.loads(raw_information)
        except ValueError:
            import ast
            product_data = ast.literal_eval(raw_information)
        price = product_data["catalogEntry"]["offerPrice"]
        product['price'] = Price(price=price, priceCurrency="USD")

        if reqs:
            return self.send_next_request(reqs, response)

        return product
# Example 3
class CvsProductsSpider(BaseProductsSpider):
    """Spider for cvs.com search results and product detail pages.

    Product data is read from the page's ld+json block; prices, sku
    details, and reviews are fetched via the retail/frontstore JSON API
    and Bazaarvoice (see the URL templates below).
    """

    name = 'cvs_products'
    allowed_domains = ["cvs.com", "api.bazaarvoice.com"]
    start_urls = []

    SEARCH_URL = "https://www.cvs.com/search/N-0?searchTerm={search_term}"

    # AJAX search endpoint; {page_num} and {referer} are filled per request.
    SEARCH_URL_AJAX = "https://www.cvs.com/" \
                      "retail/frontstore/OnlineShopService?" \
                      "apiKey=c9c4a7d0-0a3c-4e88-ae30-ab24d2064e43&" \
                      "apiSecret=4bcd4484-c9f5-4479-a5ac-9e8e2c8ad4b0&" \
                      "appName=CVS_WEB&" \
                      "channelName=WEB&" \
                      "contentZone=resultListZone&" \
                      "deviceToken=7780&" \
                      "deviceType=DESKTOP&" \
                      "lineOfBusiness=RETAIL&" \
                      "navNum=20&" \
                      "operationName=getProductResultList&" \
                      "pageNum={page_num}&" \
                      "referer={referer}&" \
                      "serviceCORS=False&" \
                      "serviceName=OnlineShopService&" \
                      "sortBy=relevance&" \
                      "version=1.0" \


    # Bazaarvoice endpoint for buyer-review statistics.
    REVIEW_URL = "http://api.bazaarvoice.com/data/products.json?" \
                 "passkey=ll0p381luv8c3ler72m8irrwo&apiversion=5.5&" \
                 "filter=id:{product_id}&stats=reviews"

    # Per-sku price/promotion lookup (getSkuPricePromotions).
    PRICE_URL = "https://www.cvs.com/retail/frontstore/productDetails?" \
                "apiKey=c9c4a7d0-0a3c-4e88-ae30-ab24d2064e43&" \
                "apiSecret=4bcd4484-c9f5-4479-a5ac-9e8e2c8ad4b0&" \
                "appName=CVS_WEB&" \
                "channelName=WEB&" \
                "code={sku}&" \
                "codeType=sku&" \
                "deviceToken=2695&" \
                "deviceType=DESKTOP&" \
                "lineOfBusiness=RETAIL&" \
                "operationName=getSkuPricePromotions&" \
                "serviceCORS=True&" \
                "serviceName=productDetails&" \
                "storeId=2294&" \
                "version=1.0" \

    # Per-sku attribute lookup (getSkuDetails: size, weight, flavor, UPC).
    PRODUCT_DETAILS = "https://www.cvs.com/retail/frontstore/productDetails?" \
                      "apiKey=c9c4a7d0-0a3c-4e88-ae30-ab24d2064e43&" \
                      "apiSecret=4bcd4484-c9f5-4479-a5ac-9e8e2c8ad4b0&" \
                      "appName=CVS_WEB&" \
                      "channelName=WEB&" \
                      "code={sku}&" \
                      "codeType=sku&" \
                      "deviceToken=2695&" \
                      "deviceType=DESKTOP&" \
                      "lineOfBusiness=RETAIL&" \
                      "operationName=getSkuDetails&" \
                      "serviceCORS=True&" \
                      "serviceName=productDetails&" \
                      "version=1.0"

    def __init__(self, *args, **kwargs):
        """Initialise the review helper and paging state, then enable Crawlera."""
        self.br = BuyerReviewsBazaarApi(called_class=self)
        self.referer = None
        self.first_time_products = None
        self.current_page = 1
        self.products_per_page = 20
        super(CvsProductsSpider, self).__init__(
            site_name=self.allowed_domains[0], *args, **kwargs)
        settings.overrides['CRAWLERA_ENABLED'] = True

    def _set_brand(self, product, phrase, brands):
        """Set product['brand'] to the first known brand found in *phrase*."""
        normalized_phrase = _normalize(phrase)
        # Longest candidates first so e.g. "Acme Gold" wins over "Acme".
        for candidate in sorted(brands, key=len, reverse=True):
            if _normalize(candidate) in normalized_phrase:
                cond_set_value(product, 'brand', candidate)
                return

    def parse(self, response):
        """Remember the first search-results URL as the referer, then delegate."""
        # BUGFIX: replaced a stray debug ``print`` to stdout with spider logging.
        self.log('Parsing {}'.format(response.url))
        if self.searchterms and not self.referer:
            self.referer = response.url
        return super(CvsProductsSpider, self).parse(response)

    def parse_product(self, response):
        """Populate response.meta['product'] from a CVS product page.

        Core data comes from the page's ``application/ld+json`` block;
        per-variant prices and buyer reviews are fetched with queued
        follow-up requests (drained via send_next_request).
        """
        brands = response.meta.get('brands', frozenset())
        product = response.meta['product']
        reqs = []

        if 'brand' not in product:
            descs = response.css('.brandBanner > a ::attr(title)')
            if descs:
                desc, = descs.extract()
                self._set_brand(product, desc, brands)
        product['locale'] = "en-US"

        # Reseller id is the numeric part of ".../prodid-<id>" in the URL.
        reseller_id_regex = "prodid-(\d+)"
        reseller_id = re.findall(reseller_id_regex, response.url)
        reseller_id = reseller_id[0] if reseller_id else None
        cond_set_value(product, 'reseller_id', reseller_id)

        ld_json = is_empty(
            response.xpath('//*[@type="application/ld+json" '
                           'and contains(text(),"product")]/text()').extract())
        if ld_json:
            try:
                # Strip control characters and the JSON-LD "@" prefixes so
                # the blob parses as plain JSON.
                clean_json = re.sub('([^"])\n|\t|\r', '',
                                    ld_json.replace('@', ''))
                product_data = json.loads(clean_json)

                cond_set_value(product, 'title', product_data.get('name'))
                cond_set_value(product, 'brand', product_data.get('brand'))

                # ----- variants -----
                # BUGFIX: guard against a missing "offers" key (was len(None)).
                variants = product_data.get('offers') or []
                if len(variants) > 1:
                    for variant in variants:
                        # BUGFIX: narrowed the bare except that silently hid
                        # every error while queueing variant price requests.
                        try:
                            sku = variant['itemOffered']['sku']
                        except (KeyError, TypeError):
                            continue
                        price_url = self.PRICE_URL.format(sku=sku)
                        reqs.append(
                            Request(price_url,
                                    self._parse_variant_new,
                                    meta=response.meta))

                main_variant = variants[0]
                description = main_variant.get(
                    'itemOffered',
                    {}).get('description') or product_data.get('description')
                cond_set_value(product, 'description', description)

                # Prefer the skuId from the URL; fall back to the first offer.
                main_skuID_search = re.search("skuId=(\d+)", response.url)
                if main_skuID_search:
                    main_skuID = main_skuID_search.group(1)
                else:
                    main_skuID = variants[0].get('itemOffered',
                                                 {}).get('sku', None)

                cond_set_value(product, 'image_url',
                               main_variant.get('itemOffered').get('image'))
                response.meta['main_skuID'] = main_skuID
                response.meta['offers_variants'] = variants

                if main_variant.get('price'):
                    cond_set_value(
                        product, 'price',
                        Price(price=main_variant.get('price'),
                              priceCurrency='USD'))

                if main_skuID:
                    review_url = self.REVIEW_URL.format(product_id=main_skuID)
                    reqs.append(
                        Request(review_url,
                                self._parse_review,
                                meta=response.meta))

            except Exception:
                import traceback
                # BUGFIX: route parse failures to the spider log instead of
                # printing to stdout.
                self.log('Failed to parse CVS ld+json block: {}'.format(
                    traceback.format_exc()))

        size = response.xpath(
            "//form[@id='addCart']/table/tr/td[@class='col1']/"
            "text()[.='Size:']/../../td[2]/text()").extract()
        cond_set(product, 'model', size, conv=string.strip)

        if reqs:
            return self.send_next_request(reqs, response)

        return product

    def send_next_request(self, reqs, response):
        """Dispatch the next queued request, threading the remaining queue
        through its meta so callbacks can keep draining it."""
        # Pop in place: pending requests may share this list via their meta,
        # so they must observe the queue shrinking.
        next_request = reqs.pop(0)
        new_meta = response.meta.copy()
        if reqs:
            new_meta["reqs"] = reqs
        return next_request.replace(meta=new_meta)

    def _parse_variant_new(self, response):
        """Parse one sku's price/stock payload and append it to product variants.

        Queues a follow-up getSkuDetails request to fill the variant's
        properties when the sku matches one of the page's offers.
        """
        product = response.meta['product']
        reqs = response.meta.get('reqs', [])
        data = json.loads(response.body)

        sku_price_promotions = data.get('response',
                                        {}).get('getSkuPricePromotions', [])

        # BUGFIX: sku_details was referenced without being initialised when
        # the promotions list was empty, raising NameError.
        sku_details = []
        if sku_price_promotions:
            sku_details = sku_price_promotions[0].get('skuDetails', [])

        if sku_details:
            variants = product.get('variants', [])
            variant = {}
            skuID = sku_details[0].get('skuId', '')
            variant['url'] = product.get('url', '') + "?skuId=%s" % skuID

            price = sku_details[0].get('priceInfo', {}).get('listPrice', None)
            if price:
                cond_set_value(product, 'price',
                               Price(price=price, priceCurrency='USD'))

            variant['price'] = price
            main_skuID = response.meta['main_skuID']
            variant['selected'] = main_skuID == skuID
            bohInventory = sku_details[0].get('statusInfo',
                                              {}).get('bohInventory', 0)
            bohStockStatus = sku_details[0].get('statusInfo', {}).get(
                'bohStockStatus', 'NOTAVAILABLE')
            onlineOnly = sku_details[0].get('statusInfo',
                                            {}).get('onlineOnly', False)
            onlineStockStatus = sku_details[0].get('statusInfo', {}).get(
                'onlineStockStatus', None)
            # In stock when either the warehouse or the online store has it.
            in_stock = False
            if bohInventory and bohStockStatus != 'NOTAVAILABLE':
                in_stock = True
            if onlineStockStatus == 'INSTOCK':
                in_stock = True
            variant['in_stock'] = in_stock
            variant['sku'] = skuID
            variant['properties'] = {}
            offers_variants = response.meta['offers_variants']
            for offers_variant in offers_variants:
                # Check that the variant is not duplicated
                item_offered = offers_variant.get('itemOffered', {})
                this_sku = item_offered.get('sku', None)
                if item_offered and this_sku == skuID:
                    attr = {}
                    details_url = self.PRODUCT_DETAILS.format(sku=this_sku)
                    variant['properties'] = attr
                    reqs.append(
                        Request(details_url,
                                self._parse_properties,
                                meta=response.meta))
                    break

            variants.append(variant)
            product['variants'] = variants

        if reqs:
            return self.send_next_request(reqs, response)

        return product

    def _parse_properties(self, response):
        """Fill size/weight/flavor/UPC properties on the matching variant."""
        product = response.meta['product']
        reqs = response.meta.get('reqs', [])
        data = json.loads(response.body)

        getSkuDetails = data.get('response', {}).get('getSkuDetails', [])

        # BUGFIX: sku_details was referenced without being initialised when
        # getSkuDetails was empty, raising NameError.
        sku_details = []
        if getSkuDetails:
            sku_details = getSkuDetails[0].get('skuDetails', [])

        if len(sku_details) > 0:
            detail = sku_details[0]['detail']
            skuSize = detail['skuSize']
            weight = detail['weight']
            flavor = detail['flavor']
            upcNumber = detail['upcNumber']

            variants = product.get('variants', [])
            skuID = sku_details[0].get('skuId', '')

            for idx, variant in enumerate(variants):
                # Attach properties only to the variant with the same sku.
                this_sku = variant.get('sku', None)
                if this_sku == skuID:
                    variant['properties'] = {
                        'Size': skuSize,
                        'Flavor': flavor,
                        'Weight': weight,
                        'UPCNumber': upcNumber,
                    }
                    variants[idx] = variant
                    break

            product['variants'] = variants

        if reqs:
            return self.send_next_request(reqs, response)

        return product

    def _parse_variants(self, variants, main_skuID):
        if not variants:
            return None

        parsed_variants = []
        variants_visit = set()
        for variant in variants:
            # Check that the variant is not duplicated
            item_offered = variant.get('itemOffered', {})
            this_sku = item_offered.get('sku', None)
            if this_sku in variants_visit:
                continue
            variants_visit.add(this_sku)

            # Fill the Variant data
            vr = {}
            if variant['price']:
                vr['price'] = variant['price']
            availability = variant.get('availability', None)
            vr['in_stock'] = availability == "http://schema.org/InStock"
            vr['selected'] = main_skuID == this_sku
            if item_offered:
                attr = {}
                if item_offered.get('color'):
                    attr['Color'] = item_offered.get('color')
                if item_offered.get('color'):
                    attr['Weight'] = item_offered.get('weight').get('value')
                vr['properties'] = attr
                vr['url'] = item_offered.get('url')
            parsed_variants.append(vr)

        parsed_variants[0]['selected'] = True
        return parsed_variants

    def _parse_review(self, response):
        product = response.meta['product']
        reqs = response.meta.get('reqs', [])
        product['buyer_reviews'] = self.br.parse_buyer_reviews_products_json(
            response)
        if reqs:
            return self.send_next_request(reqs, response)

        return product

    def _scrape_total_matches(self, response):
        totals = response.xpath(
            '//*[@id="resultsTabs"]//'
            'a[@title="View Products"]/text()').re('\((\d+)\)')
        if len(totals) > 1:
            self.log(
                "Found more than one 'total matches' for %s" % response.url,
                ERROR)
        elif totals:
            total = totals[0].strip()
            self.total_matches_int = int(total)
            return int(total)
        else:
            self.log("Failed to find 'total matches' for %s" % response.url,
                     WARNING)
        return None

    def _scrape_product_links(self, response):
        """Yield (url, SiteProductItem) pairs for each unique product.

        detailsLink values in the page body repeat once per SKU variant
        (trailing ``?skuId=...``); the link is deduplicated on the base URL
        while preserving first-seen order.
        """
        link_matches = re.finditer(
            r'detailsLink"\s*:\s*"(.*?)(\?skuId=\d+)?",', response.body)

        seen = set()
        for match in link_matches:
            link = match.group(1)
            # O(1) set membership replaces the old O(n) list scan that was
            # hidden inside a side-effecting list comprehension.
            if link not in seen:
                seen.add(link)
                yield link, SiteProductItem()

    def _scrape_results_per_page(self, response):
        return 20

    def _scrape_next_results_page_link(self, response):
        """Build the AJAX Request for the next page of search results.

        Builds the next-page URL from ``SEARCH_URL_AJAX``, advances
        ``self.current_page``, and returns None (stopping pagination) once
        the next page would run past ``total_matches_int`` plus a
        30-product slack.
        """
        # Dead code removed: url_parts / query_string were computed via
        # urlparse but never used.
        ajax_search_url = self.SEARCH_URL_AJAX.format(
            referer=urllib.quote_plus(self.referer, ':'),
            page_num=self.current_page)
        self.current_page += 1

        if self.current_page * self.products_per_page > self.total_matches_int + 30:
            return

        # NOTE(review): 'Host: www.cvs.com' looks copy-pasted from a CVS
        # spider -- this is the Costco spider. Left unchanged to preserve
        # behavior; verify against the actual AJAX endpoint.
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Host': 'www.cvs.com',
            'Pragma': 'no-cache',
            'Referer': self.referer,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'
                          ' AppleWebKit/537.36 (KHTML, like Gecko)'
                          ' Chrome/49.0.2623.110 Safari/537.36'
        }

        return Request(ajax_search_url,
                       self.parse,
                       headers=headers,
                       meta=response.meta,
                       priority=1)

    def _parse_single_product(self, response):
        # Delegates single-product requests to the shared product parser.
        # NOTE(review): duplicate of the identically-named method defined
        # earlier in this class; Python keeps this later definition.
        # Consider removing one of the two copies.
        return self.parse_product(response)

    def _get_products(self, response):
        """Yield products (or follow-up Requests) for one search-result page.

        Reads pagination bookkeeping from ``response.meta`` (populated by
        the base spider's request pipeline), lazily fills in the per-page
        product count and total match count on first use, then emits up to
        ``remaining`` partially-initialized items. Items whose URL is known
        are emitted as Requests that complete them via ``parse_product``.
        """
        remaining = response.meta['remaining']
        search_term = response.meta['search_term']
        prods_per_page = response.meta.get('products_per_page')
        total_matches = response.meta.get('total_matches')
        scraped_results_per_page = response.meta.get(
            'scraped_results_per_page')

        # Generator of (url_or_request_or_None, SiteProductItem) pairs.
        prods = self._scrape_product_links(response)

        if not prods_per_page:
            # Materialize prods to get its size.
            prods = list(prods)
            prods_per_page = len(prods)
            response.meta['products_per_page'] = prods_per_page

        if scraped_results_per_page is None:
            # First page: record how many results the site claims per page,
            # falling back to the number actually scraped from this page.
            scraped_results_per_page = self._scrape_results_per_page(response)
            if scraped_results_per_page:
                self.log(
                    "Found %s products at the first page" %
                    scraped_results_per_page, INFO)
            else:
                scraped_results_per_page = prods_per_page
                # Only an error if the page is not a genuine empty result.
                if hasattr(self, 'is_nothing_found'):
                    if not self.is_nothing_found(response):
                        self.log(
                            "Failed to scrape number of products per page",
                            ERROR)
            response.meta[
                'scraped_results_per_page'] = scraped_results_per_page

        if total_matches is None:
            # First page: parse and cache the total result count.
            total_matches = self._scrape_total_matches(response)
            if total_matches is not None:
                response.meta['total_matches'] = total_matches
                self.log("Found %d total matches." % total_matches, INFO)
            else:
                if hasattr(self, 'is_nothing_found'):
                    if not self.is_nothing_found(response):
                        self.log(
                            "Failed to parse total matches for %s" %
                            response.url, ERROR)

        if total_matches and not prods_per_page:
            # Parsing the page failed. Give up.
            self.log("Failed to get products for %s" % response.url, ERROR)
            return

        # Emit at most `remaining` products so the overall crawl honors the
        # requested quantity.
        for i, (prod_url, prod_item) in enumerate(islice(prods, 0, remaining)):
            # Initialize the product as much as possible.
            prod_item['site'] = self.site_name
            prod_item['search_term'] = search_term
            prod_item['total_matches'] = total_matches
            prod_item['results_per_page'] = prods_per_page
            prod_item['scraped_results_per_page'] = scraped_results_per_page
            # The ranking is the position in this page plus the number of
            # products from other pages.
            prod_item['ranking'] = (i + 1) + (self.quantity - remaining)
            if self.user_agent_key not in ["desktop", "default"]:
                prod_item['is_mobile_agent'] = True

            if prod_url is None:
                # The product is complete, no need for another request.
                yield prod_item
            elif isinstance(prod_url, Request):
                cond_set_value(prod_item, 'url', prod_url.url)  # Tentative.
                yield prod_url
            else:
                # Another request is necessary to complete the product.
                url = urlparse.urljoin(response.url, prod_url)
                cond_set_value(prod_item, 'url', url)  # Tentative.
                yield Request(url,
                              callback=self.parse_product,
                              meta={'product': prod_item})