Example #1
0
    def parse_color_item(self, response):
        """Handle one color page: emit its Color, record its SKU, move on.

        Reads ``index``, ``color_urls``, ``baseItem`` and ``skus`` from
        ``response.meta``. Yields the ``Color`` built from this page, then
        either a ``Request`` for the next entry of ``color_urls`` or, once the
        last entry has been handled, ``baseItem`` carrying the collected SKUs.
        """
        page = Selector(response)

        position = response.meta['index']
        color_urls = response.meta['color_urls']
        base_item = response.meta['baseItem']

        def scaled(url):
            # One ImageItem per source URL: the wid/hei parameters already in
            # the URL are rewritten to a 1000px image and a 50px thumbnail.
            picture = ImageItem()
            picture['image'] = re.sub(r'wid=\d+&hei=\d+', 'wid=1000&hei=1000', url)
            picture['thumbnail'] = re.sub(r'wid=\d+&hei=\d+', 'wid=50&hei=50', url)
            return picture

        color_item = Color()
        color_item['type'] = 'color'
        color_item['show_product_id'] = base_item['show_product_id']
        color_item['from_site'] = self.name
        color_item['name'] = color_urls[position]['color_name']
        color_item['cover'] = color_urls[position]['color_cover']

        # First image comes from the og:image meta tag, second from the swatch.
        main_url = page.xpath('//meta[@property="og:image"]/@content').extract()[0]
        main_picture = scaled(main_url)
        swatch_url = page.xpath('//div[@id="productSwatch"]/img/@src').extract()[0]
        color_item['images'] = [main_picture, scaled(swatch_url)]

        yield color_item

        # The SKU list travels through meta so each color page appends to it.
        collected = response.meta['skus']

        sku = SkuItem()
        sku['type'] = 'sku'
        sku['show_product_id'] = base_item['show_product_id']
        sku['from_site'] = self.name
        sale_price = page.xpath('//div[@class="prodprice saleprice"]/p/span[@itemprop="price"]/text()').extract()[0]
        sku['current_price'] = sale_price
        base_prices = page.xpath('//div[@class="prodprice saleprice"]/p/span[@class="basePrice"]/text()').extract()
        # Without a separate base price the list price equals the sale price.
        sku['list_price'] = base_prices[0] if base_prices else sale_price
        sku['is_outof_stock'] = False
        sku['color'] = color_urls[position]['color_name']
        sku['size'] = 'one-size'
        sku['id'] = base_item['show_product_id']
        collected.append(sku)

        following = position + 1
        if following != len(color_urls):
            yield Request(color_urls[following]['url'], callback=self.parse_color_item
                          , meta={'baseItem': base_item, 'color_urls': color_urls, 'index': following, 'skus': collected})
        else:
            base_item['skus'] = collected

            yield base_item
Example #2
0
    def parse_color_sku(self, response):
        """Build the colors and SKUs of a product from a JSON response.

        ``response.meta`` must carry ``baseItem`` (the product being filled in)
        and ``images`` (a list of dicts with the keys ``base``, ``index``,
        ``thum_size``, ``thum_ext``, ``img_size`` and ``img_ext`` used to
        assemble per-color image URLs). The response body is JSON with a
        ``Colors`` list and a ``ModelColorSizes`` list.

        Yields one ``Color`` per entry of ``Colors``, then ``baseItem`` with
        ``skus``, ``colors`` and ``sizes`` set.
        """
        baseItem = response.meta['baseItem']
        image_templates = response.meta['images']
        payload = json.loads(response.body)

        color_names = []
        for col in payload['Colors']:
            # Every image template is instantiated once per color code.
            images = []
            for img in image_templates:
                imageItem = ImageItem()
                imageItem['thumbnail'] = '%s%s_%s_%s.%s' % (
                    img['base'], col['Code10'], img['thum_size'], img['index'],
                    img['thum_ext'])
                imageItem['image'] = '%s%s_%s_%s.%s' % (
                    img['base'], col['Code10'], img['img_size'], img['index'],
                    img['img_ext'])
                images.append(imageItem)

            color = Color()
            color['type'] = 'color'
            color['from_site'] = 'thecorner'
            color['show_product_id'] = baseItem['show_product_id']
            color['images'] = images
            color['name'] = col['Description']
            # cover_style carries the bare hex color, not a CSS declaration.
            color['cover_style'] = '#' + col['Rgb']
            color_names.append(col['Description'])
            yield color

        skus = []
        size_names = []
        for entry in payload['ModelColorSizes']:
            color_name = entry['Color']['Description']
            size_name = entry['Size']['Description']

            skuItem = SkuItem()
            skuItem['type'] = 'sku'
            skuItem['show_product_id'] = baseItem['show_product_id']
            skuItem['from_site'] = 'thecorner'
            # Join as text. Encoding only the color to UTF-8 and then adding
            # the (unicode) size made Python 2 decode the bytes as ASCII again,
            # which fails for any non-ASCII color name.
            skuItem['id'] = color_name + u"*" + size_name
            skuItem['list_price'] = baseItem['list_price']
            skuItem['current_price'] = baseItem['current_price']
            skuItem['size'] = size_name
            skuItem['color'] = color_name
            skuItem['is_outof_stock'] = False
            skuItem['quantity'] = entry['Quantity']
            size_names.append(size_name)
            skus.append(skuItem)

        baseItem['skus'] = skus
        # De-duplicate; ordering of the resulting lists is not significant here.
        baseItem['colors'] = list(set(color_names))
        baseItem['sizes'] = list(set(size_names))

        yield baseItem
Example #3
0
 def parse_sku_item(self, response):
     """Record the SKU for one size, then chain to the next size or finish.

     ``response.meta`` carries ``item`` (the product, whose ``skus`` list is
     appended to), ``index`` (position in ``sku_size_list``),
     ``sku_size_list`` and ``sku_item_url_list``. The response body is
     decoded with ``demjson`` and is expected to hold ``prodId`` and an HTML
     fragment under ``newSizeData``.

     Yields a ``Request`` for the next size while sizes and URLs remain, and
     ``item`` (with ``sizes`` set) once they are exhausted.
     """
     item = response.meta['item']
     index = response.meta['index']
     sku_size_list = response.meta['sku_size_list']
     sku_item_url_list = response.meta['sku_item_url_list']

     # Nothing left to record: finish without decoding the response at all.
     if index >= len(sku_size_list):
         item['sizes'] = sku_size_list
         yield item
         return

     content = demjson.decode(response.body)
     newSizeData = Selector(text=content['newSizeData'])

     skuItem = SkuItem()
     skuItem['type'] = 'sku'
     skuItem['show_product_id'] = item['show_product_id']
     skuItem['color'] = u'one color'
     skuItem['size'] = sku_size_list[index]
     skuItem['id'] = content['prodId']
     skuItem['from_site'] = self.name
     if len(newSizeData.xpath(".//div[@class='outOfStockMsg']")) > 0:
         # Out-of-stock fragments carry no price table; reuse the item prices.
         skuItem['list_price'] = item['list_price']
         skuItem['current_price'] = item['current_price']
         skuItem['is_outof_stock'] = True
     else:
         # [1:] drops the leading character of the price text
         # (presumably the currency symbol -- confirm against the site).
         skuItem['list_price'] = newSizeData.xpath(
             ".//table/tbody/tr[1]/td/text()").extract()[0].strip()[1:]
         skuItem['current_price'] = newSizeData.xpath(
             ".//td[@class='highlight']/text()").extract()[0].strip(
             )[1:]
         skuItem['is_outof_stock'] = False
     item['skus'].append(skuItem)

     index = index + 1
     # Finish here when either list is exhausted. Indexing the URL list
     # unconditionally raised IndexError after the last size (losing the
     # item) whenever both lists had the same length.
     if index >= len(sku_size_list) or index >= len(sku_item_url_list):
         item['sizes'] = sku_size_list
         yield item
     else:
         yield Request(sku_item_url_list[index],
                       callback=self.parse_sku_item,
                       meta={
                           "sku_size_list": sku_size_list,
                           "sku_item_url_list": sku_item_url_list,
                           "item": item,
                           "index": index
                       })
Example #4
0
    def handle_parse_item(self, response, baseItem):
        """Fill in a single-variant product and emit its color and base item.

        Sets ``dimensions``, ``desc`` and ``brand`` on ``baseItem`` from the
        page, builds one ``one-size`` / ``one-color`` SKU priced like the base
        item and one ``Color`` holding the page's main image. Yields the
        ``Color`` first, then ``baseItem`` with ``skus`` attached.
        """
        page = Selector(response)

        baseItem['dimensions'] = ['size', 'color']
        description_nodes = page.xpath('//div[@id="aDescriptionBody"]').extract()
        baseItem['desc'] = description_nodes[0]
        brand_texts = page.xpath('//span[@itemprop="brand"]/text()').extract()
        baseItem['brand'] = brand_texts[0]

        # The only SKU mirrors the base item's id and prices.
        sku = SkuItem()
        sku['type'] = 'sku'
        sku['show_product_id'] = baseItem['show_product_id']
        sku['from_site'] = self.name
        sku['current_price'] = baseItem['current_price']
        sku['list_price'] = baseItem['list_price']
        sku['is_outof_stock'] = False
        sku['color'] = 'one-color'
        sku['size'] = 'one-size'
        sku['id'] = baseItem['show_product_id']

        # The src attribute is prefixed with the scheme; the same URL serves
        # as both full image and thumbnail.
        picture_src = page.xpath('//img[@id="ImageUrl"]/@src').extract()[0]
        picture = ImageItem()
        picture['image'] = 'http:' + picture_src
        picture['thumbnail'] = picture['image']

        single_color = Color()
        single_color['type'] = 'color'
        single_color['show_product_id'] = baseItem['show_product_id']
        single_color['from_site'] = self.name
        single_color['images'] = [picture]
        single_color['name'] = 'one-color'
        yield single_color

        baseItem['skus'] = [sku]
        yield baseItem
Example #5
0
    def handle_color_item(self, response):
        """Emit the Color and SKU for one color page, then chain to the next.

        ``response.meta`` carries ``item`` (the product; its ``colors`` and
        ``skus`` lists are appended to), ``image_color_dict`` (images keyed by
        the page's ``data-colorid``), ``colorUrls`` and ``index``.

        Yields the ``Color`` for this page, then either a ``Request`` for the
        next URL in ``colorUrls`` or, after the last one, ``item`` itself.
        """
        sel = Selector(response)
        item = response.meta['item']
        image_color_dict = response.meta['image_color_dict']
        colorUrls = response.meta['colorUrls']
        index = response.meta['index']

        data_colorid = sel.xpath(
            ".//li[@class='selected']/a/@data-colorid").extract()[0]
        product_id = sel.xpath(
            ".//span[@itemprop='productID']/text()").extract()[0]

        colorItem = Color()
        colorItem['images'] = image_color_dict[data_colorid]
        colorItem['type'] = 'color'
        colorItem['from_site'] = self.name
        colorItem['show_product_id'] = product_id
        colorItem['name'] = sel.xpath(
            ".//span[@class='selected-value colorValueLabel']/text()").extract(
            )[0].strip()
        cover = sel.xpath(".//li[@class='selected']/a/@style").extract()[0]
        # Keep what sits between the outermost parentheses of the style value.
        colorItem['cover'] = re.findall(r'\(.+\)', cover)[0][1:-1]
        item['colors'].append(colorItem['name'])

        yield colorItem

        skuItem = SkuItem()
        skuItem['color'] = colorItem['name']
        skuItem['type'] = 'sku'
        skuItem['show_product_id'] = product_id

        full_price = sel.xpath(
            ".//span[@data-event-label='Full Price']/@data-event-value"
        ).extract()
        if full_price:
            # A single full price serves as both list and current price.
            price = full_price[0].strip()
            skuItem['list_price'] = price
            skuItem['current_price'] = price
        else:
            # Otherwise the page exposes a minimum/maximum pair.
            skuItem['current_price'] = sel.xpath(
                ".//span[@data-event-label='Full Minimum Price']/@data-event-value"
            ).extract()[0].strip()
            skuItem['list_price'] = sel.xpath(
                ".//span[@data-event-label='Full Maximum Price']/@data-event-value"
            ).extract()[0].strip()
        skuItem['size'] = u'one size'
        skuItem['id'] = data_colorid
        skuItem['from_site'] = self.name

        sku_quantity = sel.xpath(
            ".//select[@name='Quantity']/@data-available").extract()[0]
        # The attribute is text, so it must be converted before comparing;
        # comparing the string with the integer 0 was never true.
        try:
            skuItem['is_outof_stock'] = int(sku_quantity) == 0
        except ValueError:
            # Non-numeric availability: keep the previous "in stock" default.
            skuItem['is_outof_stock'] = False

        item['skus'].append(skuItem)
        item['sizes'] = [u'one size']

        index = index + 1
        if index >= len(colorUrls):
            item['colors'] = list(set(item['colors']))
            yield item
        else:
            yield Request(colorUrls[index],
                          callback=self.handle_color_item,
                          meta={
                              'image_color_dict': image_color_dict,
                              'colorUrls': colorUrls,
                              'index': index,
                              'item': item
                          })
Example #6
0
    def handle_parse_item(self, response, item):
        """Fill ``item`` from a product page and request its stock level.

        The product id comes from the ``<digits>.html`` part of the URL; brand
        and image URL are read from ``brand: '...'`` / ``imgUrl: '...'``
        fragments of the response body. The product is modelled as a single
        ``One Color`` / ``One Size`` variant.

        Yields the ``Color`` for the product, then a ``Request`` to the
        ``checkQty`` endpoint handled by ``self.parse_stock`` with ``item`` in
        its meta.
        """
        sel = Selector(response)

        item['show_product_id'] = re.search(r'(\d+)\.html',
                                            response.url).group(1)
        item['title'] = sel.xpath(
            '//div[@class="product-name"]/h1/text()').extract()[0].strip()
        brand = re.search(r"brand: '(.+)'", response.body)
        # Pages without a brand entry fall back to the site's own name.
        item['brand'] = brand.group(1) if brand else 'pharmacyonline'

        img = re.search(r"imgUrl: '(.+)'", response.body).group(1)
        imageItem = ImageItem()
        imageItem[
            'thumbnail'] = img + '?imageMogr2/thumbnail/380x380/extent/380x380/background/d2hpdGU='
        imageItem['image'] = img
        images = [imageItem]

        item['colors'] = ['One Color']
        color = Color()
        color['type'] = 'color'
        color['from_site'] = item['from_site']
        color['show_product_id'] = item['show_product_id']
        color['images'] = images
        color['name'] = 'One Color'
        color['cover'] = images[0][
            'image'] + '?imageMogr2/thumbnail/100x100/extent/100x100/background/d2hpdGU='
        yield color

        item['desc'] = sel.xpath(
            '//div[@class="product-collateral"]').extract()[0]
        current_price = sel.xpath(
            '//div[@class="DetailNoDis PriceNow last_price_sing"]/span/text()')
        if current_price:
            # Single-price layout: list price equals current price.
            item['current_price'] = current_price.extract()[0]
            item['list_price'] = item['current_price']
        else:
            # Now/was layout: separate current and list prices.
            item['current_price'] = sel.xpath(
                '//div[@class="DetailPriceContain clearfix"]//div[@class="PriceNow"]/text()'
            ).extract()[0].strip()
            item['list_price'] = sel.xpath(
                '//div[@class="DetailPriceContain clearfix"]//p[@class="PriceWas"]/text()'
            ).extract()[0].strip()

        item['sizes'] = ['One Size']
        skuItem = SkuItem()
        skuItem['type'] = "sku"
        skuItem['from_site'] = item['from_site']
        sku_id = sel.xpath(
            '//div[@class="DetailSku"]/text()').extract()[0].strip()
        skuItem['id'] = re.search(r'(\d+)', sku_id).group(1)
        skuItem['show_product_id'] = item['show_product_id']
        skuItem['current_price'] = item['current_price']
        skuItem['list_price'] = item['list_price']
        skuItem['size'] = 'One Size'
        skuItem['color'] = 'One Color'
        item['skus'] = [skuItem]
        item['dimensions'] = ['size']

        # Ids longer than six characters are sent without their first
        # character (NOTE(review): reason not visible here -- confirm).
        if len(item['show_product_id']) > 6:
            product_id = item['show_product_id'][1:]
        else:
            product_id = item['show_product_id']
        stock_url = 'http://cn.pharmacyonline.com.au/pt_catalog/index/checkQty?product_id=' + product_id
        yield Request(stock_url,
                      callback=self.parse_stock,
                      meta={"item": item},
                      dont_filter=True)
    def handle_parse_item(self, response, item):
        """Populate ``item`` from the ``ProductDetails`` JSON embedded in the page.

        The payload is taken from the page's ``<script type="application/json">``
        element and evaluated through ``execjs``; only the first entry of
        ``main_products`` is used.

        Yields one ``Color`` per product color (or a single ``onecolor`` entry
        when the product declares none), then ``item`` with brand, title,
        prices, colors, sizes and SKUs filled in. Yields nothing when the
        payload is not present in the page.
        """
        match = re.search(
            r'<script type\=\"application\/json\">({"ProductDetails".+?)<\/script>',
            response.body)
        # Check before dereferencing the match: pages without the payload are
        # skipped instead of failing on ``match.group``.
        if match is None:
            return
        sel = Selector(response)

        # NOTE(review): this evaluates text taken from the remote page inside a
        # JavaScript runtime. Kept as-is, but the input is untrusted.
        context = execjs.compile('''
            var json = %s
            function getJson(){
                return json;
            }
        ''' % match.group(1))

        product_json = context.call('getJson')

        main_product = product_json['ProductDetails']['main_products'][0]

        def lowest_price(value):
            # A range such as "10.00 - 20.00" collapses to its first amount;
            # a plain price is passed through untouched.
            if '-' not in value:
                return value
            found = re.search(r'([\d\.]+)\s*\-', value)
            return found.group(1) if found else value

        item['brand'] = main_product['brand_name']['label']
        item['title'] = main_product['short_description']
        item['show_product_id'] = main_product['product_code']
        item['desc'] = main_product['description']

        # Both prices are always assigned now; previously a range price was
        # parsed and then thrown away, leaving the field unset.
        item['list_price'] = lowest_price(
            main_product['price']['list_price']['usd_currency_value'])
        item['current_price'] = lowest_price(
            main_product['price']['sale_price']['usd_currency_value'])

        item['dimensions'] = ['size']
        skus = []
        sizes = {'size': []}
        color_names = []

        colors = main_product['colors']['colors']

        # id -> label lookups used when resolving each SKU below.
        handle_color_map = {}
        for color in colors:
            handle_color_map[color['id']] = color['label']

        handle_size_map = {}
        product_sizes = main_product['sizes']['sizes']
        if not product_sizes:
            sizes['size'].append('onesize')
        else:
            for size in product_sizes:
                handle_size_map[size['id']] = size['value']
                sizes['size'].append(size['value'])

        image_prefix = 'http:' + main_product['media'][
            'images_server_url'] + main_product['media']['images_path']

        def sized_image(path):
            # Full-size and thumbnail renditions of one image path.
            picture = ImageItem()
            picture['image'] = image_prefix + path + '?wid=970&hei=1293&fmt=jpg'
            picture['thumbnail'] = image_prefix + path + '?wid=396&hei=528&fmt=jpg'
            return picture

        common_images = main_product['media']['images']

        # Swatch <li> class variants, tried in this order when a color has no
        # hex value and its cover must be read from the element's style.
        cover_classes = (
            'product-color-options__value',
            'product-color-options__value product-color-options__value--unavailable',
            'product-color-options__value product-color-options__value--selected',
            'product-color-options__value is-hidden',
        )

        if not colors:
            color_name = 'onecolor'
            color_names.append(color_name)

            images = [sized_image(path) for path in common_images]

            colorItem = Color()
            colorItem['type'] = 'color'
            colorItem['from_site'] = item['from_site']
            colorItem['show_product_id'] = item['show_product_id']
            colorItem['images'] = images
            colorItem['name'] = color_name
            # Raises IndexError when the product has no media images.
            colorItem['cover'] = images[0]['thumbnail']
            colorItem['version'] = '1'
            yield colorItem
        else:
            for color in colors:
                color_name = color['label']
                color_names.append(color_name)

                # The colorized rendition comes first, then the shared images.
                images = [sized_image(color['colorize_image_url'])]
                first_thumbnail = images[0]['thumbnail']
                for path in common_images:
                    images.append(sized_image(path))

                colorItem = Color()
                colorItem['type'] = 'color'
                colorItem['from_site'] = item['from_site']
                colorItem['show_product_id'] = item['show_product_id']
                colorItem['images'] = images
                colorItem['name'] = color_name
                colorItem['version'] = '1'

                if len(color['value']) == 0:
                    colorItem['cover'] = first_thumbnail
                elif '#' in color['value']:
                    # A hex value is used directly as the cover style.
                    colorItem['cover_style'] = color['value']
                else:
                    for css_class in cover_classes:
                        styles = sel.xpath(
                            '//li[@class="%s" and @data-colorid=%s]/@style'
                            % (css_class, str(color["id"]))).extract()
                        if styles:
                            cover_img = re.search(r'\((.+)\)',
                                                  styles[0]).group(1)
                            colorItem['cover'] = 'http:' + cover_img
                            break
                    else:
                        # No swatch element found for this color.
                        colorItem['cover'] = first_thumbnail

                yield colorItem

        item['colors'] = color_names

        for sku in main_product['skus']['skus']:
            sku_id = sku['sku_id']
            if sku_id == 'DUMMY':
                continue

            # -1 marks "no color" / "no size" on a SKU.
            if sku['color_id'] == -1:
                color_name = 'onecolor'
            else:
                color_name = handle_color_map[sku['color_id']]

            if sku['size_id'] == -1:
                size = 'onesize'
            else:
                size = handle_size_map[sku['size_id']]

            sale_price = sku['price']['sale_price']['usd_currency_value']
            list_price = sku['price']['list_price']['usd_currency_value']
            # SKUs missing either price are left out entirely.
            if not sale_price or not list_price:
                continue

            skuItem = SkuItem()
            skuItem['type'] = 'sku'
            skuItem['show_product_id'] = item['show_product_id']
            skuItem['from_site'] = item['from_site']
            skuItem['id'] = sku_id
            skuItem['size'] = size
            skuItem['color'] = color_name
            skuItem['is_outof_stock'] = sku['status_alias'] in ('soldout',
                                                                'waitlist')
            skuItem['current_price'] = sale_price
            skuItem['list_price'] = list_price

            skus.append(skuItem)

        item['sizes'] = sizes
        item['skus'] = skus

        if main_product['size_guide_link']['enabled'] == True:
            # Keep the size-guide URL without its query string.
            sizeInfo = main_product['size_guide_link']['url']
            item['size_info'] = sizeInfo.split('?', 1)[0]

        yield item
Example #8
0
    def handle_parse_item(self, response, item):
        """Parse an ASOS product page into ``item``, its colors and a stock request.

        Pages under ``/mp_sp/`` are only followed: a ``Request`` for the link
        in ``#mp_li_cnti`` is yielded with ``self.parse_item`` as callback.
        For any other page the JSON passed to ``view('...',`` in the body is
        decoded and used to fill ``item`` and build one ``SkuItem`` per
        variant. One ``Color`` is yielded per distinct variant colour,
        followed by a ``Request`` to the stock/price API handled by
        ``self.parse_stock``.

        Nothing is yielded when the JSON is missing or has no ``price`` entry.
        """
        if re.match(r'^http:\/\/us\.asos\.com\/mp_sp\/',response.url):
            landing = Selector(response)
            target = landing.xpath('//li[@id="mp_li_cnti"]/a/@href').extract()[0]
            yield Request(target, callback=self.parse_item, cookies={'asos': 'currencyid=1'}, meta={'item': item})
            return

        page = Selector(response)
        found = re.search("view\(\'(.+\})\'\,", response.body)
        if not found:
            return
        # The JSON sits inside a JS string literal, so its escapes are undone
        # before parsing.
        goods_detail = json.loads(found.group(1).decode("string-escape"))

        item['desc'] = ''.join(
            node.extract()
            for node in page.xpath('//div[@class="overflow-container"]/div/div'))
        item['title'] = goods_detail['name']
        item['brand'] = goods_detail['brandName'] if 'brandName' in goods_detail else 'asos'
        item['from_site'] = self.name
        if 'price' not in goods_detail:
            return

        prices = goods_detail['price']
        item['current_price'] = prices['current']
        # List price: first non-zero of "previous" and "rrp", else current.
        list_price = prices['current']
        for key in ('previous', 'rrp'):
            if float(prices[key]) != 0:
                list_price = prices[key]
                break
        item['list_price'] = list_price

        product_id = goods_detail['id']
        item['show_product_id'] = product_id

        skus = []
        sizes = []
        colors = []
        for variant in goods_detail['variants']:
            sku_item = SkuItem()
            sku_item['type'] = "sku"
            sku_item['from_site'] = self.name
            sku_item['is_outof_stock'] = False
            sku_item['id'] = variant['variantId']
            sku_item['show_product_id'] = product_id
            sku_item['current_price'] = item['current_price']
            sku_item['list_price'] = item['list_price']
            sku_item['size'] = variant['size']
            sku_item['color'] = variant['colour']
            # Collect distinct sizes and colours in first-seen order.
            if variant['size'] not in sizes:
                sizes.append(variant['size'])
            if variant['colour'] not in colors:
                colors.append(variant['colour'])
            skus.append(sku_item)

        def belongs_to(image_colour, color_name):
            # An image with no colour goes with every colour; otherwise the
            # names must have equal length and be near-identical according to
            # difflib's similarity ratio.
            return image_colour == '' or (image_colour and color_name and len(image_colour) == len(color_name) and (len(color_name) - difflib.SequenceMatcher(None,color_name,image_colour).ratio()*len(color_name)) <=1)

        for color_name in colors:
            images = []
            for image in goods_detail['images']:
                if belongs_to(image['colour'], color_name):
                    picture = ImageItem()
                    picture['image'] = image['url'] + '?$XXL$'
                    picture['thumbnail'] = image['url']
                    images.append(picture)

            color = Color()
            color['type'] = 'color'
            color['from_site'] = self.name
            color['show_product_id'] = product_id
            color['images'] = images
            color['name'] = color_name
            color['cover'] = images[0]['image']

            yield color

        item['skus'] = skus
        item['sizes'] = list(set(sizes))
        item['dimensions'] = ['size']
        item['colors'] = colors
        related_products_url = 'http://us.asos.com/api/product/catalogue/v2/productgroups/ctl/' + str(item['show_product_id']) + '?store=US&store=US&currency=USD'

        yield Request('http://us.asos.com/api/product/catalogue/v2/stockprice?productIds=' + str(product_id) + '&store=US&currency=USD', callback=self.parse_stock, meta={'item': item, 'related_products_url': related_products_url})
    #         color_size_str="".join(re.findall(r"var\s+arrSzeCol_ctl00_ContentMainPage_ctlSeparateProduct[^<]+", response.body))
    #         sep_image_str="".join(re.findall(r"var\s+arrSepImage_ctl00_ContentMainPage_ctlSeparateProduct[^<]+", response.body))
    #         thumb_image_str="".join(re.findall(r"var\s+arrThumbImage_ctl00_ContentMainPage_ctlSeparateProduct[^<]+", response.body))
    #         if len(color_size_str)>0:
    #             context = execjs.compile('''
    #                 %s
    #                 %s
    #                 %s
    #                 function get_color_size(){
    #                     return arrSzeCol_ctl00_ContentMainPage_ctlSeparateProduct;
    #                   }
    #                 function get_sep_image(){
    #                     return arrSepImage_ctl00_ContentMainPage_ctlSeparateProduct;
    #                   }
    #                 function get_thumb_image(){
    #                     return arrThumbImage_ctl00_ContentMainPage_ctlSeparateProduct;
    #                   }
    #             ''' % (color_size_str, sep_image_str, thumb_image_str))
    #             color_sizes = context.call('get_color_size')
    #             sep_image= context.call('get_sep_image')
    #             thumb_images = context.call('get_thumb_image')
    #             #import pdb;pdb.set_trace()
    #         if len(sel.xpath('//div[@id="ctl00_ContentMainPage_ctlSeparateProduct_pnlOutofStock"]').extract()) > 0:
    #             return
    #
    #         if len(sel.xpath('//span[@id="ctl00_ContentMainPage_ctlSeparateProduct_lblProductTitle"]/text()').extract()) > 0:
    #             item['title']=sel.xpath('//span[@id="ctl00_ContentMainPage_ctlSeparateProduct_lblProductTitle"]/text()').extract()[0]
    #
    #             data_dic_str = sel.xpath('//script[@id="dataDictionary"]/text()')
    #
    #             product_data_str=data_dic_str.re(r'^var Product\s*=\s*({.*?});')[0]
    #             product_data=eval(product_data_str)
    #             item['show_product_id']=product_data['ProductIID']
    #             desc=sel.xpath('//div[@id="ctl00_ContentMainPage_productInfoPanel"]//ul')
    #             if len(desc)>0:
    #                 item['desc']=desc.extract()[0]
    #             item['brand']=product_data['ProductBrand']
    #             item['from_site']=self.name
    #
    #             '''Has serious problems, so this was commented out'''
    # #             gender_category_str=product_data['ProductCategory']
    # #             m=re.search(r'(.+)\|(.+)', gender_category_str)
    # #             if m:
    # #                 item['gender']=m.group(1).strip()
    # #             m=re.search(r'(.+)\|(.+)', gender_category_str)
    # #             if m:
    # #                 item['category']=m.group(2).strip()
    #
    #             sku_data_str = data_dic_str.re(r'var ProductChildSkuInfo\s*=\s*({.*?});')[0]
    #             sku_data=eval(sku_data_str)
    #             sku_data_list=sku_data['ChildSkuInfo'][item['show_product_id']]
    #             #color_list=sel.xpath('//select[@id="ctl00_ContentMainPage_ctlSeparateProduct_drpdwnColour"]').extract()
    #             if color_sizes:
    #                 '''handle color and image'''
    #
    # #                 thumbnail_lis=sel.xpath('//ul[@class="productThumbnails"]//li//img/@src')
    # #                 image_lis=sel.xpath('//div[@id="productImages"]//img/@src')
    # #                 if len(thumbnail_lis)>0:
    # #                     for i in range(len(thumbnail_lis)):
    # #                         imageItem=ImageItem()
    # #                         imageItem['image']=image_lis[i].extract()
    # #                         imageItem['thumbnail']=thumbnail_lis[i].extract()
    # #                         images.append(imageItem)
    #                 #left three imageItem
    #                 images=[]
    #                 for thumb_image in thumb_images:
    #                     imageItem=ImageItem()
    #                     imageItem['image']=thumb_image[2]
    #                     imageItem['thumbnail']=thumb_image[0]
    #                     images.append(imageItem)
    #
    #                 item_color_names=[]
    #                 #all color names of item
    #
    #                 sep_image_dict = {}
    #                 for sep_image_arr in sep_image:
    #                     key = sep_image_arr[3]
    #                     sep_image_dict[key] = {'image': sep_image_arr[2], 'thumbnail': sep_image_arr[0]}
    #
    #                 color_names = sel.xpath('//div[@id="ctl00_ContentMainPage_ctlSeparateProduct_pnlColour"]//option/@value')[1:].extract()
    #                 for color_name in color_names:
    #
    #                     lower_color_name = color_name.lower()
    #                     if '/' in lower_color_name:
    #                         lower_color_name_2 = lower_color_name.replace('/', '')
    #                     else:
    #                         lower_color_name_2 = lower_color_name
    #                     if lower_color_name not in sep_image_dict.keys() and lower_color_name_2 not in sep_image_dict.keys():
    #                         return
    #                     imageItem=ImageItem()
    #                     imageItem['thumbnail']= sep_image_dict[lower_color_name_2]['thumbnail']
    #                     imageItem['image']= sep_image_dict[lower_color_name_2]['image']
    #                     images.insert(0, imageItem)
    #                     #                     import pdb;pdb.set_trace()
    #                     color=Color()
    #                     color['type'] ='color'
    #                     color['from_site'] = self.name
    #                     color['show_product_id'] = product_data['ProductIID']
    #                     color['images'] = images
    #                     color['name'] = color_name
    #                     color['cover'] = sep_image_dict[lower_color_name_2]['thumbnail']
    #
    #                     yield color
    #
    #                     item_color_names.append(color_name)
    #                 '''handle price'''
    #                 #list_price_sel=sel.xpath('//span[@id="ctl00_ContentMainPage_ctlSeparateProduct_lblRRP"]')
    #                 sizes=[]
    #                 for color_size in color_sizes:
    #                     size_id = color_size[0]
    #                     size = color_size[1]
    #                     if not size.strip():
    #                         size = 'onesize'
    #
    #                     if color_size[3] == "False":
    #                         continue
    #
    #                     original_color_name = color_size[2]
    #                     for color_name in item_color_names:
    #                         tmp_color_name = re.sub(r'[^\w]', '', color_name)
    #
    #                         if tmp_color_name == original_color_name:
    #                             original_color_name = color_name
    #
    #                     skuItem=SkuItem()
    #                     skuItem['type']="sku"
    #                     skuItem['from_site']=self.name
    #                     skuItem['is_outof_stock']=False
    #                     skuItem['id']=sku_data_list[str(size_id)+original_color_name]['Sku']
    #                     #skuItem['id']=color_size[0]
    #                     skuItem['show_product_id']=product_data['ProductIID']
    #                     skuItem['current_price']= color_size[5]
    #
    #                     if color_size[6] == color_size[5] and color_size[8] != '0' and color_size[8] != '0.00':
    #                         skuItem['list_price']= color_size[8]
    #                     else:
    #                         skuItem['list_price']= color_size[6]
    #
    #                     sizes.append(size)
    #                     skuItem['color'] = original_color_name
    #                     skuItem['size'] = size
    #                     skus.append(skuItem)
    #
    #                 item['skus']=skus
    #                 item['sizes']=list(set(sizes))
    #                 item['dimensions']=['size']
    #                 item['colors'] = item_color_names
    #                 size_info = sel.xpath('//a[@id="ctl00_ContentMainPage_SizeGuideButton_SizeGuideLink"]/@href')
    #                 if size_info:
    #                     item['size_info'] = size_info.extract()[0]
    #                     if not re.match(r'^http', size_info.extract()[0]):
    #                         item['size_info'] = self.base_url + size_info.extract()[0]
    #             yield item
Example #9
0
    def handle_parse_item(self, response, item):
        """Populate ``item`` from a product page and yield its colour and item.

        A single ``Color`` (holding every gallery image) is yielded first,
        then ``item`` itself with one ``SkuItem`` per size.  Nothing is yielded
        when the page has no thumbnail gallery or lists more than one colour
        option.

        :param response: downloaded product page; ``response.body`` is searched
            with regular expressions for the inline product data.
        :param item: base item to fill in; ``item['from_site']`` must already
            be set because it is copied onto the colour and SKU entries.
        :raises AttributeError: if the product id or brand pattern is absent
            from the page body.
        :raises IndexError: if a required node (title, price, image source)
            is missing from the page.
        """
        sel = Selector(response)
        page_source = response.body

        # Raw-string patterns: the regexes are unchanged, but the literals no
        # longer rely on invalid string escapes that newer interpreters flag.
        item['show_product_id'] = str(
            re.search(r'productID: "(\d+)"', page_source).group(1)).strip()
        item['brand'] = str(
            re.search(r'productBrand: "(.+)"', page_source).group(1)).strip()
        item['title'] = sel.xpath(
            './/h1[@data-track="product-title"]/text()').extract()[0].strip()
        item['desc'] = ''.join(
            sel.xpath('//div[@itemprop="description"]/p').extract()).strip()
        item['current_price'] = sel.xpath(
            '//span[@class="price"]/text()').extract()[0].strip()

        # The recommended retail price is optional; fall back to the current
        # price when the page does not advertise one.
        rrp_match = re.search(r'rrp: .+\&\#36\;([\d\.]+).+', page_source)
        if rrp_match:
            item['list_price'] = rrp_match.group(1)
        else:
            item['list_price'] = item['current_price']

        image_divs = sel.xpath(
            '//div[@class="product-thumb-box productImageZoom__thumbnailContainer "]'
        )
        if not image_divs:
            return
        images = []
        for image_div in image_divs:
            imageItem = ImageItem()
            imageItem['thumbnail'] = image_div.xpath('./img/@src').extract()[0]
            # The full-size image is the href of the thumbnail box's parent.
            imageItem['image'] = image_div.xpath(
                './parent::*/@href').extract()[0]
            images.append(imageItem)

        # The first <option> is skipped by the XPath; pages offering more than
        # one colour are not handled by this callback.
        color_names = sel.xpath(
            '//select[@id="opts-2"]/option[position()>1]/text()').extract()
        if len(color_names) > 1:
            return
        if not color_names:
            color_names = ['One Color']
        item['colors'] = color_names

        color = Color()
        color['type'] = 'color'
        color['from_site'] = item['from_site']
        color['show_product_id'] = item['show_product_id']
        color['images'] = images
        color['name'] = color_names[0]
        color['cover'] = images[0]['thumbnail']
        yield color

        sizes = sel.xpath(
            '//select[@id="opts-1"]/option[position()>1]/text()').extract()
        if not sizes:
            sizes = ['One Size']
        item['sizes'] = sizes

        skus = []
        for size in sizes:
            for color_name in color_names:
                skuItem = SkuItem()
                skuItem['type'] = "sku"
                skuItem['from_site'] = item['from_site']
                # Synthetic SKU id: <product id>-<colour>-<size>.
                skuItem['id'] = item[
                    'show_product_id'] + '-' + color_name + '-' + size
                skuItem['show_product_id'] = item['show_product_id']
                skuItem['current_price'] = item['current_price']
                skuItem['list_price'] = item['list_price']
                skuItem['size'] = size
                skuItem['color'] = color_name
                skus.append(skuItem)
        item['skus'] = skus
        item['dimensions'] = ['size']
        yield item
Example #10
0
    def handle_parse_item(self, response, item):
        """Fill ``item`` from a product page and yield its colour and base items.

        The page is skipped (nothing is yielded) when the product form carries
        more than one ``meta`` element or when a sold-out block is present.
        Otherwise a single ``'One Color'`` ``Color`` item is yielded first,
        followed by ``item`` populated with SKUs, sizes, description, related
        product ids and, when the HEAD probe succeeds, a video URL.

        :param response: downloaded product page.
        :param item: base item to populate; ``item['from_site']`` must already
            be set.  ``self.gender`` is copied into ``item['gender']``.
        :raises IndexError: if the product code, an image, the price or the
            single-SKU input is missing from the page.
        """
        sel = Selector(response)
        if len(sel.xpath('//form[@id="product-form"]//meta').extract()) > 1:
            return
        if len(sel.xpath('//div[@class="sold-out-details"]')) > 0:
            return

        item['show_product_id'] = sel.xpath(
            '//div[@class="product-code"]/span/text()').extract()[0].strip()

        # Prefer the static thumbnail list; fall back to the swiper markup.
        imgs = sel.xpath(
            '//div[@class="container-imagery"]//ul[@class="thumbnails no-carousel"]/li/img/@src'
        ).extract()
        if len(imgs) == 0:
            imgs = sel.xpath(
                '//div[@class="container-imagery"]//ul[@class="swiper-wrapper"]/li/img/@src'
            ).extract()

        images = []
        for img in imgs:
            # Only scheme-less sources (e.g. protocol-relative "//host/...")
            # need a scheme.  A plain substring test for 'http:' is also true
            # for "https://" URLs and would mangle them into
            # "http:https://...".
            if not img.startswith(('http:', 'https:')):
                img = 'http:' + img
            # 'xs' -> 'pp' for the full image, 'pp' -> 'm' for the thumbnail.
            img = img.replace('xs.jpg', 'pp.jpg')
            imageItem = ImageItem()
            imageItem['image'] = img
            imageItem['thumbnail'] = img.replace('pp.jpg', 'm.jpg')
            images.append(imageItem)

        colorItem = Color()
        colorItem['images'] = images
        colorItem['type'] = 'color'
        colorItem['from_site'] = item['from_site']
        colorItem['show_product_id'] = item['show_product_id']
        colorItem['name'] = 'One Color'
        colorItem['cover'] = images[0]['image'].replace('pp.jpg', 'xs.jpg')
        yield colorItem

        # NOTE(review): data-price-full looks like an amount in minor units;
        # under Python 2 this is integer division and drops any fractional
        # part -- confirm that is intended before changing it.
        price = int(
            sel.xpath('//form[@id="product-form"]/meta/@data-price-full').
            extract()[0]) / 100

        in_stock_levels = ('In_Stock', 'Low_Stock')
        item['skus'] = []
        sku_options = sel.xpath(
            '//select-dropdown[@class="sku"]/@options').extract()
        if sku_options:
            # Multi-size product: the dropdown's "options" attribute is a JSON
            # list with one entry per size.
            sizes = []
            for sku in json.loads(sku_options[0]):
                skuItem = SkuItem()
                skuItem['type'] = 'sku'
                skuItem['show_product_id'] = item['show_product_id']
                skuItem['list_price'] = price
                skuItem['current_price'] = price
                skuItem['color'] = 'One Color'
                skuItem['size'] = sku['data']['size']
                sizes.append(sku['data']['size'])
                skuItem['id'] = sku['id']
                skuItem['from_site'] = item['from_site']
                skuItem['is_outof_stock'] = (
                    sku['stockLevel'] not in in_stock_levels)
                item['skus'].append(skuItem)
        else:
            # Single-size product: SKU id and stock level live on a hidden
            # input instead of the dropdown.
            sizes = ['One Size']
            skuItem = SkuItem()
            skuItem['type'] = 'sku'
            skuItem['show_product_id'] = item['show_product_id']
            skuItem['list_price'] = price
            skuItem['current_price'] = price
            skuItem['color'] = 'One Color'
            skuItem['size'] = 'One Size'
            skuItem['id'] = sel.xpath(
                '//input [@class="sku"]/@value').extract()[0]
            stock_level = sel.xpath(
                '//input [@class="sku"]/@data-stock').extract()[0]
            skuItem['is_outof_stock'] = stock_level not in in_stock_levels
            skuItem['from_site'] = item['from_site']
            item['skus'].append(skuItem)

        item['gender'] = self.gender
        item['colors'] = ['One Color']
        item['sizes'] = sizes

        # The description is the first matching node of each accordion
        # section, concatenated in this order.
        desc_parts = []
        for desc_xpath in (
                '//widget-show-hide[@id="accordion-1"]//ul/li',
                '//widget-show-hide[@id="accordion-2"]//ul/li',
                '//widget-show-hide[@id="accordion-2"]//p'):
            fragments = sel.xpath(desc_xpath).extract()
            if fragments:
                desc_parts.append(fragments[0])
        item['desc'] = ''.join(desc_parts)

        # Links inside the editor's notes point at related products; the id is
        # the last path segment of each href.
        product_items = sel.xpath(
            '//widget-show-hide[@name="Editor\'s Notes"]/div[@class="show-hide-content"]/div/p/a'
        )
        related_items_id = [
            product_item.xpath('./@href').extract()[0].split('/')[-1]
            for product_item in product_items
        ]
        if related_items_id:
            item['related_items_id'] = related_items_id

        # Best-effort probe for a product video; any failure is only logged.
        media_url = 'https://video.net-a-porter.com/videos/productPage/' + item[
            'show_product_id'] + '_detail.mp4'
        try:
            req = requests.head(media_url)
            if req.ok:
                item['media_url'] = media_url
        except Exception as e:
            logging.error('error media url: ' + media_url + ' error msg: ' +
                          str(e))
        yield item
Example #11
0
    def parse_color_item(self, response):
        """Handle one colour-variant page in a chain of per-colour requests.

        ``response.meta`` carries ``baseItem``, the accumulated ``skus`` list,
        ``color_data`` (dicts with ``name``, ``cover`` and ``url``) and the
        ``index`` of the colour this page belongs to.  A ``Color`` item is
        yielded for that colour, one SKU per size is appended to ``skus``, and
        then either a request for the next colour or, after the last colour,
        the finished ``baseItem`` is yielded.

        :param response: downloaded colour-variant page.
        :raises IndexError: if an image link, the product id input or the
            sales price is missing from the page.
        """
        sel = Selector(response)
        meta = response.meta
        baseItem = meta['baseItem']
        skus = meta['skus']
        color_data = meta['color_data']
        index = meta['index']

        images = []
        for thumbnail_li in sel.xpath('//ul[@id="thumbnail-carousel"]/li'):
            imageItem = ImageItem()
            thumbnail = thumbnail_li.xpath(
                './/img[contains(@class, "productthumbnail")]/@src').extract()
            # The thumbnail is optional; the full-size link is required.
            if thumbnail:
                imageItem['thumbnail'] = thumbnail[0]
            imageItem['image'] = thumbnail_li.xpath('./a/@href').extract()[0]
            images.append(imageItem)

        colorItem = Color()
        colorItem['show_product_id'] = baseItem['show_product_id']
        colorItem['type'] = 'color'
        colorItem['from_site'] = 'katespade'
        colorItem['name'] = color_data[index]['name']
        colorItem['cover'] = color_data[index]['cover']
        colorItem['images'] = images
        yield colorItem

        # Products without size swatches get a single placeholder size.
        sizes = sel.xpath(
            '//ul[contains(@class, "swatches size")]/li[@class="emptyswatch"]/a/text()'
        ).re('(.+)')
        if not sizes:
            sizes = ['one-size']

        # These page-level values do not depend on the size, so query them
        # once instead of once per SKU.
        product_id = sel.xpath('//input[@id="pid"]/@value').extract()[0]
        current_price = sel.xpath(
            './/span[@class="price-sales"]/text()').extract()[0]
        color_name = colorItem['name']

        for size in sizes:
            skuItem = SkuItem()
            skuItem['show_product_id'] = product_id
            skuItem['type'] = 'sku'
            skuItem['from_site'] = 'katespade'
            skuItem['id'] = color_name + '-' + size
            skuItem['current_price'] = current_price
            skuItem['list_price'] = baseItem['list_price']
            skuItem['is_outof_stock'] = False
            skuItem['color'] = color_name
            skuItem['size'] = size
            skus.append(skuItem)

        next_index = index + 1
        if next_index == len(color_data):
            # Last colour processed: emit the base item with every SKU.
            baseItem['skus'] = skus
            yield baseItem
        else:
            yield Request(color_data[next_index]['url'],
                          callback=self.parse_color_item,
                          meta={
                              'baseItem': baseItem,
                              'skus': skus,
                              'color_data': color_data,
                              'index': next_index
                          })
Example #12
0
    def handle_parse_item(self, response, baseItem):
        """Scrape a product page into ``baseItem`` and yield colour/base items.

        Nothing is yielded when the page has no ``pid`` input or shows the
        out-of-stock message.  Otherwise prices, description and sizes are
        stored on ``baseItem``, a ``Color`` item for the displayed colour is
        yielded, and SKUs for that colour are collected.  When further colour
        swatches remain, a ``Request`` handled by ``self.parse_color_item`` is
        yielded (carrying ``baseItem`` and the SKUs in ``meta``); otherwise
        ``baseItem`` itself is yielded with its SKUs attached.

        :param response: downloaded product page.
        :param baseItem: base item to populate.
        :raises IndexError: if a required price, description, image link or
            swatch node is missing from the page.
        """
        sel = Selector(response)

        pid_values = sel.xpath('//input[@id="pid"]/@value').extract()
        if not pid_values:
            return
        product_id = pid_values[0]
        if len(sel.xpath('//p[@class="not-available-msg out-of-stock"]')) > 0:
            return

        def upper_bound(price_text):
            # For a price range, keep the amount that follows the dash.
            if '-' in price_text:
                return re.search(r'-\s*\$([\d\.]+)', price_text).group(1)
            return price_text

        sales_price = sel.xpath(
            './/span[@class="price-sales"]/text()').extract()
        if len(sel.xpath('//span[contains(@class, "price-standard")]')) > 0:
            # Discounted product: a standard price plus a sales price/range.
            baseItem['list_price'] = sel.xpath(
                './/span[@class="price-standard"]/text()').extract()[0]
            if sales_price:
                baseItem['current_price'] = sales_price[0]
            else:
                baseItem['current_price'] = upper_bound(
                    sel.xpath(
                        './/span[@class="price-sales range-sale-price"]/text()'
                    ).extract()[0])
        elif sales_price:
            # Full-price product: list and current price are the same.
            baseItem['list_price'] = sales_price[0]
            baseItem['current_price'] = baseItem['list_price']
        else:
            baseItem['current_price'] = upper_bound(
                sel.xpath('.//div[@class="product-price sale"]/div/text()'
                          ).extract()[0].strip())
            baseItem['list_price'] = baseItem['current_price']

        baseItem['show_product_id'] = product_id
        baseItem['dimensions'] = ['size', 'color']
        baseItem['brand'] = 'katespade'

        desc_list = sel.xpath('//div[@class="description-details"]').extract()
        if desc_list:
            baseItem['desc'] = desc_list[0]
        else:
            baseItem['desc'] = sel.xpath(
                '//div[@class="description-details one-column"]').extract()[0]

        # Products without size swatches get a single placeholder size.
        sizes = sel.xpath(
            '//ul[contains(@class, "swatches size")]/li[@class="emptyswatch"]/a/text()'
        ).re('(.+)')
        if not sizes:
            sizes = ['one-size']
        baseItem['sizes'] = sizes

        skus = []

        def add_skus(color_name):
            # One SKU per size for the given colour, priced from baseItem.
            for size in sizes:
                skuItem = SkuItem()
                skuItem['show_product_id'] = product_id
                skuItem['type'] = 'sku'
                skuItem['from_site'] = 'katespade'
                skuItem['id'] = color_name + '-' + size
                skuItem['current_price'] = baseItem['current_price']
                skuItem['list_price'] = baseItem['list_price']
                skuItem['is_outof_stock'] = False
                skuItem['color'] = color_name
                skuItem['size'] = size
                skus.append(skuItem)

        images = []
        for thumbnail_li in sel.xpath('//ul[@id="thumbnail-carousel"]/li'):
            imageItem = ImageItem()
            thumbnail = thumbnail_li.xpath(
                './/img[contains(@class, "productthumbnail")]/@src').extract()
            # The thumbnail is optional; the full-size link is required.
            if thumbnail:
                imageItem['thumbnail'] = thumbnail[0]
            imageItem['image'] = thumbnail_li.xpath('./a/@href').extract()[0]
            images.append(imageItem)

        color_lis = sel.xpath(
            '//ul[contains(@class, "swatches Color clearfix")]/li')
        # NOTE(review): the last swatch <li> is dropped whenever there is more
        # than one -- presumably it is not a real colour; confirm.
        if len(color_lis) > 1:
            color_lis = color_lis[:-1]

        colorItem = Color()
        colorItem['show_product_id'] = product_id
        colorItem['type'] = 'color'
        colorItem['from_site'] = 'katespade'

        if len(color_lis) == 0:
            # No swatches at all: a single placeholder colour without cover.
            baseItem['colors'] = ['one-color']
            colorItem['name'] = 'one-color'
            colorItem['images'] = images
            yield colorItem

            add_skus(colorItem['name'])
            baseItem['skus'] = skus
            yield baseItem
            return

        baseItem['colors'] = color_lis.xpath(
            './/span[@class="title"]/text()').extract()

        # The page shows the selected swatch; when none is marked as selected
        # the first swatch is taken as the displayed colour.
        color_selected = sel.xpath(
            '//ul[contains(@class, "swatches Color clearfix")]/li[@class="selected"]'
        )
        if len(color_selected) == 0:
            current_li = color_lis[0]
        else:
            current_li = color_selected[0]
        colorItem['name'] = current_li.xpath(
            './span[@class="title"]/text()').extract()[0]
        colorItem['cover'] = current_li.xpath('./a/img/@src').extract()[0]
        colorItem['images'] = images
        yield colorItem

        add_skus(colorItem['name'])

        color_lis_not_selected = sel.xpath(
            '//ul[contains(@class, "swatches Color clearfix")]/li[@class="emptyswatch"]'
        )
        if len(color_lis_not_selected) == 0 or (
                len(color_lis_not_selected) == 1
                and len(color_selected) == 0):
            # No other colour pages to visit: the item is complete.
            baseItem['skus'] = skus
            yield baseItem
            return

        # Remaining colours are fetched one page at a time, starting with the
        # first unselected swatch; parse_color_item walks the rest.
        color_data = []
        for color_li in color_lis_not_selected:
            color_data.append({
                'name':
                color_li.xpath('./span[@class="title"]/text()').extract()[0],
                'cover':
                color_li.xpath('./a/img/@src').extract()[0],
                'url':
                color_li.xpath('./a/@href').extract()[0]
            })

        yield Request(color_data[0]['url'],
                      callback=self.parse_color_item,
                      meta={
                          'baseItem': baseItem,
                          'skus': skus,
                          'color_data': color_data,
                          'index': 0
                      })
Example #13
0
    def handle_parse_item(self, response, item):
        """Scrape a product page into colour items and a populated ``item``.

        Out-of-stock pages yield nothing.  For configurable products (a
        ``product-options`` block is present) the inline ``spConfig`` JSON is
        parsed: one ``Color`` is yielded per colour option and SKUs are built
        from the size attribute when there is one.  Simple products yield a
        single ``'onecolor'`` colour with the thumbnail gallery and one
        ``'onesize'`` SKU.  In both cases ``item`` is yielded last.

        :param response: downloaded product page; ``response.body`` is searched
            for the ``spConfig`` script.
        :param item: base item to populate.
        :raises IndexError: if the title, product id, description, a price or
            (for simple products) the first gallery image is missing.
        """
        sel = Selector(response)

        outof_stock_content = sel.xpath(
            '//div[@class="item-availability"]/span[@class="out-of-stock"]'
        ).extract()
        if len(outof_stock_content) > 0:
            return

        title = sel.xpath(
            '//div[contains(@class, "product-name")]//span/text()').extract(
            )[0]
        show_product_id = sel.xpath(
            '//div[contains(@class, "no-display")]//input[1]/@value').extract(
            )[0]
        desc_tmp = sel.xpath(
            '//div[contains(@class, "tab-content")]').extract()

        item['type'] = 'base'
        item['title'] = title
        item['show_product_id'] = show_product_id
        item['brand'] = 'Rebecca Minkoff'
        # Up to two tab panes make up the description.
        if len(desc_tmp) > 1:
            item['desc'] = '%s%s' % (desc_tmp[0], desc_tmp[1])
        else:
            item['desc'] = desc_tmp[0]

        if sel.xpath(
                '//div[contains(@class, "price-box")]//span[contains(@class, "regular-price")]'
        ):
            # Not discounted: list and current price are the same.
            item['list_price'] = sel.xpath(
                '//span[contains(@class, "regular-price")]//span/text()'
            ).extract()[0]
            item['current_price'] = item['list_price']
        else:
            item['list_price'] = sel.xpath(
                '//p[contains(@class, "old-price")]//span[2]/text()').extract(
                )[0]
            item['current_price'] = sel.xpath(
                '//p[contains(@class, "special-price")]//span[2]/text()'
            ).extract()[0]

        def one_size_sku():
            # Single placeholder SKU for products without a size attribute.
            sku = SkuItem()
            sku['type'] = 'sku'
            sku['show_product_id'] = show_product_id
            sku['from_site'] = 'rebeccaminkoff'
            sku['id'] = show_product_id
            sku['list_price'] = item['list_price']
            sku['current_price'] = item['current_price']
            sku['size'] = 'onesize'
            sku['color'] = "onecolor"
            sku['is_outof_stock'] = False
            return sku

        if not sel.xpath('//div[contains(@class, "product-options")]'):
            # Simple product: one colour built from the thumbnail gallery.
            images = []
            for img in sel.xpath(
                    '//ul[contains(@class, "product-image-thumbs")]//li'):
                imageItem = ImageItem()
                img_tmp = img.xpath('.//a//img/@src').extract()[0]
                imageItem['image'] = img_tmp
                imageItem['thumbnail'] = img_tmp.replace(
                    '/thumbnail/', '/thumbnail/60x90/')
                images.append(imageItem)

            color = Color()
            color['type'] = 'color'
            color['show_product_id'] = show_product_id
            color['from_site'] = 'rebeccaminkoff'
            color['cover'] = images[0]['thumbnail']
            color['images'] = images
            color['name'] = 'onecolor'
            yield color

            item['skus'] = [one_size_sku()]
            item['sizes'] = ['onesize']
            item['colors'] = ['onecolor']
            yield item
            return

        # Configurable product: pull the object literal out of the inline
        # "var spConfig ..." script and parse it as JSON.
        config_js = "".join(
            re.findall(
                r'<script type="text/javascript">[\s]*(var spConfig.*;)[\s]*</script>[\s]*<script type="text/javascript">[\s]*\/\/',
                response.body, re.S))
        config = json.loads("".join(re.findall(r'({.*})', config_js, re.S)))

        # Select ids look like "attribute<N>"; the first select is treated as
        # the colour attribute and the second (if any) as the size attribute.
        attributeID = sel.xpath(
            '//dd//div[contains(@class, "input-box")]//select/@id'
        ).extract()
        colorID = attributeID[0].replace("attribute", "")
        if colorID not in config['attributes']:
            return

        col_name = {}  # child product id -> colour label
        colors = []
        for col in config['attributes'][colorID]['options']:
            name = col['label']
            for productID in col['products']:
                col_name[productID] = name

            # Gallery of the first child product represents the colour.
            images = []
            first_thumb = ''
            for img in config['swatchImages'][col['products']
                                              [0]]['galleryImages']:
                imageItem = ImageItem()
                imageItem['image'] = img['url']
                imageItem['thumbnail'] = img['thumb']
                images.append(imageItem)
                if len(first_thumb) == 0:
                    first_thumb = img['thumb']

            color = Color()
            # Cover preference: swatch image, then swatch hex colour, then the
            # first gallery thumbnail.
            if col['swatch']['img']:
                color['cover'] = col['swatch']['img']
            elif col['swatch']['hex']:
                color['cover_style'] = '#' + col['swatch']['hex']
            else:
                color['cover'] = first_thumb

            colors.append(name)

            color['type'] = 'color'
            color['show_product_id'] = show_product_id
            color['from_site'] = 'rebeccaminkoff'
            color['images'] = images
            color['name'] = name
            yield color

        if len(attributeID) > 1:
            skus = []
            sizes = []
            sizeID = attributeID[1].replace("attribute", "")
            for skuCol in config['attributes'][sizeID]['options']:
                for sku_tmp in skuCol['products']:
                    # Skip child products that belong to no known colour
                    # before building anything for them.
                    if sku_tmp not in col_name:
                        continue
                    skuItem = SkuItem()
                    skuItem['type'] = 'sku'
                    skuItem['show_product_id'] = show_product_id
                    skuItem['from_site'] = "rebeccaminkoff"
                    skuItem['id'] = sku_tmp
                    skuItem['list_price'] = config['oldPrice']
                    skuItem['current_price'] = config['basePrice']
                    skuItem['size'] = skuCol['label']
                    skuItem['color'] = col_name[sku_tmp]
                    skuItem['is_outof_stock'] = False
                    sizes.append(skuCol['label'])
                    skus.append(skuItem)
        else:
            skus = [one_size_sku()]
            sizes = ['onesize']

        item['skus'] = skus
        item['sizes'] = list(set(sizes))
        item['colors'] = list(set(colors))

        yield item
Example #14
0
 def handle_parse_item(self, response, baseItem):
     """Fill ``baseItem`` from a product page and yield the follow-up objects.

     When the page lists colour swatches, ``baseItem['colors']`` is set and a
     ``Request`` for the first colour page is yielded, handled by
     ``self.parse_color_item`` with the base item, every colour entry, index
     0 and an empty SKU list in ``meta``.  Otherwise a single ``'one-color'``
     ``Color`` item and then ``baseItem`` (with one ``'one-size'`` SKU) are
     yielded.

     ``baseItem['show_product_id']`` must already be set; ``self.base_url``
     and ``self.name`` are read from the spider.
     """
     sel = Selector(response)

     # Description: first promo table row wrapped in a <table>, else empty.
     promo_rows = sel.xpath('//table[@id="TblProdForkPromo"]/tr').extract()
     if promo_rows:
         baseItem['desc'] = '<table>' + promo_rows[0] + '</table>'
     else:
         baseItem['desc'] = ''

     baseItem['dimensions'] = ['size', 'color']
     baseItem['sizes'] = ['one-size']

     swatches = sel.xpath('//dl[@id="color"]//li')
     if len(swatches) > 0:
         # One entry per swatch: absolute URL, display name and cover image.
         color_urls = []
         colors = []
         for swatch in swatches:
             target = self.base_url + swatch.xpath('./a/@href').extract()[0]
             label = swatch.xpath(
                 './a/div[@class="distinctionName"]/text()').extract()[0]
             colors.append(label)
             cover = swatch.xpath('./a/div/img/@src').extract()[0]
             color_urls.append({
                 'url': target,
                 'color_name': label,
                 'color_cover': cover,
             })

         baseItem['colors'] = colors
         yield Request(
             color_urls[0]['url'],
             callback=self.parse_color_item,
             meta={
                 'baseItem': baseItem,
                 'color_urls': color_urls,
                 'index': 0,
                 'skus': [],
             })
         return

     # No swatches: describe the product as one colour / one size.
     baseItem['colors'] = ['one-color']

     sku = SkuItem()
     sku['type'] = 'sku'
     sku['show_product_id'] = baseItem['show_product_id']
     sku['from_site'] = self.name
     sku['current_price'] = sel.xpath(
         '//div[@class="prodprice saleprice"]/p/span[@itemprop="price"]/text()'
     ).extract()[0]
     base_prices = sel.xpath(
         '//div[@class="prodprice saleprice"]/p/span[@class="basePrice"]/text()'
     ).extract()
     # Without a separate base price the list price equals the current one.
     if len(base_prices) > 0:
         sku['list_price'] = base_prices[0]
     else:
         sku['list_price'] = sku['current_price']
     sku['is_outof_stock'] = False
     sku['color'] = 'one-color'
     sku['size'] = 'one-size'
     sku['id'] = baseItem['show_product_id']

     # The only image comes from the og:image meta tag, resized through its
     # wid/hei query parameters.
     og_image = sel.xpath('//meta[@property="og:image"]/@content').extract()[0]
     picture = ImageItem()
     picture['image'] = re.sub(r'wid=\d+&hei=\d+', 'wid=1000&hei=1000', og_image)
     picture['thumbnail'] = re.sub(r'wid=\d+&hei=\d+', 'wid=50&hei=50', og_image)

     single_color = Color()
     single_color['type'] = 'color'
     single_color['show_product_id'] = baseItem['show_product_id']
     single_color['from_site'] = self.name
     single_color['images'] = [picture]
     single_color['name'] = 'one-color'
     single_color['cover'] = picture['thumbnail']

     yield single_color
     baseItem['skus'] = [sku]
     yield baseItem
Example #15
0
    def handle_parse_item(self, response, item):
        """Parse a product page into a color item, its SKUs and the base item.

        Yields the ``Color`` item (only when gallery images are present) and
        then ``item`` with ``colors``, ``dimensions``, ``sizes``, ``skus`` and
        ``desc`` filled in.  Nothing is yielded for pages with a wait-list
        submit input, pages with more than one add-to-cart button, URLs
        containing ``preowned``, or pages whose first color swatch lacks a
        name or a cover.

        Prices are read once per page; every SKU of the page shares them.
        """
        sel = Selector(response)

        if sel.xpath('//input[@id="waitlistSubmit"]').extract():
            return
        if len(sel.xpath("//button[@id='add-to-cart']")) > 1:
            return
        if 'preowned' in response.url:
            return

        size_chart_urls = sel.xpath(
            '//div[@class="popover-content"]/img/@src').extract()
        if size_chart_urls:
            item['size_info'] = {'size_chart_url': size_chart_urls[0]}

        colorItem = Color()
        colorItem['from_site'] = self.name
        colorItem['show_product_id'] = item['show_product_id']
        colorItem['type'] = 'color'

        color_li = sel.xpath('//ul[@class="product-color-list"]/li[1]')
        if color_li:
            names = color_li.xpath('./a/@data-color').extract()
            if not names:
                return
            covers = color_li.xpath('./a/img/@src').extract()
            if not covers:
                # Some swatches carry the cover as a CSS background image.
                covers = color_li.xpath(
                    './a/div[@class="center-cropped"]/@style').re(
                        r"url\('(.+)'\)")
            if not covers:
                return
            colorItem['cover'] = 'http:' + covers[0]
            color_name = names[0]
            colorItem['name'] = color_name
        else:
            option_values = sel.xpath(
                '//span[@class="mz-productoptions-optionvalue"]/text()'
            ).extract()
            color_name = option_values[0] if option_values else 'one_color'
            colorItem['name'] = color_name

        gallery = sel.xpath('//div[@id="productimages"]/img')
        if gallery:
            colorImages = []
            for picture in gallery:
                # Fall back to data-src when the img has no src attribute.
                sources = (picture.xpath('./@src').extract()
                           or picture.xpath('./@data-src').extract())
                thumb_url = 'http:' + sources[0]

                zoom_urls = picture.xpath('./@data-zoom').extract()
                if zoom_urls:
                    full_url = 'http:' + zoom_urls[0]
                else:
                    full_url = thumb_url.replace('537', '2160')

                if 'cover' not in colorItem.keys():
                    colorItem['cover'] = thumb_url.replace('537', '40')
                colorImages.append({'thumbnail': thumb_url, 'image': full_url})

            colorItem['images'] = colorImages
            yield colorItem

        item['colors'] = [color_name]
        item['dimensions'] = ['size']

        sku_spans = sel.xpath(
            '//span[@class="mz-productoptions-sizebox   "] | //span[@class="mz-productoptions-sizebox   selected-box"]'
        )

        def read_current_price(allow_range):
            """Return the current price text, preferring the plain price box.

            With ``allow_range`` a text containing ``-`` is treated as a price
            range and the amount after ``- $`` is returned.
            """
            nodes = sel.xpath('//div[@class="mz-price"]/text()')
            if not nodes:
                sale_nodes = sel.xpath(
                    '//div[@class="mz-price is-saleprice"]/text()')
                if not sale_nodes:
                    # NOTE(review): no price box found; the empty selector
                    # list is passed through unchanged, exactly as before.
                    return nodes
                nodes = sale_nodes
            if allow_range and '-' in nodes.extract()[0]:
                return nodes.re(r'-\s*\$(\S+)')[0]
            return nodes.re(r'(\S+)')[0]

        # Range handling was only ever applied to pages with size boxes;
        # that distinction is kept.
        current_price = read_current_price(allow_range=bool(sku_spans))

        crossed_out = sel.xpath(
            '//div[@class="mz-price is-crossedout"]/text()')
        if not crossed_out:
            list_price = current_price
        elif 'Retail' in crossed_out.extract()[0]:
            list_price = crossed_out.re(r'[\d\.]+')[0]
        else:
            list_price = crossed_out.re(r'(\S+)')[0]

        def build_sku(sku_id, size):
            """Create one SKU sharing the page-level prices and color."""
            skuItem = SkuItem()
            skuItem['type'] = 'sku'
            skuItem['from_site'] = self.name
            skuItem['show_product_id'] = item['show_product_id']
            skuItem['id'] = sku_id
            skuItem["list_price"] = list_price
            skuItem['current_price'] = current_price
            skuItem['color'] = color_name
            skuItem['size'] = size
            skuItem['is_outof_stock'] = False
            return skuItem

        if sku_spans:
            skus = [
                build_sku(
                    item['show_product_id'] + '-' + sku_span.xpath(
                        './@data-value').extract()[0],
                    sku_span.xpath('text()').extract()[0].strip())
                for sku_span in sku_spans
            ]
        else:
            skus = [build_sku(item['show_product_id'], 'One Size')]

        item['sizes'] = [sku['size'] for sku in skus]
        item['skus'] = skus

        desc_div = sel.xpath(
            '//div[@class="mz-productdetail-description"]/text()').extract()
        desc_lis = sel.xpath(
            '//ul[@class="mz-productdetail-props"]/li').extract()
        item['desc'] = (desc_div[0] if desc_div else '') + ''.join(desc_lis)

        yield item
Example #16
0
    def handle_parse_item(self, response, item):
        """Parse a product page into color items, SKUs and the base item.

        The product id is read from the ``"product_id": ["..."]`` fragment of
        the response body.  Nothing is yielded when that fragment, the sales
        price or the in-stock message is missing.  Otherwise one ``Color``
        item is yielded per selected color swatch, followed by ``item``.

        A page without a selected swatch is emitted as a single ``one_color``
        variant whose cover is the first image thumbnail.  (Previously the
        placeholder for that case was a plain string that was then queried
        like a selector, which raised ``AttributeError``.)
        """
        skus = []
        sel = Selector(response)
        item['from_site'] = self.name

        if 'whoops' in response.url:
            logging.warning('anti scraping: ' + response.url)

        match = re.search(r'"product_id":\s*\[\s*"([^"]+)"\s*\]',
                          response.body)
        if match is None:
            return
        temp_show_product_id = match.group(1)

        sales_prices = sel.xpath(
            '//span[contains(@class, "price-sales")]/text()').extract()
        if not sales_prices:
            return
        current_price = sales_prices[0]

        standard_prices = sel.xpath(
            '//span[contains(@class, "price-standard")]/text()').re(r'(\S+)')
        list_price = standard_prices[0] if standard_prices else current_price

        item['brand'] = self.name

        desc1 = sel.xpath(
            '//div[@class="categorylisting detail"]/div/div').extract()[0]
        desc2 = sel.xpath(
            '//div[@class="categorylisting fabric"]/div/div').extract()[0]
        # Strip tabs/newlines and inline images from the description markup.
        item['desc'] = re.sub(r'[\t\n]', '', desc1 + desc2)
        item['desc'] = re.sub('<img.+?>', '', item['desc'])

        if not sel.xpath(
                '//div[contains(@class, "quantity clearfix")]//p[contains(@class, "in-stock-msg")]/text()'
        ):
            return

        item_colors_links = sel.xpath(
            '//div[@id="product-content"]//ul[contains(@class, "swatches color")]//li[contains(@class,"selected")]/a'
        )
        item_sizes = sel.xpath(
            '//div[@id="product-content"]//div[contains(@class, "value")]//ul[contains(@class, "swatches size")]/li[@class!="emptyswatch unselectable"]//@title'
        ).extract()

        item['sizes'] = item_sizes
        item['dimensions'] = ['size']
        item['product_type'] = 'mother-baby'

        # Page-level lookups that do not depend on the color or size.
        thumbnails = sel.xpath(
            '//div[@id="thumbnails"]//li[@class!="thumb pdpvideo"]')
        quantity_values = sel.xpath(
            '//select[contains(@id, "Quantity")]//@value').extract()

        def build_image(image_url):
            """Create an ImageItem with full-size and thumbnail renditions."""
            encoded_url = image_url.encode('utf-8')
            imageItem = ImageItem()
            imageItem['image'] = self.handle_image_url(encoded_url, 1000, 1000)
            imageItem['thumbnail'] = self.handle_image_url(encoded_url, 350, 350)
            return imageItem

        colors = []
        # ``None`` marks "no selected swatch on the page" so the loop still
        # runs once and produces the one_color variant.
        for item_color_link in (item_colors_links or [None]):
            if thumbnails:
                images = [
                    build_image(li.xpath('./a/img/@src').extract()[0])
                    for li in thumbnails
                ]
            else:
                # Only video thumbnails, or none at all: use the primary image.
                images = [build_image(sel.xpath(
                    '//img[@class="primary-image"]/@src').extract()[0])]

            if item_color_link is not None:
                color_name = item_color_link.xpath('./@title').extract()[0]
                color_cover = item_color_link.xpath('./@style').re(
                    r'http://[^\)]+')[0]
            else:
                color_name = 'one_color'
                color_cover = images[0]['thumbnail']

            colors.append(color_name)

            # Each color is published under its own product id.
            show_product_id = temp_show_product_id + "*" + color_name
            item['show_product_id'] = show_product_id

            color = Color()
            color['type'] = 'color'
            color['from_site'] = self.name
            color['show_product_id'] = show_product_id
            color['images'] = images
            color['name'] = color_name
            color['cover'] = color_cover
            yield color

            for item_size in item_sizes:
                skuItem = SkuItem()
                skuItem['type'] = 'sku'
                skuItem['show_product_id'] = show_product_id
                skuItem['id'] = show_product_id + "*" + item_size
                skuItem['current_price'] = current_price
                skuItem['list_price'] = list_price
                skuItem['color'] = color_name
                skuItem['size'] = item_size
                skuItem['from_site'] = self.name
                skuItem['is_outof_stock'] = False
                skuItem['quantity'] = quantity_values[0]
                skus.append(skuItem)

        item['colors'] = colors
        item['skus'] = skus
        yield item
Example #17
0
    def handle_parse_item(self, response, item):
        """Parse a goods-detail JSON response into color items, SKUs and item.

        The response body is JSON whose ``data`` object describes the product.
        Nothing is yielded when ``data['inStock']`` is ``0``.  Otherwise one
        ``Color`` item is yielded per style (or a single ``One Color`` item
        when ``skuInfo`` is empty), followed by ``item``.  When ``skuInfo`` is
        present but produces no size information, the color items are still
        yielded but ``item`` is not.

        The environment variables ``product_type_id``, ``category_id``,
        ``editor_flag`` and ``gender`` override the spider attributes of the
        same name.

        Raises ``AttributeError`` when the URL carries neither a ``spuid=``
        nor a ``&spu=`` parameter (unchanged from the previous behaviour).
        """
        body_json = json.loads(response.body)
        goods_detail = body_json['data']
        if goods_detail['inStock'] == 0:
            return

        item['linkhaitao_url'] = response.url
        item['cover'] = goods_detail['coverImgUrl']
        item['desc'] = goods_detail['content']['description']

        for setting in ('product_type_id', 'category_id', 'editor_flag', 'gender'):
            if setting in os.environ:
                setattr(self, setting, os.environ[setting])

        if self.product_type_id:
            item['product_type_id'] = int(self.product_type_id)
            item['product_type'] = 'linkhaitao_' + str(self.product_type_id)
        if self.category_id:
            item['category_id'] = int(self.category_id)
            item['category'] = 'linkhaitao_' + str(self.category_id)
        if self.editor_flag:
            item['editor_flag'] = self.editor_flag
        if self.gender:
            item['gender'] = self.gender

        item['dimensions'] = ['size', 'color']
        item['brand'] = goods_detail['brand']['name_en']
        item['title'] = goods_detail['name']
        item['current_price'] = goods_detail['realPriceOrg']
        item['list_price'] = goods_detail['mallPriceOrg']

        # Seller name with all whitespace removed, lower-cased.
        from_site = ''.join(goods_detail['sellerName']['namecn'].split()).lower()
        if self.is_chinese_word(from_site):
            # NOTE(review): this branch re-reads the same ``namecn`` field, so
            # it changes nothing.  A different sellerName field was probably
            # intended - confirm against the API payload before changing it.
            from_site = ''.join(goods_detail['sellerName']['namecn'].split()).lower()
        from_site = from_site.replace("'", "").split('/')[0]
        if from_site == '6pm':
            from_site = 'sixpm'
        item['from_site'] = from_site

        # Stop at the next '&' so trailing query parameters do not end up in
        # the product id.
        spu_match = re.search(r'spuid=([^&]+)', response.url)
        if spu_match is None:
            spu_match = re.search(r'&spu=([^&]+)', response.url)
        item['show_product_id'] = spu_match.group(1)
        item['url'] = goods_detail['pageUrl']

        def build_color(name, image_urls, cover):
            """Create a Color item whose images reuse each URL as thumbnail."""
            images = []
            for image_url in image_urls:
                imageItem = ImageItem()
                imageItem['image'] = image_url
                imageItem['thumbnail'] = image_url
                images.append(imageItem.copy())
            colorItem = Color()
            colorItem['images'] = images
            colorItem['type'] = 'color'
            colorItem['from_site'] = item['from_site']
            colorItem['show_product_id'] = item['show_product_id']
            colorItem['name'] = name
            colorItem['cover'] = cover
            return colorItem

        if not goods_detail['skuInfo']:
            # No variant data: publish a single One Color / One Size SKU.
            color_name = 'One Color'
            color_names = [color_name]
            yield build_color(color_name, goods_detail['coverImgList'],
                              goods_detail['coverImgUrl'])

            skuItem = SkuItem()
            skuItem['type'] = 'sku'
            skuItem['show_product_id'] = item['show_product_id']
            skuItem['list_price'] = item['list_price']
            skuItem['current_price'] = item['current_price']
            skuItem['color'] = color_name
            skuItem['id'] = item['show_product_id'] + 'onecolor'
            skuItem['from_site'] = item['from_site']
            # Out-of-stock products returned early above.
            skuItem['is_outof_stock'] = False
            skuItem['size'] = 'One Size'
            skus = [skuItem]
            item['sizes'] = ['One Size']
        else:
            color_names = []
            skus = []
            sizes = []
            # dimension name -> list of distinct values seen across all SKUs
            dimensions_values = {}

            for sku_info in goods_detail['skuInfo']['style']['skustylelist']:
                color_name = sku_info['style'] or 'One Color'
                if color_name not in color_names:
                    color_names.append(color_name)

                style_images = sku_info['coverImgList']
                # Fall back to the product cover when a style has no images.
                if style_images:
                    style_cover = style_images[0]
                else:
                    style_cover = goods_detail['coverImgUrl']
                yield build_color(color_name, style_images, style_cover)

                for sku in sku_info['data']:
                    skuItem = SkuItem()
                    skuItem['type'] = 'sku'
                    skuItem['show_product_id'] = item['show_product_id']
                    skuItem['list_price'] = sku['mallPriceOrg']
                    skuItem['current_price'] = sku['realPriceOrg']
                    skuItem['color'] = color_name
                    skuItem['id'] = sku['skuid']
                    skuItem['from_site'] = item['from_site']
                    skuItem['is_outof_stock'] = sku['inStock'] == 0

                    if len(sku['attr']) == 0:
                        skuItem['size'] = 'One Size'
                        if skuItem['size'] not in sizes:
                            sizes.append(skuItem['size'])
                    else:
                        # Multi-dimensional SKU: ``size`` is a dict keyed by
                        # the lower-cased attribute name.
                        sku_dimensions = {}
                        for attr in sku['attr']:
                            dimension = attr['attrName'].lower()
                            sku_dimensions[dimension] = attr['attrValue']
                            if dimension not in item['dimensions']:
                                item['dimensions'].append(dimension)
                            known_values = dimensions_values.setdefault(dimension, [])
                            if attr['attrValue'] not in known_values:
                                known_values.append(attr['attrValue'])
                        if 'size' not in sku_dimensions:
                            sku_dimensions['size'] = 'One Size'
                            known_sizes = dimensions_values.setdefault('size', [])
                            if 'One Size' not in known_sizes:
                                known_sizes.append('One Size')
                        skuItem['size'] = sku_dimensions
                    skus.append(skuItem)

            if sizes:
                item['sizes'] = sizes
            elif dimensions_values:
                item['sizes'] = dimensions_values
            else:
                return

        item['skus'] = skus
        item['colors'] = color_names
        yield item
Example #18
0
    def handle_parse_item(self, response, item):
        """Parse a product page driven by its embedded ``productMainData`` JSON.

        Case one - the page has a ``productMainData`` script: one ``Color``
        item is yielded per entry of ``images.colorwayPrimaryImages``, the
        base ``item`` is filled in, and then either a ``Request`` for the
        size-chart JSON (callback ``parse_size_chart``, ``item`` in ``meta``)
        or ``item`` itself is yielded.

        Otherwise - one ``Request`` per ``memberProducts`` element is yielded
        back to ``parse_item`` with ``item`` in ``meta``.
        """
        sel = Selector(response)
        json_nodes = sel.xpath(
            '//script[@id="productMainData"]/text()').extract()

        if not json_nodes:
            # No embedded product JSON: follow the member product links.
            for member_dom in sel.xpath(
                    '//div[contains(@class, "memberProducts")]'):
                member_path = member_dom.xpath('./@data-pdp-url').extract()[0]
                yield Request(url=self.base_url + member_path,
                              meta={'item': item},
                              callback=self.parse_item)
            return

        product_json = json.loads(json_nodes[0])

        show_product_id = product_json['id']
        parser = HTMLParser()
        # The title is unescaped twice.
        title = parser.unescape(parser.unescape(product_json['title']))
        cover = product_json['imageUrl']
        desc = sel.xpath(
            '//section[contains(@class, "product-details-content")]'
        ).extract()[0]
        brand = product_json['brandName']

        def as_image(image_path):
            """Build the image dict for a path relative to ``image_prefix``."""
            image_url = self.image_prefix + image_path
            return {'thumbnail': image_url, 'image': image_url + '?wid=1000'}

        # ---- color handling ----
        swatch_covers = product_json['colorSwatchMap']
        primary_images = product_json['images']['colorwayPrimaryImages']

        colors = []
        color_items = {}
        # Colors that received neither color-specific nor shared extra images.
        colors_without_extras = []

        for color_name in primary_images:
            # Prefer the swatch as cover; fall back to the primary image.
            if color_name in swatch_covers:
                color_cover = self.image_prefix + swatch_covers[color_name]
            else:
                color_cover = self.image_prefix + primary_images[color_name]

            # Primary image of the color.
            images = [as_image(primary_images[color_name])]

            # Additional images specific to this color (comma-separated).
            per_color_extras = product_json['images']['colorwayAdditionalImages']
            has_color_extras = color_name in per_color_extras
            if has_color_extras:
                for extra_path in per_color_extras[color_name].split(','):
                    images.append(as_image(extra_path))

            # Additional images shared by all colors.
            shared_extras = product_json['images']['additionalImages']
            has_shared_extras = len(shared_extras) > 0
            for extra_path in shared_extras:
                images.append(as_image(extra_path))

            colorItem = Color()
            colorItem['type'] = 'color'
            colorItem['from_site'] = 'macys'
            colorItem['show_product_id'] = show_product_id
            colorItem['name'] = color_name
            colorItem['cover'] = color_cover
            colorItem['images'] = images
            color_items[color_name] = colorItem

            if not (has_color_extras or has_shared_extras):
                colors_without_extras.append(color_name)
            colors.append(color_name)

        # Colors with no extra images borrow those of the selected color.
        for color_name in colors_without_extras:
            selected_color_name = product_json['selectedColor']
            per_color_extras = product_json['images']['colorwayAdditionalImages']
            if selected_color_name in per_color_extras:
                for extra_path in per_color_extras[selected_color_name].split(','):
                    color_items[color_name]['images'].append(as_image(extra_path))

        for color_item_name in color_items:
            yield color_items[color_item_name]
        # ---- color handling end ----

        # ---- base item info ----
        item['title'] = title
        item['brand'] = brand
        item['cover'] = cover
        item['desc'] = desc
        item['colors'] = colors
        item['show_product_id'] = show_product_id

        size_list = product_json['sizesList']
        if size_list == []:
            item['sizes'] = ['One Size']
        else:
            item['sizes'] = {'size': size_list}
        item['dimensions'] = ['size']

        upc_list = product_json['upcMap'][show_product_id]

        # color name -> {'current_price', 'list_price'}
        color_price_map = {}
        pricing_swatches = product_json['colorwayPricingSwatches']
        for swatch_key in pricing_swatches:
            price_map = pricing_swatches[swatch_key]
            for color_name in price_map:
                color_price = price_map[color_name]
                not_on_sale = color_price['onSale'] == False
                tiers = color_price['tieredPrice']
                # First tier is the list price; on sale, the last tier is
                # the current price.
                list_price = tiers[0]['value'][0]
                if not_on_sale:
                    current_price = list_price
                else:
                    current_price = tiers[-1]['value'][0]
                color_price_map[color_name] = {
                    'current_price': current_price,
                    'list_price': list_price
                }

        skuCollectionsList = []
        for sku_stock in upc_list:
            skuItem = SkuItem()
            skuItem['type'] = 'sku'
            skuItem['from_site'] = 'macys'
            skuItem['show_product_id'] = item['show_product_id']
            skuItem['id'] = sku_stock['upcID']

            if not color_price_map or sku_stock['color'] not in color_price_map:
                # No per-color pricing: use the product-level prices.
                skuItem["list_price"] = product_json['regularPrice']
                skuItem['current_price'] = product_json['salePrice']
                if len(skuItem['current_price']) == 0:
                    skuItem['current_price'] = skuItem["list_price"]
            else:
                color_pricing = color_price_map[sku_stock['color']]
                skuItem["list_price"] = color_pricing['list_price']
                skuItem['current_price'] = color_pricing['current_price']

            skuItem['color'] = sku_stock['color']
            if not item['colors']:
                item['colors'] = [skuItem['color']]
            skuItem['size'] = sku_stock['size'] or 'One Size'
            # Availability arrives as the string "true".
            skuItem['is_outof_stock'] = sku_stock['isAvailable'] != "true"

            skuCollectionsList.append(skuItem)

        item['skus'] = skuCollectionsList

        size_chart = product_json['sizeChartMap'][show_product_id]
        if size_chart['sizeChartCanvasId']:
            # The canvas chart needs another request; parse_size_chart
            # receives the item through meta.
            size_chart_url = self.base_url + '/shop/catalog/product/canvassizechart/json?canvasId=' + size_chart['sizeChartCanvasId']
            yield Request(url=size_chart_url,
                          meta={'item': item},
                          callback=self.parse_size_chart)
            return

        if size_chart['sizeChart']:
            item['size_info'] = self.size_chart_pic_url + size_chart['sizeChart']
        elif size_chart['intlSizeChart']:
            item['size_info'] = self.size_chart_pic_url + size_chart['intlSizeChart']
        else:
            item['size_info'] = ''
        yield item
Example #19
0
    def handle_parse_item(self, response, item):
        """Parse a Ralph Lauren product page into color items and a base item.

        Yields one ``Color`` per ``<li>`` of the ``color-swatches`` list, then
        ``item`` itself (mutated in place) carrying prices, description,
        SKUs, sizes and colors. Nothing is yielded when the page has no
        ``prod-title`` heading, contains a ``prodError`` marker, or exposes
        none of the known price elements.

        Image and SKU data are taken from inline ``<script>`` blocks of the
        raw response body and evaluated through ``execjs``.
        """
        page = Selector(response)

        if not page.xpath('//h1[contains(@class, "prod-title")]'):
            # Not a product page: report the URL and give up.
            print(get_base_url(response))
            return

        if page.xpath('//font[@class="prodError"]'):
            return

        show_product_id = page.xpath('//span[contains(@class, "style-num")]/text()').extract()[0]

        # ``base`` is the very same object as ``item``; the item[...] reads
        # below therefore re-store fields the caller is expected to have set.
        base = item
        base['type'] = 'base'
        base['url'] = get_base_url(response)
        base['title'] = page.xpath('//h1[contains(@class, "prod-title")]/text()').extract()[0]
        base['cover'] = item['cover']

        # Current price: sale price first, then regular price, then the high
        # end of a price range.
        if page.xpath('//span[contains(@itemprop, "offers")]//span[contains(@class, "sale-price")]'):
            if page.xpath('//span[contains(@itemprop, "offers")]//span[contains(@class, "sale-price")]//span[2]'):
                current_price = page.xpath('//span[contains(@itemprop, "offers")]//span[contains(@class, "sale-price")]//span[2]/text()').extract()[0]
            else:
                current_price = page.xpath('//span[contains(@itemprop, "offers")]//span[contains(@class, "sale-price")]//span/text()').extract()[0]
        elif page.xpath('//span[contains(@class, "reg-price")]//span[contains(@itemprop, "price")]'):
            current_price = page.xpath('//span[contains(@class, "reg-price")]//span[contains(@itemprop, "price")]/text()').extract()[0]
        elif page.xpath('//span[contains(@class, "reg-price")]//span[contains(@itemprop, "highPrice")]'):
            current_price = page.xpath('//span[contains(@class, "reg-price")]//span[contains(@itemprop, "highPrice")]/text()').extract()[0]
        else:
            return
        base['current_price'] = current_price

        # List price: the struck-through regular price when on sale (its
        # second part when it is written as a "low-high" range), otherwise
        # the current price.
        if page.xpath('//span[contains(@class, "reg-price is-sale")]'):
            struck_price = page.xpath('//span[contains(@class, "reg-price is-sale")]/text()').extract()[0]
            if '-' in struck_price:
                base['list_price'] = struck_price.split("-")[1]
            else:
                base['list_price'] = struck_price
        else:
            base['list_price'] = base['current_price']

        base['show_product_id'] = show_product_id
        base['desc'] = page.xpath('//div[contains(@class, "detail")]//ul').extract()[0]
        base['brand'] = 'Ralph Lauren'
        base['from_site'] = 'ralphlauren'
        base['product_type'] = item['product_type']
        base['category'] = item['category']
        base['gender'] = item['gender']

        size_info = ''
        if page.xpath('//a[contains(@id, "sizechart")]'):
            size_info = '%s%s' % (self.base_url, page.xpath('//a[contains(@id, "sizechart")]/@href').extract()[0] )
        base['size_info'] = size_info

        # ---- images -------------------------------------------------------
        # Grab the inline script that fills the image maps and swap its
        # leading ``var isTablet...;`` statement for declarations of the two
        # arrays the script populates.
        image_js = "".join(re.findall(r'<script>[\s]*(var isTablet.*;)[\s]*</script>[\s]*<div class="prod-utility">', response.body, re.S))
        image_js = re.sub('var isTablet.*;', 'var altImages = new Array();var Scene7Map = new Array();', image_js)
        image_program = '''
            %s
            function getImages(){
                return orderedImageMap_0;
            }
            function getImages2(){
                var imageArr = new Array()
                for (i in altImages){
                    for (j in altImages[i]) {
                        altImages[i][j]["cId"] = i
                    }
                    imageArr.push(altImages[i])
                }
                return imageArr;
            }
            function getScene7Map(){
                var Scene7Maps = new Array();
                var cIds = new Array();
                for (i in Scene7Map){
                    if (i.toString().indexOf("c") != -1 ){
                        cId = i.toString().substr(1, i.length-1)
                        cIds.push(cId)
                    }
                }

                for (ii in Scene7Map){
                    for (jj in cIds) {
                        s7Index = "s7" + cIds[jj]
                        if (ii == s7Index) {
                            Scene7Maps.push({ "cId":cIds[jj], "cValue":Scene7Map[ii]})
                        }

                    }
                }
                return Scene7Maps
            }
        ''' % image_js.decode('cp1252').encode('utf-8')
        image_ctx = execjs.compile(image_program)

        main_images = image_ctx.call('getImages')
        alt_image_groups = image_ctx.call('getImages2')
        scene7_entries = image_ctx.call('getScene7Map')

        galleries = []
        for main_image in main_images:
            # Use the Scene7 rendition for the main picture when one exists
            # for this color id.
            for scene7 in scene7_entries:
                if scene7['cId'] == main_image['cId']:
                    main_image['v400'] = 'http://s7d2.scene7.com/is/image/PoloGSI/%s?$flyout_main$&cropN=0.12,0,0.7993,1&iv=fLNd_3&wid=1410&hei=1770&fit=fit,1' % (scene7['cValue'])
            pictures = [{"image":main_image['v400'],  "thumbnail": main_image['x50']}]

            # A product video, when present, is stored on the base item.
            if 'vid' in main_image:
                item['media_url'] = 'http://s7d2.scene7.com/is/content/PoloGSI/' + main_image['vid']

            for alt_group in alt_image_groups:
                for alt_image in alt_group:
                    if main_image['cId'] != alt_image['cId']:
                        continue
                    # Derive whichever of the two renditions is missing from
                    # the other by swapping the size suffix.
                    if alt_image['t940'] == '' and alt_image['x50'] != '':
                        alt_image['t940'] = alt_image['x50'].replace('_t50','_t940')
                    elif alt_image['t940'] != '' and alt_image['x50'] == '':
                        alt_image['x50'] = alt_image['t940'].replace('_t940','_t50')
                    pictures.append({"image":alt_image['t940'],  "thumbnail": alt_image['x50']})

            galleries.append( {"cId": main_image['cId'], "pics": pictures} )

        for swatch in page.xpath('//ul[contains(@id, "color-swatches")]//li'):
            color_id = swatch.xpath('./@data-value').extract()[0]
            cover = swatch.xpath('.//img/@src').extract()[0]
            name = swatch.xpath('.//img/@title').extract()[0]

            # The last gallery with a matching color id wins; no match
            # leaves the color without images.
            matching = [gallery['pics'] for gallery in galleries if gallery['cId'] == color_id]
            images = matching[-1] if matching else []

            color = Color()
            color['type'] = 'color'
            color['show_product_id'] = show_product_id
            color['from_site'] = 'ralphlauren'
            color['cover'] = cover
            color['images'] = images
            color['name'] = name
            yield color

        # ---- skus ---------------------------------------------------------
        sku_js = "".join(re.findall(r'<script>[\s]*(var itemMap.*;)[\s]*</script>[\s]*<!--previousURL', response.body, re.S))
        sku_program = '''
            %s
            function getItemMaps(){
                return itemMap;
            }
        ''' % sku_js.decode('cp1252').encode('utf-8')
        sku_ctx = execjs.compile(sku_program)

        skus = []
        size_names = []
        color_names = []
        for entry in sku_ctx.call('getItemMaps'):
            sku = SkuItem()
            sku['type'] = 'sku'
            sku['show_product_id'] = show_product_id
            sku['from_site'] = 'ralphlauren'
            sku['id'] = entry['sku']
            sku['list_price'] = base['list_price']
            sku['current_price'] = entry['price']
            sku['size'] = entry['sDesc']
            sku['color'] = entry['cDesc']
            sku['is_outof_stock'] = entry['avail'] == "OUT_OF_STOCK"
            sku['quantity'] = entry['quantityOnHand']
            size_names.append(entry['sDesc'])
            skus.append(sku)
            color_names.append(entry['cDesc'])

        base['skus'] = skus
        # De-duplicated; the resulting order is whatever the set yields.
        base['sizes'] = list(set(size_names))
        base['colors'] = list(set(color_names))

        yield base
Example #20
0
    def handle_parse_item(self, response, baseItem):
        """Fill ``baseItem`` from a single-variant product page.

        The product id comes from the ``dtmProductId = ...`` assignment in
        the raw response body; when no such assignment exists nothing is
        yielded.

        When the page contains the ``divAvailablity`` block, one 'onecolor'
        ``Color`` item is yielded first and the base item receives title,
        description, prices and a single 'onesize' SKU. The base item is
        yielded last in every case; without the availability block only
        ``show_product_id`` and ``type`` are set here.
        """
        page = Selector(response)

        id_assignments = re.findall(r'dtmProductId = [^;]+',
                                    response.body)
        if not id_assignments:
            return

        baseItem['show_product_id'] = int(
            re.findall(r'\d+', id_assignments[0])[0])
        baseItem['type'] = 'base'

        if page.xpath('//div[@id="divAvailablity"]'):
            baseItem['title'] = page.xpath(
                '//div[@id="divCaption"]/h1/text()').extract()[0]

            promo_cells = page.xpath(
                '//table[@id="TblProdForkPromo"]//td[@class="contenttd"]'
            ).extract()
            warning_cells = page.xpath(
                '//table[@id="TblProdForkWarnings"]//td[@class="contenttd"]'
            ).extract()
            # Later candidates overwrite earlier ones, so the warnings cell
            # wins whenever both are present.
            for cells in (promo_cells, warning_cells):
                if cells:
                    baseItem['desc'] = cells[0]

            baseItem['colors'] = ['onecolor']
            baseItem['sizes'] = ['onesize']
            baseItem['dimensions'] = ['size', 'color']
            baseItem['from_site'] = self.name

            # The page shows a "300" rendition; the full image is the same
            # URL with "500" in its place.
            picture = ImageItem()
            picture['thumbnail'] = page.xpath(
                '//div[@id="divPImage"]//img/@src').extract()[0]
            picture['image'] = re.sub(r'300(\.\w+)', '500\\1',
                                      picture['thumbnail'])

            only_color = Color()
            only_color['type'] = 'color'
            only_color['from_site'] = self.name
            only_color['show_product_id'] = baseItem['show_product_id']
            only_color['images'] = [picture]
            only_color['cover'] = picture['thumbnail']
            only_color['name'] = 'onecolor'
            yield only_color

            sku = SkuItem()
            sku['type'] = 'sku'
            sku['show_product_id'] = baseItem['show_product_id']
            sku['from_site'] = self.name

            # An MSRP row means the product is discounted; otherwise the
            # list price equals the current price.
            msrp_row = page.xpath('//span[@id="rowMSRP"]')
            msrp_text = None
            if msrp_row:
                msrp_text = msrp_row.xpath('./s/text()').extract()[0]
            current_price = page.xpath(
                '//div[@id="productprice"]/span/text()').extract()[0]
            list_price = msrp_text if msrp_row else current_price

            sku['list_price'] = list_price
            sku['current_price'] = current_price
            baseItem['list_price'] = list_price
            baseItem['current_price'] = current_price

            sku['is_outof_stock'] = False
            sku['color'] = 'onecolor'
            sku['id'] = baseItem['show_product_id']
            sku['size'] = 'onesize'
            baseItem['skus'] = [sku]

        yield baseItem
Example #21
0
    def handle_parse_item(self, response, item):
        """Parse a Ralph Lauren Asia product page.

        For every color swatch either a ``Color`` item is yielded directly
        (the currently picked color, whose images are on this page) or a
        ``Request`` for that color's image view is yielded, with the
        half-built ``Color`` passed along in ``meta`` for ``parse_img``.
        Finally ``item`` itself is yielded, populated with prices, color
        names, SKUs and breadcrumb-derived category fields.
        """
        page = Selector(response)

        catalog_id = page.xpath(
            '//input[contains(@id, "catalogId")]/@value').extract()[0]
        store_id = page.xpath(
            '//input[contains(@id, "storeId")]/@value').extract()[0]
        show_product_id = page.xpath(
            '//input[contains(@id, "productId")]/@value').extract()[0]

        base = item
        base['from_site'] = self.name
        base['type'] = 'base'
        base['title'] = page.xpath(
            '//div[contains(@class, "pdd_title box")]//h3/text()').extract()[0]
        base['show_product_id'] = show_product_id
        base['desc'] = page.xpath(
            '//div[contains(@class, "pdd_desc pdd_sub_item box")]').extract(
            )[0]
        base['list_price'] = page.xpath(
            '//p[contains(@class, "pdd_price")]//span/text()').extract()[0]
        # A promo price, when shown, is the current price.
        if page.xpath('//span[contains(@class, "promo_price")]'):
            base['current_price'] = page.xpath(
                '//span[contains(@class, "promo_price")]/text()').extract()[0]
        else:
            base['current_price'] = base['list_price']

        color_names = []

        swatch_nodes = page.xpath(
            '//ul[contains(@class, "pdd_colors_list box")]//li')
        slide_nodes = page.xpath(
            '//div[contains(@id, "rl_pdd_cover_slider")]//ul[contains(@class, "box")]//li'
        )

        for swatch in swatch_nodes:
            color = Color()
            color['type'] = 'color'
            color['from_site'] = self.name
            color['show_product_id'] = show_product_id
            color['cover'] = swatch.xpath('./a/img/@src').extract()[0]
            color['name'] = swatch.xpath('./a/img/@alt').extract()[0]
            color_names.append(color['name'])

            picked = swatch.xpath(
                './a[contains(@class, "pdd_color pdd_color_picked")]//span/@onclick'
            )
            if picked:
                # The slider on this page belongs to the picked color.
                slides = []
                for slide in slide_nodes:
                    picture = ImageItem()
                    picture['image'] = slide.xpath(
                        './img/@src').extract()[0]
                    picture['thumbnail'] = slide.xpath(
                        './img/@small').extract()[0]
                    slides.append(picture)

                color['images'] = slides
                yield color
            else:
                # Other colors: the swatch's onclick handler carries the
                # part number needed to request that color's images.
                onclick = swatch.xpath(
                    './a[contains(@class, "pdd_color")]//span/@onclick'
                ).extract()[0]
                onclick_args = re.findall(r"'(.*)'", onclick)[0].split(',')
                part_number = onclick_args[5].replace("'", "")
                image_view_url = '%s/webapp/wcs/stores/servlet/ProductDetailFullImageView?catalogId=%s&langId=-1&storeId=%s&MFPARTNUMBER=%s' % (
                    self.base_url, catalog_id, store_id, part_number)

                yield Request(image_view_url.encode('UTF-8'),
                              meta={'color': color},
                              callback=self.parse_img)
                # Inert string literal (never executed), kept verbatim.
                '''
                quantityUrl = 'http://www.ralphlauren.asia/webapp/wcs/stores/servlet/ProductDetailQuantityView?catalogId=12551&langId=-1&storeId=12151'
                formdata = {
                        'SKUId': str(clickParam[2]),
                        'objectId':'',
                        'requesttype':'ajax'
                        } 
                yield FormRequest(url=sizeUrl, formdata=formdata, callback=self.parse_quantity, meta={ '': '' } )

                sizeUrl = self.base_url + '/webapp/wcs/stores/servlet/ProductDetailSizeSelectView?catalogId=12551&langId=-1&storeId=12151'
                formdata = {
                        'Id': str(clickParam[1]),
                        'SKUId': str(clickParam[2]),
                        'Color': 'Lime',
                        'ColorId': str(clickParam[5]),
                        'Size': '',
                        'InItemSppSplitChar':'@@',
                        'objectId':'',
                        'requesttype':'ajax'
                        }
                yield FormRequest(url=sizeUrl, formdata=formdata, callback=self.parse_size, meta={ 'color': color } )
                '''

        # One SKU per size <option>. Every field is read from page-level
        # elements, so the option node itself is not consulted and no size
        # is recorded on the SKU.
        skus = []
        for _option in page.xpath(
                '//select[contains(@id, "rl_pdd_size")]//option '):
            sku = SkuItem()
            sku['type'] = 'sku'
            sku['show_product_id'] = show_product_id
            sku['from_site'] = self.name
            sku['list_price'] = base['list_price']
            sku['current_price'] = base['current_price']
            sku['is_outof_stock'] = False
            sku['id'] = page.xpath(
                '//input[contains(@id, "selectSKUId")]/@value').extract()[0]
            sku['color'] = page.xpath(
                '//span[contains(@class, "pdd_current_color")]/text()'
            ).extract()[0]
            sku['quantity'] = page.xpath(
                '//select[contains(@id, "rl_pdd_qty")]//option/@value'
            ).extract()[0]
            skus.append(sku)

        base['colors'] = color_names
        base['sizes'] = {'size': ['onesize']}
        base['skus'] = skus
        base['dimensions'] = ""
        base['brand'] = 'ralph lauren'
        # Category and product type are the 4th and 3rd breadcrumb links.
        base['category'] = page.xpath(
            '//div[contains(@class, "bread bread_bar")]//a[4]/text()').extract(
            )[0]
        base['product_type'] = page.xpath(
            '//div[contains(@class, "bread bread_bar")]//a[3]/text()').extract(
            )[0]

        yield base
Example #22
0
    def handle_parse_item(self, response, item):
        """Parse a product page into ``Color`` items and the populated ``item``.

        SKU data is read from the JavaScript array embedded in the
        ``entitledItem_<product id>`` div and evaluated through ``execjs``.
        Each SKU attribute key has the form ``<dimension>_<value>``; the
        ``vendorcolor`` dimension is reported as ``color``.

        Yields every ``Color`` for which some SKU supplied images, then
        ``item`` itself. Nothing at all is yielded when the product id is
        missing, when a SKU introducing a new color has no
        ``ItemThumbnailImage``, or when a SKU has an empty ``offerPrice`` or
        ``listPrice``.

        Each SKU's ``size`` is the dict of all its dimensions (including
        ``color``); its ``color`` is the parsed vendor color, falling back to
        'one color' when the SKU has none.
        """
        sel = Selector(response)

        product_id_div = sel.xpath(
            '//div[@id="storeCatalogEntryID"]/text()').extract()
        if not product_id_div or not product_id_div[0].strip():
            return
        product_id = product_id_div[0].strip()

        item['show_product_id'] = product_id

        sku_infos_str = sel.xpath('//div[@id="entitledItem_' + product_id +
                                  '"]/text()').re(r'\[[\s\S]+\]')[0].strip()

        context = execjs.compile('''
            var sku_info = %s;
            function getSkuInfo(){
                return sku_info;
            }
        ''' % sku_infos_str)

        sku_infos = context.call('getSkuInfo')

        def new_color(name):
            # Every Color, including the 'one color' fallback, carries the
            # same identifying fields so it is complete if it gets yielded.
            color_item = Color()
            color_item['show_product_id'] = product_id
            color_item['from_site'] = self.name
            color_item['type'] = 'color'
            color_item['name'] = name
            return color_item

        def with_scheme(url):
            # URLs that do not already start with "http" get "http:" prefixed.
            return url if url.startswith('http') else 'http:' + url

        colors = []
        color_list = sel.xpath('//ul[@class="detail_color"]/li')
        if color_list:
            for color_li in color_list:
                colors.append(new_color(color_li.xpath(
                    './a//div[@class="colorName"]/span/text()').extract(
                    )[0].strip()))
        else:
            # NOTE(review): the last swatch list is deliberately skipped by
            # the original code; the reason is not visible here.
            swatch_lists = sel.xpath('//div[@class="color_swatch_list"]')[:-1]
            if swatch_lists:
                for swatch_list in swatch_lists:
                    for swatch in swatch_list.xpath('./ul/li'):
                        colors.append(new_color(
                            swatch.xpath('./a/@title').extract()[0]))
            else:
                colors.append(new_color('one color'))
        color_names = [color['name'] for color in colors]

        dimensions = set()
        sizes = {}
        color_name_images_map = {}
        skus = []

        for sku_info in sku_infos:
            sku_item = SkuItem()
            sku_item['id'] = sku_info['catentry_id']
            sku_item['type'] = 'sku'
            sku_item['from_site'] = self.name
            sku_item['show_product_id'] = product_id

            attributes = sku_info['Attributes']
            if not attributes:
                # No attributes at all: treat as a single color/size product.
                attributes = ['vendorcolor_one color', 'size_one size']
            elif not any('Size' in key for key in attributes):
                # Attributes without any size dimension get a default one.
                attributes['size_one size'] = '2'

            sku_size = {}
            for attribute in attributes:
                keys = attribute.split('_')
                dimension = keys[0].lower()
                value = keys[1]

                if dimension == 'vendorcolor':
                    dimension = 'color'
                    if value not in color_name_images_map:
                        # First SKU of this color supplies its pictures.
                        if 'ItemThumbnailImage' not in sku_info:
                            return
                        thumbnail = with_scheme(sku_info['ItemThumbnailImage'])
                        image = (with_scheme(sku_info['ItemImage']) +
                                 '&wid=970&hei=1245&fit=fit,1')
                        color_name_images_map[value] = {
                            'images': [{
                                'thumbnail': thumbnail,
                                'image': image
                            }]
                        }
                        color_name_images_map[value]['cover'] = with_scheme(
                            sku_info['ItemSwatchImage2'])

                dimensions.add(dimension)
                sku_size[dimension] = value

                if dimension != 'color':
                    sizes.setdefault(dimension, set()).add(value)

            # An empty offer or list price aborts the whole product.
            if sku_info['offerPrice'] == '' or sku_info['listPrice'] == '':
                return

            sku_item['current_price'] = handle_price(sku_info['offerPrice'])
            sku_item['list_price'] = handle_price(sku_info['listPrice'])
            # The list price is never reported below the current price.
            if float(sku_item['list_price']) < float(
                    sku_item['current_price']):
                sku_item['list_price'] = sku_item['current_price']

            if sku_info['availableQuantity']:
                sku_item['quantity'] = int(float(
                    sku_info['availableQuantity']))
            else:
                sku_item['quantity'] = 0
            sku_item['is_outof_stock'] = sku_info['outOfStock']

            # Previously the color was never taken from the parsed
            # attributes, so every SKU was reported as 'one color'.
            sku_item['color'] = sku_size.get('color', 'one color')
            sku_item['size'] = sku_size if sku_size else 'one size'

            skus.append(sku_item)

        # Only colors that some SKU supplied images for are emitted.
        for color in colors:
            color_assets = color_name_images_map.get(color['name'])
            if color_assets is not None:
                color['images'] = color_assets['images']
                color['cover'] = color_assets['cover']
                yield color

        item['dimensions'] = list(dimensions)
        item['sizes'] = {key: list(values) for key, values in sizes.items()}
        item['colors'] = color_names
        item['skus'] = skus

        # Prefer the main content block; fall back to the descriptions block.
        desc = sel.xpath('//div[@id="detial_main_content"]').extract()
        if not desc:
            desc = sel.xpath(
                '//div[@class="descriptionsContent"]').extract()
        item['desc'] = desc[0]

        yield item
Example #23
0
    def handle_parse_item(self, response, item):
        """Parse a Tiffany product page into one ``Color`` item and ``item``.

        Product data is read from the ``var dataLayer = ...;`` assignment in
        the raw response body (first entry of its ``products`` list). Nothing
        is yielded when that assignment is missing, when the product has no
        price, or when its ``stockStatus`` is 'out of stock'.

        One SKU is built per ``<option>`` of the
        ``ctlSkuGroupType1_selItemList`` select (the product id is then the
        ``groupSku``); without options a single 'One Size' SKU is built and
        the product id is the ``sku``. The only color is 'One Color'.
        """
        sel = Selector(response)

        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        data_layer = re.search(r'var dataLayer\s*=\s*(.+?);', response.body)
        if not data_layer:
            # Like the sibling handlers, skip pages that lack the expected
            # data instead of failing on ``None.group``.
            return
        product = json.loads(data_layer.group(1))['products'][0]

        item['brand'] = 'Tiffany'
        item['title'] = product['name']
        if not product['price']:
            return
        item['list_price'] = product['price']
        item['current_price'] = product['price']
        item['desc'] = sel.xpath(
            '//div[@id="drawerDescription"]/div/div').extract()[0]

        # The og:image URL is the base of the cover and the product picture.
        og_image = sel.xpath(
            '//meta[@property="og:image"]/@content').extract()[0]
        item['cover'] = og_image + self.cover_img_surffix

        if product['stockStatus'] == 'out of stock':
            return

        size_options = sel.xpath(
            '//select[@id="ctlSkuGroupType1_selItemList"]/option')
        if size_options:
            item['show_product_id'] = product['groupSku']
            sizes = []
            for size_option in size_options:
                label = size_option.xpath('./text()').extract()
                sizes.append(label[0] if label else 'One Size')
        else:
            item['show_product_id'] = product['sku']
            sizes = ['One Size']

        skus = []
        for size in sizes:
            skuItem = SkuItem()
            skuItem['type'] = "sku"
            skuItem['from_site'] = item['from_site']
            skuItem['size'] = size
            skuItem['color'] = 'One Color'
            skuItem['id'] = item['show_product_id'] + '-' + skuItem[
                'color'] + '-' + skuItem['size']
            skuItem['show_product_id'] = item['show_product_id']
            skuItem['current_price'] = item['current_price']
            skuItem['list_price'] = item['list_price']
            skus.append(skuItem)

        imageItem = ImageItem()
        # NOTE(review): thumbnail and image both use the large suffix, as in
        # the original code - confirm whether a thumbnail suffix was intended.
        imageItem['thumbnail'] = og_image + self.large_img_surffix
        imageItem['image'] = og_image + self.large_img_surffix

        color = Color()
        color['type'] = 'color'
        color['from_site'] = item['from_site']
        color['show_product_id'] = item['show_product_id']
        color['images'] = [imageItem]
        color['name'] = 'One Color'
        color['cover'] = item['cover']
        yield color

        item['colors'] = ['One Color']
        item['sizes'] = sizes
        item['skus'] = skus
        item['dimensions'] = ['size']
        yield item
Example #24
0
    def handle_parse_item(self, response, item):
        sel = Selector(response)

        product_id_var = re.search(r'var pr_page_id[\s]*=[\s]*([^;]+);',
                                   response.body)
        if not product_id_var:
            return
        # if len(sel.xpath('//div[@class="outOfStock outOfStockSpecial"]')) > 0:
        #     return
        product_id = eval(product_id_var.group(1))
        item['show_product_id'] = product_id

        templateid = sel.xpath(
            '//div[@id="templateOption"]/@templateid').extract()

        if len(templateid) > 0:
            templateid = templateid[0]

        color_rows = sel.xpath('//tr[contains(@class, "diaperItemTR")]')
        clothing_shoes_products = sel.xpath(
            '//div[contains(@class, "clothingShoesProducts")]')

        if templateid == '6':
            ''''''
            colorItems = {}
            colorNames = []

            oneColorDiv = sel.xpath('//div[@id="oneSelection"]')
            if len(oneColorDiv.extract()) > 0:
                colorItem = Color()

                color_cover = oneColorDiv.xpath(".//img/@src").extract()[0]

                if re.match(r'^//', color_cover):
                    color_cover = 'https:' + color_cover

                color_name = oneColorDiv.xpath("text()").extract()[0]

                colorItem["cover"] = color_cover

                colorItem["name"] = color_name
                colorItem['type'] = 'color'
                colorItem['from_site'] = self.from_site

                colorItems[color_name] = {"item": colorItem, "handled": False}

                colorNames.append(color_name)
            else:
                colorDivs = sel.xpath('//ul[@id="falvorDrownList"]/li')
                #colorDivs = sel.xpath('//div[contains(@class, "clothingShoesProducts")]/div[contains(@class, "clothProductItem")]//div[contains(@class, "colorPaneItems")]');

                for colorDiv in colorDivs:
                    colorItem = Color()

                    color_cover = colorDiv.xpath(".//img/@src").extract()[0]

                    if re.match(r'^//', color_cover):
                        color_cover = 'https:' + color_cover

                    color_name = colorDiv.xpath("@id").extract()[0]
                    colorItem["cover"] = color_cover

                    colorItem["name"] = color_name
                    colorItem['type'] = 'color'
                    colorItem['from_site'] = self.from_site

                    colorItems[color_name] = {
                        "item": colorItem,
                        "handled": False
                    }

                    colorNames.append(color_name)

            skuHiddens = sel.css('.multiItemBox').xpath(
                './/input[@class="skuHidden"]'
            )  #sel.xpath('//div[@id="clothItem"]//input[@class="skuHidden"]')

            skus = []
            sizes = []
            if len(skuHiddens) > 0:
                for skuHidden in skuHiddens:
                    if skuHidden.xpath('@value').extract()[0] != "":

                        skuItem = SkuItem()
                        skuItem['show_product_id'] = item['show_product_id']

                        regular_price = skuHidden.xpath(
                            '@regularprice').extract()
                        price = skuHidden.xpath('@price').extract()[0]
                        skuid = skuHidden.xpath('@value').extract()[0]
                        is_outof_stock = skuHidden.xpath(
                            '@isoutofstock').extract()[0]
                        skuItem['id'] = skuid

                        if len(regular_price) > 0 and regular_price[0] != '':
                            skuItem["list_price"] = regular_price[0]
                        else:
                            skuItem["list_price"] = price

                        skuItem['current_price'] = price

                        replace_skuid = skuid.replace('-', '_')
                        size = sel.xpath(
                            '//ul[@id="diaperItemTR' + replace_skuid +
                            '"]//li[@class="itemSize"]/span/text()').extract(
                            )[0].strip()
                        color_name = sel.xpath('//ul[@id="diaperItemTR' +
                                               replace_skuid +
                                               '"]/@primaryattr').extract()[0]

                        skuItem['size'] = size

                        if not size in sizes:
                            sizes.append(size)

                        skuItem['color'] = color_name
                        skuItem[
                            'is_outof_stock'] = self.change_out_of_stock_str(
                                is_outof_stock)

                        skuItem['type'] = 'sku'
                        skuItem['from_site'] = self.from_site
                        #yield skuItem

                        skus.append(skuItem)

                        if colorItems[color_name]["handled"] == False:

                            colorItems[color_name]["handled"] = True

                            url = 'https://www.diapers.com/product/productDetail!GetSkuImage.qs?skuCode=' + skuid + '&random=' + '%f' % random(
                            )

                            yield Request(url,
                                          meta={
                                              'item':
                                              colorItems[color_name]['item'],
                                              'show_product_id':
                                              item['show_product_id']
                                          },
                                          callback=self.parse_image)

                item["skus"] = skus
                item['sizes'] = sizes
                item['colors'] = colorNames

        elif len(color_rows.extract()) > 0:
            '''只有一个尺寸表格'''
            skus = []
            colorNames = []
            for color_row in color_rows:

                colorItem = Color()
                colorItem['from_site'] = self.from_site
                colorItem['type'] = 'color'

                skuHidden = color_row.xpath('.//input[@class="skuHidden"]')

                skuItem = SkuItem()
                skuItem['show_product_id'] = item['show_product_id']

                regular_price = skuHidden.xpath('@regularprice').extract()
                price = skuHidden.xpath('@price').extract()[0]
                skuid = skuHidden.xpath('@value').extract()[0]
                is_outof_stock = skuHidden.xpath('@isoutofstock').extract()[0]
                skuItem['id'] = skuid
                # print response.url
                # print is_outof_stock
                # print skuHidden.extract()
                if len(regular_price) > 0 and regular_price[0] != '':
                    skuItem["list_price"] = regular_price[0]
                else:
                    skuItem["list_price"] = price

                skuItem['current_price'] = price

                color_cover = color_row.xpath(
                    'td[@class="itemImage"]/span[contains(@class, "itemImageDiv")]/img/@src'
                ).extract()[0]

                if re.match(r'^//', color_cover):
                    color_cover = 'https:' + color_cover
                colorItem['cover'] = color_cover

                color_name = color_row.xpath(
                    'td[contains(@class, "Description")]/text()').extract()

                if len(color_name) > 0:
                    color_name = color_name[0].strip()
                else:
                    color_name = color_row.xpath(
                        'td[contains(@class, "elseDescription")]/text()'
                    ).extract()
                    if len(color_name) > 0:
                        color_name = color_name[0].strip()
                    else:
                        color_name = color_row.xpath(
                            'td[contains(@class, "itemDescription")]/text()'
                        ).extract()
                        if len(color_name) > 0:
                            color_name = color_name[0].strip()

                if not color_name:
                    color_name = 'onecolor'

                colorItem['name'] = color_name
                colorNames.append(color_name)

                skuItem['color'] = color_name
                skuItem['size'] = 'onesize'
                skuItem['is_outof_stock'] = self.change_out_of_stock_str(
                    is_outof_stock)

                skuItem['type'] = 'sku'
                skuItem['from_site'] = self.from_site
                #yield skuItem

                skus.append(skuItem)

                url = 'https://www.diapers.com/product/productDetail!GetSkuImage.qs?skuCode=' + skuid + '&random=' + '%f' % random(
                )

                yield Request(url,
                              meta={
                                  'item': colorItem,
                                  'show_product_id': item['show_product_id']
                              },
                              callback=self.parse_image)

            item["skus"] = skus
            item['sizes'] = ['onesize']
            item['colors'] = colorNames

        elif len(clothing_shoes_products) > 0:
            '''最常见的格式'''
            colorDivs = sel.xpath(
                '//div[contains(@class, "clothingShoesProducts")]/div[contains(@class, "clothProductItem")]//div[contains(@class, "colorPaneItems")]'
            )
            colorItems = {}
            colorNames = []
            for colorDiv in colorDivs:
                colorItem = Color()

                if len(colorDiv.xpath('./img').extract()) > 0:
                    color_cover = colorDiv.xpath("./img/@src").extract()[0]

                    if re.match(r'^//', color_cover):
                        color_cover = 'https:' + color_cover

                    color_name = colorDiv.xpath("./img/@color").extract()[0]
                    colorItem["cover"] = color_cover
                elif len(colorDiv.xpath("./div/@style").extract()) > 0:
                    cover_style = colorDiv.xpath("./div/@style").extract()[0]
                    color_name = colorDiv.xpath("./div/@color").extract()[0]
                    if 'background:' in cover_style:
                        cover_style = re.search('background:([^;]+)',
                                                cover_style).group(1)
                    colorItem["cover_style"] = cover_style
                else:
                    return

                colorItem["name"] = color_name
                colorItem['type'] = 'color'
                colorItem['from_site'] = self.from_site

                colorItems[color_name] = {"item": colorItem, "handled": False}

                colorNames.append(color_name)

            skuHiddens = sel.xpath(
                '//div[@id="clothItem"]//input[@class="skuHidden"]')

            skus = []
            sizes = []
            if len(skuHiddens) > 0:
                for skuHidden in skuHiddens:
                    if skuHidden.xpath('@value').extract()[0] != "":
                        skuItem = SkuItem()
                        skuItem['show_product_id'] = item['show_product_id']

                        regular_price = skuHidden.xpath(
                            '@regularprice').extract()
                        price = skuHidden.xpath('@price').extract()[0]
                        skuid = skuHidden.xpath('@value').extract()[0]
                        is_outof_stock = skuHidden.xpath(
                            '@isoutofstock').extract()[0]
                        skuItem['id'] = skuid

                        if len(regular_price) > 0 and regular_price[0] != '':
                            skuItem["list_price"] = regular_price[0]
                        else:
                            skuItem["list_price"] = price

                        skuItem['current_price'] = price

                        size = sel.xpath(
                            '//div[@id="clothItem"]//input[@sku="' + skuid +
                            '"]/@value').extract()[0]
                        color_name = sel.xpath(
                            '//div[@id="clothItem"]//input[@sku="' + skuid +
                            '"]/@primaryattributevalue').extract()[0]

                        skuItem['size'] = size

                        if not size in sizes:
                            sizes.append(size)

                        skuItem['color'] = color_name
                        skuItem[
                            'is_outof_stock'] = self.change_out_of_stock_str(
                                is_outof_stock)

                        skuItem['type'] = 'sku'
                        skuItem['from_site'] = self.from_site
                        #yield skuItem

                        skus.append(skuItem)
                        if colorItems[color_name]["handled"] == False:

                            colorItems[color_name]["handled"] = True

                            url = 'https://www.diapers.com/product/productDetail!GetSkuImage.qs?skuCode=' + skuid + '&random=' + '%f' % random(
                            )

                            yield Request(url,
                                          meta={
                                              'item':
                                              colorItems[color_name]['item'],
                                              'show_product_id':
                                              item['show_product_id']
                                          },
                                          callback=self.parse_image)

                item["skus"] = skus
                item['sizes'] = sizes
                item['colors'] = colorNames

        elif len(
                sel.xpath(
                    '//div[@id="templateOption"]/div[contains(@class, "colorSizeFirstStep")]//li[contains(@class, "colorPaneItems")]'
                ).extract()) > 0:
            '''判断是否是只有颜色没有尺寸的情况'''
            colorDivs = sel.xpath(
                '//div[@id="templateOption"]/div[contains(@class, "colorSizeFirstStep")]//li[contains(@class, "colorPaneItems")]'
            )
            colorItems = {}
            colorNames = []
            skus = []
            for colorDiv in colorDivs:
                colorItem = Color()

                if len(colorDiv.xpath('./img').extract()) > 0:
                    color_cover = colorDiv.xpath("./img/@src").extract()[0]

                    if re.match(r'^//', color_cover):
                        color_cover = 'https:' + color_cover

                    color_name = colorDiv.xpath("./img/@color").extract()[0]
                    colorItem["cover"] = color_cover
                else:
                    cover_style = colorDiv.xpath("./div/@style").extract()[0]
                    color_name = colorDiv.xpath("./div/@color").extract()[0]
                    if 'background:' in cover_style:
                        cover_style = re.search('background:([^;]+)',
                                                cover_style).group(1)
                    colorItem["cover_style"] = cover_style

                colorItem["name"] = color_name
                colorItem['type'] = 'color'
                colorItem['from_site'] = self.from_site

                colorItems[color_name] = {"item": colorItem, "handled": False}

                colorNames.append(color_name)

                skuid = colorDiv.xpath('./img/@sku').extract()[0]

                skuHidden = sel.xpath('//input[@id="skuHidden' +
                                      skuid.replace('-', '_') + '"]')

                skuItem = SkuItem()
                skuItem['show_product_id'] = item['show_product_id']

                regular_price = skuHidden.xpath('@regularprice').extract()
                price = skuHidden.xpath('@price').extract()[0]
                is_outof_stock = skuHidden.xpath('@isoutofstock').extract()[0]
                skuItem['id'] = skuid

                if len(regular_price) > 0 and regular_price[0] != '':
                    skuItem["list_price"] = regular_price[0]
                else:
                    skuItem["list_price"] = price

                skuItem['current_price'] = price
                size = 'onesize'

                skuItem['size'] = size

                skuItem['color'] = color_name
                skuItem['is_outof_stock'] = self.change_out_of_stock_str(
                    is_outof_stock)

                skuItem['type'] = 'sku'
                skuItem['from_site'] = self.from_site
                #yield skuItem

                skus.append(skuItem)

                url = 'https://www.diapers.com/product/productDetail!GetSkuImage.qs?skuCode=' + skuid + '&random=' + '%f' % random(
                )

                yield Request(url,
                              meta={
                                  'item': colorItem,
                                  'show_product_id': item['show_product_id']
                              },
                              callback=self.parse_image)

            item['sizes'] = ['onesize']
            item["skus"] = skus
            item['colors'] = colorNames

        elif len(
                sel.xpath(
                    '//div[@id="templateOption"]/div[contains(@class, "colorSizeFirstStep")]'
                ).extract()) > 0:
            colorItem = Color()
            #             print sel.xpath('//div[@id="QtyInputDiv"]//li[contains(@class,"itemImage")]').extract()
            #             color_cover = sel.xpath('//div[@id="QtyInputDiv"]//li[contains(@class,"itemImage")]//img/@src').extract()[0]
            color_name = 'onecolor'

            #             colorItem['cover'] = color_cover
            colorItem['name'] = color_name
            colorItem['type'] = 'color'
            colorItem['from_site'] = self.from_site

            sku_sizes = sel.css('.colorSizeFirstStep .collectionSelections'
                                ).xpath('./li/input')

            skus = []
            sizes = []
            colorItemSku = ''
            for sku_size in sku_sizes:
                ''''''
                skuItem = SkuItem()
                skuItem['show_product_id'] = item['show_product_id']

                skuid = sku_size.xpath('@sku').extract()[0]
                skuHidden = sel.xpath('//input[@id="skuHidden' +
                                      skuid.replace('-', '_') + '"]')

                regular_price = skuHidden.xpath('@regularprice').extract()
                price = skuHidden.xpath('@price').extract()[0]
                is_outof_stock = skuHidden.xpath('@isoutofstock').extract()[0]

                skuItem['id'] = skuid
                if len(regular_price) > 0 and regular_price[0] != '':
                    skuItem["list_price"] = regular_price[0]
                else:
                    skuItem["list_price"] = price
                skuItem['current_price'] = price

                size = sel.xpath('//img[@id="' + skuid.replace('-', '_') +
                                 'ColorButton"]/@colorvalue').extract()[0]
                skuItem['size'] = size

                if not size in sizes:
                    sizes.append(size)

                skuItem['color'] = color_name
                skuItem['is_outof_stock'] = self.change_out_of_stock_str(
                    is_outof_stock)

                skuItem['type'] = 'sku'
                skuItem['from_site'] = self.from_site
                #yield skuItem

                if colorItemSku == '':
                    colorItemSku = skuid
                    color_cover = sel.xpath(
                        '//img[@id="' + skuid.replace('-', '_') +
                        'ColorButton"]/@imgsrc').extract()[0]

                    if re.match(r'^//', color_cover):
                        color_cover = 'https:' + color_cover

                    colorItem['cover'] = color_cover

                skus.append(skuItem)

            item["skus"] = skus
            item['sizes'] = sizes
            item["colors"] = ['onecolor']

            url = 'https://www.diapers.com/product/productDetail!GetSkuImage.qs?skuCode=' + colorItemSku + '&random=' + '%f' % random(
            )

            yield Request(url,
                          meta={
                              'item': colorItem,
                              'show_product_id': item['show_product_id']
                          },
                          callback=self.parse_image)

        elif templateid == '10':

            # primary_attr = sel.xpath('//table[@id="primaryAttributeList"]/@attributename')
            # primary_attr = primary_attr.extract()[0].strip().lower() if len(primary_attr)>0 else ''
            #
            # second_attr = sel.xpath('//table[@id="secondAttributeList"]/@attributename')
            # second_attr = second_attr.extract()[0].strip().lower() if len(second_attr)>0 else ''
            #
            # third_attr = sel.xpath('//table[@id="thirdAttributeList"]/@attributename')
            # third_attr = third_attr.extract()[0].strip().lower() if len(third_attr)>0 else ''
            #
            # item['dimensions'] = [dimension for dimension in [primary_attr,second_attr,third_attr] if dimension]
            #
            # primary_attr_ids = sel.xpath('//table[@id="primaryAttributeList"]/tr/@id').extract()
            # second_attr_ids = sel.xpath('//table[@id="secondAttributeList"]/tr/@id').extract()
            # third_attr_ids = sel.xpath('//table[@id="thirdAttributeList"]/tr/@id').extract()
            #
            # primary_attr_dict = {}
            # second_attr_dict = {}
            # third_attr_dict = {}
            # item['sizes'] = {}
            skus = []
            # if primary_attr and len(primary_attr_ids) > 0:
            #     item['sizes'][primary_attr] = []
            #     for primary_attr_id in primary_attr_ids:
            #         primary_attr_value = sel.xpath('//table[@id="primaryAttributeList"]/tr[@id="' + str(primary_attr_id) + '"]/td[@class="attributeValue   "]/b/text()').extract()[0]
            #         item['sizes'][primary_attr].append(primary_attr_value)
            #         primary_attr_dict[primary_attr_id] = primary_attr_value
            #
            # if second_attr and len(second_attr_ids) > 0:
            #     item['sizes'][second_attr] = []
            #     for second_attr_id in second_attr_ids:
            #         second_attr_value = sel.xpath('//table[@id="secondAttributeList"]/tr[@id="' + str(second_attr_id) + '"]/td[@class="attributeValue   "]/b/text()').extract()[0]
            #         item['sizes'][second_attr].append(second_attr_value)
            #         second_attr_dict[second_attr_id] = second_attr_value
            #
            # if third_attr and len(third_attr_ids) > 0:
            #     item['sizes'][third_attr] = []
            #     for third_attr_id in third_attr_ids:
            #         third_attr_value = sel.xpath('//table[@id="thirdAttributeList"]/tr[@id="' + str(third_attr_id) + '"]/td[@class="attributeValue   "]/b/text()').extract()[0]
            #         item['sizes'][third_attr].append(third_attr_value)
            #         third_attr_dict[third_attr_id] = third_attr_value

            # if 'sizes' not in item['sizes'].keys():
            #     item['sizes']['size'] = ['One Size']
            # if 'color' not in item['sizes'].keys():
            #     item['colors'] = ['One Color']

            product_json_str = re.search('var pdpOptionsJson\s*=\s*([^\n]+);',
                                         response.body)
            if product_json_str:
                product_json_str = product_json_str.group(1)
            product_json = json.loads(product_json_str)
            colors = []
            for sku_json in product_json:
                skuItem = SkuItem()
                skuItem['id'] = sku_json['Sku']
                skuItem['show_product_id'] = item['show_product_id']
                skuItem['size'] = {}
                skuItem['color'] = sku_json['Description']
                skuItem['size'] = 'One Size'
                colors.append(skuItem['color'])

                # if primary_attr and len(primary_attr_dict) > 0 and len(sku_json['PrimaryAttributeValue'])>0:
                #     skuItem['size'][primary_attr] = primary_attr_dict[sku_json['PrimaryAttributeValue']]
                #
                # if second_attr and len(second_attr_dict) > 0 and len(sku_json['SecondAttributeValue'])>0:
                #     skuItem['size'][second_attr] = second_attr_dict[sku_json['SecondAttributeValue']]
                #
                # if third_attr and len(third_attr_dict) > 0 and len(sku_json['ThirdAttributeValue'])>0:
                #     skuItem['size'][third_attr] = third_attr_dict[sku_json['ThirdAttributeValue']]

                # if len(skuItem['size']) == 0:
                #     skuItem['size'] = 'One Size'
                #     skuItem['color'] = 'One Color'
                # else:
                #     if 'size' not in skuItem['size'].keys():
                #         skuItem['size'] = 'One Size'
                #     if 'color' not in skuItem['size'].keys():
                #         skuItem['color'] = 'One Color'

                skuItem['current_price'] = sku_json['RetailPrice']
                if not sku_json['RegularPrice']:
                    skuItem["list_price"] = sku_json['RetailPrice']
                else:
                    skuItem['list_price'] = sku_json['RegularPrice']

                skuItem['is_outof_stock'] = self.change_out_of_stock_str(
                    sku_json['IsOutOfStock'])
                skuItem['type'] = 'sku'
                skuItem['from_site'] = self.from_site
                skus.append(skuItem)

                colorItem = Color()
                colorItem['name'] = skuItem['color']
                colorItem['type'] = 'color'
                colorItem['from_site'] = self.from_site
                color_url = 'https://www.diapers.com/product/productDetail!GetSkuImage.qs?skuCode=' + skuItem[
                    "id"] + '&random=' + '%f' % random()

                yield Request(color_url,
                              meta={
                                  'item': colorItem,
                                  'show_product_id': item['show_product_id']
                              },
                              callback=self.parse_image)

            item['skus'] = skus
            item['colors'] = colors
            item['sizes'] = ['One Size']

        else:
            '''告警'''
            # print response.meta['url']
            return

        desc = sel.css('.descriptTabContent').xpath(
            "//div[@class='pIdDesContent']").extract()

        if len(desc) > 0:
            item['desc'] = desc[0]
        else:
            item['desc'] = ''
        '''handle size info'''
        size_chart_type = re.search(r'var sizeChartType[\s]*=[\s]*"([^"]+)";',
                                    response.body)
        size_info_brand_name = re.search(r'var brandName[\s]*=[\s]*"([^"]+)";',
                                         response.body)

        if size_chart_type and size_info_brand_name:

            size_info_brand_name = size_info_brand_name.group(1)
            size_chart_type = size_chart_type.group(1)

            item['size_info'] = {
                'brand_name': size_info_brand_name,
                'size_chart_type': size_chart_type
            }
            '''size info chart url'''
            size_info_chart_url = 'https://www.diapers.com/Product/BrandSizeChartHopup.qs?brandName=' + quote(
                size_info_brand_name) + '&sizeChartType=' + quote(
                    size_chart_type)

            #print size_info_chart_url

            yield Request(size_info_chart_url,
                          meta={
                              'brand_name': size_info_brand_name,
                              'size_chart_type': size_chart_type
                          },
                          callback=self.parse_size_info)

        yield item
Example #25
0
    def handle_parse_item(self, response, item):
        """Fill ``item`` from a product detail page and schedule its color page.

        Reads brand, description, style id, category and the size options
        from the page, builds one ``SkuItem`` per size option, yields the
        completed ``item`` and then yields a ``Request`` for the
        ``larger_view.asp`` page, handled by ``self.parse_color``.

        Nothing is yielded when the page has no ``addToCart`` form or when
        that form contains the text ``SOLD OUT``.

        Prices are scraped only when ``item`` does not already carry a
        ``list_price``; the category is scraped only when ``item`` does not
        already carry a ``category``.
        """
        sel = Selector(response)

        add_to_cart_forms = sel.xpath('//form[@name="addToCart"]').extract()
        if len(add_to_cart_forms) == 0:
            return
        if re.search(r'SOLD OUT', add_to_cart_forms[0]):
            return

        # The page is a pure table layout; every lookup below hangs off the
        # same content cell, so the shared XPath prefix is spelled out once.
        content_td = ('//div[1]/div/div/table/tr[2]/td/table[3]/tr/td[2]'
                      '/table[3]/tr/td[2]')
        info_td = content_td + '/table[1]/tr/td[1]/table/tr/td'

        if 'list_price' not in item.keys():
            price_p = sel.xpath(info_td + '/p[last()]')
            price_texts = price_p.xpath('font[1]/text()')

            # Prefer the "Now:" label, then "Sale:"; both capture the bare
            # number after the dollar sign.
            for price_pattern in (r'Now:\s*\$([\d.]+)',
                                  r'Sale:\s*\$([\d.]+)'):
                price_matches = price_texts.re(price_pattern)
                if len(price_matches) > 0:
                    current_price = price_matches[0]
                    break
            else:
                # Alternative layout: the price sits in a <font> directly
                # under the cell; only the label is removed from the text.
                current_price = sel.xpath(
                    info_td + '/font[1]/text()').extract()[0]
                if 'Sale:' in current_price:
                    current_price = current_price.replace('Sale:', '')
                elif 'Today:' in current_price:
                    current_price = current_price.replace('Today:', '')

            struck_prices = price_p.xpath(
                'font[@class="strike"]/font/text()').extract()

            if len(struck_prices) > 0:
                list_price = struck_prices[0].strip()
            else:
                # A missing price paragraph must not abort here, otherwise
                # the font[2] fallback below could never be reached.
                price_p_htmls = price_p.extract()
                price_p_html = price_p_htmls[0] if price_p_htmls else ''
                orig_match = re.search(r'Orig:\s*\$([\d\.]+)', price_p_html)
                if orig_match is None:
                    list_price = sel.xpath(
                        info_td + '/font[2]/text()'
                    ).re(r'Orig:\s*\$([\d\.]+)')[0]
                else:
                    list_price = orig_match.group(1).strip()

            item['list_price'] = list_price
            item['current_price'] = current_price

        # Note: this XPath deliberately starts with "//div/div/div" (no [1]).
        brand_matches = sel.xpath(
            '//div/div/div/table/tr[2]/td/table[3]/tr/td[2]/table[3]/tr/td[2]/table[1]/tr/td[1]/table/tr/td/div/a/text()'
        ).re(r'About the\s*(.+)\s+Brand$')
        # Fall back to the default brand both when the link is absent and
        # when its text does not match the expected wording.
        if len(brand_matches) > 0:
            brand = brand_matches[0]
        else:
            brand = 'New Balance'

        # Description: first non-empty text source wins ...
        description = ''
        for text_path in ('/div[1]/table/tr[1]/td/text()',
                          '/div[1]/table/tr[1]/td/p/text()'):
            description_texts = sel.xpath(content_td + text_path).extract()
            if len(description_texts) > 0:
                description = '<div>' + description_texts[0] + '</div>'
                break

        # ... but a bullet list in the second block replaces it entirely.
        description_lists = sel.xpath(
            content_td + '/div[2]/table/tr[1]/td/ul').extract()
        if len(description_lists) > 0:
            description = '<div>' + description_lists[0] + '</div>'

        if len(description) == 0:
            description = '暂无'

        style_category_font = sel.xpath(
            info_td + '/p[1]/font[last()]/text()')
        show_product_id = style_category_font.re(r'Style:\s*(.+)')[0]

        select_options = sel.xpath(
            content_td +
            '/table[1]/tr/td[3]/form/table[1]/tr[2]/td/select/option')

        sizes = []
        skus = []
        for select_option in select_options:
            option_value = select_option.xpath('@value').extract_first()
            # The "select" entry is the drop-down's placeholder.
            if option_value == 'select':
                continue

            size = select_option.xpath('text()').extract_first()
            if size is None:
                # An option without a label cannot name a size; skip it
                # instead of failing on None.replace().
                continue
            size = size.replace(u'\xa0', '').encode('utf-8')

            # Use the option's value as sku id; fall back to the size label
            # when the option has no value attribute at all.
            sku_id = size if option_value is None else option_value

            sku_item = SkuItem()
            sku_item['type'] = 'sku'
            sku_item['show_product_id'] = show_product_id
            sku_item['from_site'] = item['from_site']
            sku_item['id'] = sku_id
            sku_item['list_price'] = item['list_price']
            sku_item['current_price'] = item['current_price']
            sku_item['size'] = size
            sku_item['color'] = 'onecolor'
            sku_item['is_outof_stock'] = False

            sizes.append(size)
            skus.append(sku_item)

        item['skus'] = skus
        item['sizes'] = {'size': sizes}
        item['dimensions'] = ['size']
        item['colors'] = ['onecolor']

        item['brand'] = brand
        item['desc'] = description
        item['show_product_id'] = show_product_id
        if 'category' not in item.keys():
            # Only read the category text when it is actually needed, so a
            # page with a single text node does not break items that already
            # have a category. The category is the second text node.
            item['category'] = style_category_font.extract()[1]

        yield item

        color_url = self.base_url + 'larger_view.asp?style=' + show_product_id

        yield Request(color_url,
                      callback=self.parse_color,
                      meta={'item': item})
Example #26
0
    def handle_parse_item(self, response, item):
        """Populate ``item`` from a product page and emit its colour and SKUs.

        The method is a generator.  It always yields one ``Color`` item (the
        product is treated as having a single colour, ``u'one color'``).
        After that:

        * if the page lists sizes, a ``Request`` for the first size URL is
          yielded with ``parse_sku_item`` as callback; the item, the list of
          size URLs, the list of sizes and the current index travel in
          ``meta``;
        * otherwise one "one size" ``SkuItem`` is attached to ``item`` and
          ``item`` itself is yielded.

        Nothing is yielded when the page contains the
        ``atg_store_noMatchingItem`` placeholder, or when the embedded
        ``window.universal_variable`` script (source of prices and SKU data)
        cannot be found.

        :param response: the product page response.
        :param item: the base item to fill in; it is mutated in place.
        """
        import logging

        sel = Selector(response)

        if len(sel.xpath(".//div[@class='atg_store_noMatchingItem']")) > 0:
            return

        info = sel.xpath(".//div[@class='firstContainer row']")
        item['brand'] = info.xpath("./h1/a[1]/text()").extract()[0]
        item['show_product_id'] = info.xpath(
            "./h1/h2/text()").extract()[0].strip()
        item['title'] = info.xpath("./h1/a[2]/text()").extract()[0]
        item['colors'] = []

        if len(sel.xpath(".//div[@id='tab1_info']")) > 0:
            # The description is the first table, plus the second one when
            # the tab has a second <div>.
            desc = sel.xpath(
                ".//div[@id='tab1_info']/div[1]/table").extract()[0]
            if len(sel.xpath(".//div[@id='tab1_info']/div[2]")) > 0:
                desc += sel.xpath(
                    ".//div[@id='tab1_info']/div[2]/table").extract()[0]
            item['desc'] = desc

        skusStr = "".join(
            re.findall(r'window.universal_variable =.+\}\}<\/script>',
                       response.body, re.S))
        if not skusStr:
            # Prices and SKU data all come from this script; without it the
            # item cannot be built, so skip the page instead of failing on
            # an unbound variable further down.
            logging.getLogger(__name__).warning(
                'universal_variable script not found, skipping %s',
                response.url)
            return

        # [27:-9] strips the leading "window.universal_variable =" and the
        # trailing "</script>" so only the JS object literal is evaluated.
        context = execjs.compile('''
                var skus = %s;
                function getSkus(){
                    return skus;
                }
            ''' % skusStr[27:-9])
        skusDict = context.call('getSkus')
        product = skusDict['product']

        item['list_price'] = product['unit_price']
        item['current_price'] = product['unit_sale_price']

        images = []
        for dom in sel.xpath(".//ul[@class='alt_imgs col-md-12']/li"):
            imageItem = ImageItem()
            imageItem['image'] = self.base_url + dom.xpath(
                "./a/@href").extract()[0]
            imageItem['thumbnail'] = re.sub(r'XA\.', 'LA.',
                                            imageItem['image'])
            images.append(imageItem.copy())

        colorItem = Color()
        colorItem['images'] = images
        colorItem['type'] = 'color'
        colorItem['from_site'] = item['from_site']
        colorItem['show_product_id'] = item['show_product_id']
        colorItem['name'] = u'one color'
        colorItem['cover'] = self.base_url + sel.xpath(
            ".//ul[@class='alt_imgs col-md-12']/li[1]/a/img/@src").extract()[0]

        yield colorItem

        item['colors'].append(colorItem['name'])
        item['dimensions'] = ['size']
        item['skus'] = []

        size_nodes = sel.xpath(".//div[@id='sizeValues']/div")
        if len(size_nodes) > 0:
            # The refresh form action is the same for every size.
            refresh_action = sel.xpath(
                ".//form[@id='colorsizerefreshform']/@action").extract()[0]
            sku_item_url_list = []
            sku_size_list = []
            for size_node in size_nodes:
                # onclick is split on single quotes: piece 1 is the ajax
                # product id, piece 3 is the size label.
                onclick_parts = size_node.xpath(
                    "./@onclick").extract()[0].split("'")
                ajax_id = onclick_parts[1]
                sku_size = onclick_parts[3].replace(' ', '%20')
                sku_item_url_list.append(
                    self.base_url + refresh_action + '&productId=' +
                    ajax_id + '&selectedSize=' + sku_size)
                sku_size_list.append(sku_size)
            # Duplicate the last URL: only for avoiding an IndexError in
            # parse_sku_item when its loop reaches the last size.
            sku_item_url_list.append(sku_item_url_list[-1])
            yield Request(sku_item_url_list[0],
                          callback=self.parse_sku_item,
                          meta={
                              "sku_size_list": sku_size_list,
                              "sku_item_url_list": sku_item_url_list,
                              "item": item,
                              "index": 0
                          })
        else:
            skuItem = SkuItem()
            skuItem['type'] = 'sku'
            skuItem['show_product_id'] = product['id']
            skuItem['list_price'] = item['list_price']
            skuItem['current_price'] = item['current_price']
            skuItem['color'] = u'one color'
            skuItem['size'] = u'one size'
            skuItem['id'] = product['sku_code']
            skuItem['from_site'] = item['from_site']
            # Always set the flag so in-stock SKUs carry an explicit False.
            skuItem['is_outof_stock'] = product['stock'] == 0
            item['skus'].append(skuItem)
            item['sizes'] = [u'one size']
            yield item
Example #27
0
    def handle_parse_item(self, response, baseItem):
        """Build the base item, colour items and SKUs from a product page.

        The method is a generator.  For every colour found in the page's
        ``productDetail`` JavaScript object it yields one ``Color`` item and
        collects one ``SkuItem`` per size of that colour; finally it yields
        ``baseItem`` with ``sizes``, ``colors``, ``skus`` and, when the page
        lists similar products, ``related_items_id`` filled in.

        ``list_price`` is read from the price ``<meta>`` tag.
        ``current_price`` is taken from ``productPage.sellingPrice`` in the
        page source and falls back to ``list_price`` when that is absent.

        Nothing is yielded (and ``baseItem`` is left untouched) when the
        page has no ``var productDetail`` definition.

        :param response: the product page response.
        :param baseItem: the base item to fill in; it is mutated in place.
        """
        import logging

        product_detail_str = "".join(
            re.findall(r"var\s+productDetail[^;]+", response.body))
        if not product_detail_str:
            # Sizes, colours and images all come from productDetail; skip
            # the page instead of failing on an unbound variable later.
            logging.getLogger(__name__).warning(
                'productDetail not found, skipping %s', response.url)
            return

        sel = Selector(response)

        product_id = sel.xpath('//div[@id="productId"]/text()').extract()[0]

        baseItem['gender'] = 'men'
        baseItem['type'] = 'base'
        baseItem['from_site'] = self.name
        baseItem['show_product_id'] = product_id

        baseItem['title'] = sel.xpath(
            '//span[@class="row product-title"]/text()').extract()[0].strip()

        size_fit_container = sel.xpath('//div[@id="sizeFitContainer"]')
        if len(size_fit_container) > 0:
            # Wrap description and size/fit markup in a single <div>.
            size_fit = size_fit_container.extract()[0]
            baseItem['desc'] = '<div>' + sel.xpath(
                '//div[@itemprop="description"]').extract(
                )[0] + size_fit + "</div>"
        else:
            baseItem['desc'] = sel.xpath(
                '//div[@itemprop="description"]').extract()[0]
        baseItem['dimensions'] = ['size', 'color']

        context = execjs.compile('''
                %s
                function get_product_detail(){
                    return productDetail;
                    }
            ''' % (product_detail_str))
        product_detail = context.call('get_product_detail')

        # productDetail['sizes'] is keyed by the size value; every colour
        # refers to its sizes through 'sizeCode', so build the reverse map.
        size_js_infos = product_detail['sizes']
        size_infos = {}
        size_values = []
        for size_id in size_js_infos:
            size_infos[size_js_infos[size_id]['sizeCode']] = size_id
            size_values.append(size_id)

        list_price = sel.xpath(
            '//div[@id="productPrices"]//meta[@itemprop="price"]/@content'
        ).extract()[0]

        match = re.search(r'productPage\.sellingPrice\=\'([\d\.]+)\';',
                          response.body)
        current_price = list_price if match is None else match.group(1)

        skus = []
        color_names = []
        color_infos = product_detail['colors']
        for key in color_infos:
            color_info = color_infos[key]
            # The colour key is appended to the display name.
            color_name = color_info['colorName'].strip() + '-' + str(key)
            color_names.append(color_name)

            indexed_images = []
            for image in color_info['images'].values():
                imageItem = ImageItem()
                imageItem['thumbnail'] = image['thumbnail']
                imageItem['image'] = image['zoom']
                indexed_images.append((image['index'], imageItem))
            # Order the images by the index given in the page data.
            indexed_images = sorted(indexed_images, key=lambda x: x[0])

            colorItem = Color()
            colorItem['type'] = 'color'
            colorItem['show_product_id'] = baseItem['show_product_id']
            colorItem['from_site'] = self.name
            colorItem['cover'] = color_info['swatch']
            colorItem['name'] = color_name
            colorItem['images'] = [pair[1] for pair in indexed_images]
            yield colorItem

            for size in color_info['sizes']:
                skuItem = SkuItem()
                skuItem['type'] = 'sku'
                skuItem['from_site'] = self.name
                skuItem['color'] = color_name
                skuItem['show_product_id'] = baseItem['show_product_id']
                skuItem['id'] = key + "-" + size
                skuItem['size'] = size_infos[size]
                skuItem['list_price'] = list_price
                skuItem['current_price'] = current_price
                skuItem['is_outof_stock'] = False
                skus.append(skuItem)

        baseItem['sizes'] = size_values
        baseItem['colors'] = color_names
        baseItem['skus'] = skus

        related_items_id = [
            product_item.xpath(
                './div/div[@class="info"]/img/@data-product-id').extract()[0]
            for product_item in sel.xpath(
                '//ul[@id="similarities"]/li[@class="product"]')
        ]
        if related_items_id:
            baseItem['related_items_id'] = related_items_id
        yield baseItem
    def handle_parse_item(self, response, baseItem):
        """Build the base item, colour items and SKUs from a product page.

        The method is a generator.  For every colour found in the page's
        ``productDetail`` JavaScript object it yields one ``Color`` item and
        collects one ``SkuItem`` per size of that colour; finally it yields
        ``baseItem`` with ``sizes``, ``colors``, ``skus``, ``desc`` and
        ``dimensions`` filled in.

        ``list_price`` is read from the price ``<meta>`` tag.  A SKU's
        ``current_price`` is the price shown in the ``priceBlock`` whose
        ``priceColors`` text equals the colour name (regular price if
        present, otherwise sale price); SKUs without such a block use
        ``list_price``.

        Nothing is yielded (and ``baseItem`` is left untouched) when the
        page has no ``var productDetail`` definition.

        :param response: the product page response.
        :param baseItem: the base item to fill in; it is mutated in place.
        """
        import logging

        product_detail_str = "".join(
            re.findall(r"var\s+productDetail[^;]+", response.body))
        if not product_detail_str:
            # Sizes, colours and images all come from productDetail; skip
            # the page instead of failing on an unbound variable later.
            logging.getLogger(__name__).warning(
                'productDetail not found, skipping %s', response.url)
            return

        context = execjs.compile('''
                %s
                function get_product_detail(){
                    return productDetail;
                    }
            ''' % (product_detail_str))
        product_detail = context.call('get_product_detail')

        sel = Selector(response)
        product_id = sel.xpath('//div[@id="productId"]/text()').extract()[0]
        baseItem['from_site'] = self.name
        baseItem['show_product_id'] = product_id

        # productDetail['sizes'] is keyed by the size value; every colour
        # refers to its sizes through 'sizeCode', so build the reverse map.
        size_js_infos = product_detail['sizes']
        size_infos = {}
        size_values = []
        for size_id in size_js_infos:
            size_infos[size_js_infos[size_id]['sizeCode']] = size_id
            size_values.append(size_id)

        list_price = sel.xpath(
            '//div[@id="productPrices"]//meta[@itemprop="price"]/@content'
        ).extract()[0]

        # Map "colour label shown next to a price" -> that price.
        color_price_mapping = {}
        for price_block in sel.xpath(
                '//div[@id="productPrices"]//div[@class="priceBlock"]'):
            price_colors = price_block.xpath(
                './span[@class="priceColors"]/text()').extract()
            if len(price_colors) == 0:
                continue
            regular_price = price_block.xpath(
                './span[@class="regularPrice"]/text()').extract()
            if len(regular_price) > 0:
                color_price_mapping[price_colors[0]] = regular_price[0]
            else:
                color_price_mapping[price_colors[0]] = price_block.xpath(
                    './span[@class="salePrice"]/text()').extract()[0]

        skus = []
        color_names = []
        color_infos = product_detail['colors']
        for key in color_infos:
            color_info = color_infos[key]
            color_name = color_info['colorName'].strip()
            color_names.append(color_name)

            indexed_images = []
            for image in color_info['images'].values():
                imageItem = ImageItem()
                imageItem['thumbnail'] = image['thumbnail']
                imageItem['image'] = image['zoom']
                indexed_images.append((image['index'], imageItem))
            # Order the images by the index given in the page data.
            indexed_images = sorted(indexed_images, key=lambda x: x[0])

            colorItem = Color()
            colorItem['type'] = 'color'
            colorItem['show_product_id'] = product_id
            colorItem['from_site'] = self.name
            colorItem['cover'] = color_info['swatch']
            colorItem['name'] = color_name
            colorItem['images'] = [pair[1] for pair in indexed_images]

            yield colorItem

            # Same price for every size of this colour.
            current_price = color_price_mapping.get(color_name, list_price)
            for size in color_info['sizes']:
                skuItem = SkuItem()
                skuItem['type'] = 'sku'
                skuItem['from_site'] = self.name
                skuItem['color'] = color_name
                skuItem['show_product_id'] = product_id
                skuItem['id'] = key + "-" + size
                skuItem['size'] = size_infos[size]
                skuItem['list_price'] = list_price
                skuItem['current_price'] = current_price
                skuItem['is_outof_stock'] = False
                skus.append(skuItem)

        baseItem['sizes'] = size_values
        baseItem['colors'] = color_names
        baseItem['skus'] = skus

        description = sel.xpath('//div[@itemprop="description"]')
        size_fit_container = sel.xpath('//div[@id="sizeFitContainer"]')
        if len(size_fit_container) > 0:
            # Wrap description and size/fit markup in a single <div>.
            size_fit = size_fit_container.extract()[0]
            baseItem['desc'] = ('<div>' + description.extract()[0] +
                                size_fit + "</div>")
        else:
            baseItem['desc'] = description.extract()[0]
        baseItem['dimensions'] = ['size', 'color']
        yield baseItem
Example #29
0
    def handle_parse_item(self, response, item):
        """Fill ``item`` from a product page and emit its colour items.

        The page embeds its data as JavaScript variable assignments
        (``stockJSON``, ``colorPrices``, ``styleIds``, ``pImgs`` ...).  These
        are pulled out with regular expressions and evaluated.

        The method is a generator: it yields one ``Color`` item per entry in
        ``colorNames`` and finally the completed ``item`` (after passing it
        through ``self.handle_dimension_to_name``).

        Nothing is yielded when

        * the page sets ``outOfStock`` to ``true``,
        * ``stockJSON`` is missing or empty, or
        * neither of the two known description blocks is present.

        :param response: the product page response; ``response.meta`` may
            carry a ``gender`` hint.
        :param item: the base item to fill in; ``item['show_product_id']``
            must already be set.
        """
        import logging

        body = response.body

        # Cheap text checks first: skip sold-out or stock-less pages before
        # starting the JavaScript runtime or building a selector.
        outofstock_result = re.search(r'outOfStock[\s]*=[\s]*([^;]+);', body)
        if outofstock_result and outofstock_result.group(1) == 'true':
            return

        stock_json_result = re.search(r'var stockJSON[\s]*=[\s]*([^;]+);',
                                      body)
        if not stock_json_result:
            logging.getLogger(__name__).warning(
                'stockJSON not found, skipping %s', response.url)
            return

        def js_source(pattern):
            # Text of the first capture group of ``pattern`` in the page.
            return re.search(pattern, body).group(1)

        # NOTE(security): every eval() below executes text taken from the
        # downloaded page as a Python expression.  That is only acceptable
        # while the crawled site is trusted; a literal-only parser would be
        # the safe replacement but may not accept everything the pages
        # contain, so the calls are flagged here rather than swapped.
        stock_dic = eval(stock_json_result.group(1))
        if not stock_dic:
            return

        pImgStr = "".join(re.findall(r'(pImgs[^;]+;)+', body))
        context = execjs.compile('''
            %s
            function getPImgs(){
                return pImgs;
            }
        ''' % pImgStr)
        pImgs = context.call('getPImgs')

        sel = Selector(response)

        color_price_dic = eval(js_source(r'colorPrices[\s]*=[\s]*([^;]+);'))
        style_id_dic = eval(js_source(r'styleIds[\s]*=[\s]*([^;]+);'))
        product_gender = eval(
            js_source(r'productGender[\s]*=[\s]*([^;]+);'))
        zeta_categories = eval(
            js_source(r'zetaCategories[\s]*=[\s]*([^;]+);'))
        category = eval(js_source(r';[\s]*category[\s]*=[\s]*([^;]+);'))
        sub_category = eval(
            js_source(r'subCategory[\s]*=[\s]*("[^"]+"[\s]*);'))
        dimension_dic = eval(js_source(r'dimensions[\s]*=[\s]*([^;]+);'))
        dimensionIdToNameJson = eval(
            js_source(r'dimensionIdToNameJson[\s]*=[\s]*([^;]+);'))
        valueIdToNameJSON = eval(
            js_source(r'valueIdToNameJSON[\s]*=[\s]*([^;]+);'))
        colorNames = eval(
            js_source(r'colorNames[\s]*=[\s]*({[^}]+}[\s]*);'))

        if len(zeta_categories) > 0:
            item['product_type'] = list(zeta_categories[0].values())[0]

            if category == item['product_type']:
                item['category'] = sub_category
            else:
                item['category'] = category
                item['sub_category'] = sub_category
        else:
            item['product_type'] = category
            item['category'] = sub_category

        if 'gender' in response.meta:
            meta_gender = response.meta['gender']

            # lower() must be called; comparing the bound method itself to
            # a string is always False and disabled the unisex handling.
            if product_gender.lower() == 'unisex':
                if meta_gender == 'boys' or meta_gender == 'girls':
                    item['gender'] = 'kid-unisex'
                else:
                    item['gender'] = 'unisex'
            else:
                item['gender'] = meta_gender

        # Description: only the <ul> is kept (the rest is skipped as too
        # complex).  Two page layouts are known; try them in order.
        desc_xpath = ('//div[@id="productDescription"]'
                      '//div[@itemprop="description"]/ul')
        desc = sel.xpath(desc_xpath).extract()
        if len(desc) == 0:
            desc_xpath = ('//div[@id="prdInfoText"]'
                          '//span[@class="description summary"]/ul')
            desc = sel.xpath(desc_xpath).extract()
            if len(desc) == 0:
                return
        item['desc'] = desc[0]

        # Size-chart links inside the description list.
        size_info_images = []
        for size_info in sel.xpath(
                desc_xpath + '/li/a[@class="popup-570-550"]'):
            size_info_image_url = size_info.xpath('@href').extract()[0]
            # Anything without an http(s) scheme gets base_url prepended.
            if not re.match(r'^https?:\/\/', size_info_image_url):
                size_info_image_url = self.base_url + size_info_image_url
            size_info_images.append(size_info_image_url)

        if len(size_info_images) > 0:
            item['size_info'] = {'images': size_info_images}

        # Colours.
        colors = []
        for (color, color_name) in colorNames.items():
            colorItem = Color()

            colorItem['type'] = 'color'
            colorItem['from_site'] = self.name
            colorItem['show_product_id'] = item['show_product_id']
            colorItem['name'] = color_name
            colors.append(color_name)

            colorImages = pImgs[str(style_id_dic[color])]
            detailed = colorImages['DETAILED']
            thumbImages = colorImages['MULTIVIEW_THUMBNAILS']

            # Cover preference: detailed 'p', detailed 'd', then thumbnail
            # views '1', '4', '5'.  No cover is set if none of them exists.
            for source, view in ((detailed, 'p'), (detailed, 'd'),
                                 (thumbImages, '1'), (thumbImages, '4'),
                                 (thumbImages, '5')):
                if view in source:
                    colorItem['cover'] = source[view]
                    break

            # Prefer the '2x' images, fall back to 'MULTIVIEW'.
            images = colorImages['2x']
            if len(images) == 0:
                images = colorImages['MULTIVIEW']

            images_array = []
            for view, thumbnail in sorted(thumbImages.items(),
                                          key=lambda d: d[0]):
                if view not in images:
                    continue
                imageItem = ImageItem()
                imageItem['image'] = images[view]
                imageItem['thumbnail'] = thumbnail

                # The 'p' / 'd' views go to the front of the list.
                if view == 'p' or view == 'd':
                    images_array.insert(0, imageItem)
                else:
                    images_array.append(imageItem)

            colorItem['images'] = images_array

            yield colorItem

        item['colors'] = colors

        dimensions = []
        sizes = {}
        for dimension in dimension_dic:
            dimensions.append(dimensionIdToNameJson[dimension])
            sizes[dimensionIdToNameJson[dimension]] = []

        if len(dimensions) == 0:
            dimensions = ['size']

        if len(sizes) == 0:
            sizes = {'size': ['onesize']}

        item['dimensions'] = dimensions

        # SKU stock.
        skuCollectionsList = []
        for sku_stock in stock_dic:
            color = sku_stock['color']
            # Stock rows whose colour has no price entry are ignored.
            if color not in color_price_dic:
                continue

            skuItem = SkuItem()
            skuItem['type'] = 'sku'
            skuItem['from_site'] = self.name
            skuItem['show_product_id'] = item['show_product_id']
            skuItem['id'] = sku_stock['id']

            skuItem["list_price"] = color_price_dic[color]['wasInt']
            skuItem['current_price'] = color_price_dic[color]['nowInt']

            skuItem['color'] = colorNames[color]

            size_demension = {}
            for demension in dimension_dic:
                if (demension in sku_stock and
                        sku_stock[demension] in valueIdToNameJSON):
                    dimension_name = dimensionIdToNameJson[demension]
                    size_value = valueIdToNameJSON[
                        sku_stock[demension]]['value']
                    size_demension[dimension_name] = size_value

                    # Collect each distinct value once per dimension.
                    if size_value not in sizes[dimension_name]:
                        sizes[dimension_name].append(size_value)

            if len(size_demension) == 0:
                size_demension = {'size': 'onesize'}

            skuItem['size'] = size_demension
            skuItem['quantity'] = sku_stock['onHand']
            skuItem['is_outof_stock'] = False

            skuCollectionsList.append(skuItem)

        item['skus'] = skuCollectionsList
        item['sizes'] = sizes

        item = self.handle_dimension_to_name(response, item,
                                             dimensionIdToNameJson)

        yield item
Example #30
0
    def parse_skus(self, response):
        """Turn the size/colour JSON response into SKUs and finish the item.

        ``response.meta`` must provide ``item`` (the base item being built),
        ``images`` (image items shared by every colour) and
        ``yielded_coloritems`` (truthy when the colour items were already
        emitted elsewhere).

        The method is a generator.  One ``SkuItem`` per SKU in the JSON is
        appended to ``item['skus']``.  Unless ``yielded_coloritems`` is set,
        one ``Color`` item per distinct colour is yielded, each using the
        shared ``images`` and the first image's thumbnail as cover.  Finally
        ``item`` is yielded with ``sizes`` and ``colors`` filled in.

        Nothing is yielded when the URL contains ``errorpage``, or when
        colour items have to be produced but ``images`` is empty.

        :raises NameError: when a colour string contains ``?`` but does not
            match the known ``?...?false`` suffix format.
        """
        import logging
        logger = logging.getLogger(__name__)

        item = response.meta['item']
        images = response.meta['images']
        yielded_coloritems = response.meta['yielded_coloritems']
        if 'errorpage' in response.url:
            return

        body_json = json.loads(response.body)
        # The SKU details are a JSON document stored as a string inside
        # the outer JSON document.
        detail_json = json.loads(
            body_json['ProductSizeAndColor']['productSizeAndColorJSON'])

        # Suffix of the form "?<something>?false" that the site appends to
        # some colour names.
        color_suffix = re.compile(r'\?.+\?false')

        color_names = []
        sizes = []
        item['skus'] = []
        for skus_detail in detail_json:
            for sku_detail in skus_detail['skus']:
                if 'color' in sku_detail:
                    raw_color = sku_detail['color']
                    if color_suffix.search(raw_color):
                        color_name = color_suffix.sub('', raw_color)
                    elif '?' in raw_color:
                        logger.error('unexpected color name: %s', raw_color)
                        raise NameError('colorname error ' + raw_color +
                                        ' ' + item['url'])
                    else:
                        color_name = raw_color.strip()
                else:
                    color_name = 'One Color'

                if color_name not in color_names:
                    color_names.append(color_name)

                if 'size' in sku_detail and sku_detail['size']:
                    size = sku_detail['size']
                else:
                    size = 'One Size'
                if size not in sizes:
                    sizes.append(size)

                skuItem = SkuItem()
                skuItem['type'] = 'sku'
                skuItem['show_product_id'] = item['show_product_id']
                skuItem['list_price'] = item['list_price']
                skuItem['current_price'] = item['current_price']
                skuItem['color'] = color_name
                skuItem['size'] = size
                skuItem['id'] = sku_detail['sku']
                skuItem['from_site'] = item['from_site']
                skuItem['is_outof_stock'] = False
                if sku_detail['status'] not in ('In Stock', 'InStock'):
                    # Only reported; the SKU is still recorded as in stock.
                    logger.info('stock status: %s', sku_detail['status'])
                item['skus'].append(skuItem)

        if not yielded_coloritems and color_names:
            if not images:
                # No image to use as cover: drop the product entirely.
                return
            cover = images[0]['thumbnail']
            for color_name in color_names:
                colorItem = Color()
                colorItem['images'] = images
                colorItem['type'] = 'color'
                colorItem['from_site'] = item['from_site']
                colorItem['show_product_id'] = item['show_product_id']
                colorItem['name'] = color_name
                colorItem['cover'] = cover
                yield colorItem

        item['sizes'] = sizes
        item['colors'] = color_names
        yield item