Example #1
0
    def parse_product_size(self, response):
        """Parse a size-chart response and attach the table to its product.

        Reads ``uid``, ``product_size_url`` and ``product_id`` from
        ``response.meta``, finds the already-collected product whose ``uid``
        equals ``product_id`` and stores the size-conversion table on it as a
        list of rows, each row being the list of its ``<td>`` texts.

        :param response: response for the size-chart page.
        :return: whatever ``try_return_designer_if_last_product_detail_page``
            returns for this designer uid.
        :raises IndexError: if no collected product has the given id.
        """
        designer_info = self.designer_info_dict[response.meta['uid']]
        designer = designer_info.designer
        product_size_url = response.meta['product_size_url']
        product_id = response.meta['product_id']
        self.logger.info(u'parse product size[%s] response, response status: %d', product_size_url, response.status)
        # Build a list instead of indexing filter(): on Python 3 filter()
        # returns an iterator, which cannot be subscripted. A missing product
        # still surfaces as IndexError, as before.
        matching_products = [p for p in designer['products'] if p['uid'] == product_id]
        product = matching_products[0]

        # The size chart markup is embedded as text inside a <script> tag, so
        # it has to be re-parsed with its own Selector.
        # NOTE(review): get_first presumably returns None when nothing matched
        # (confirm in skutils); fall back to empty text so a page without the
        # modal yields an empty table instead of failing in Selector().
        modal_html = skutils.get_first(response.xpath('//script[@id="sizechart-modal"]/text()').extract())
        sel = Selector(text=modal_html or u'')
        tr_nodes = sel.xpath('//table[@class="size-conversion-table"]//tr')

        design_size = [[skutils.get_first(td.xpath('text()').extract()) for td in tr_node.xpath('td')]
                       for tr_node in tr_nodes]

        product['design_size'] = design_size

        return self.try_return_designer_if_last_product_detail_page(response.meta['uid'])
Example #2
0
    def parse_designer(self, response):
        """Parse a designer page into a ``DesignerItem``.

        Extracts the designer id, name, description (two text fragments that
        are concatenated), avatar URL and nation from the page.

        :param response: response for the designer page.
        :return: the designer item itself when no designer id was found on
            the page, otherwise the request built by
            ``make_products_list_request`` for that designer.
        """
        self.logger.info('Hi, this is designer page! %s', response.url)
        designer = DesignerItem()

        uid = skutils.get_first(response.xpath('//ul[@class="dropdown-menu"]/li/@data-designer-id').extract())
        name = skutils.get_first(response.xpath('//div[@class="designer-info-wrap"]/h1/text()').extract())
        desc_part1 = skutils.get_first(response.xpath('//div[@class="designer-info-wrap"]/p/text()').extract())
        desc_part2 = skutils.get_first(
            response.xpath('//div[@class="designer-info-wrap"]/p/span/text()').extract())
        # Each fragment must be parenthesised: a conditional expression binds
        # looser than ``+``, so without the parentheses the second fragment
        # was silently dropped whenever the first one was present.
        desc = (desc_part1.strip() if desc_part1 else "") + (desc_part2.strip() if desc_part2 else "")
        img_url = skutils.get_first(response.xpath('//div[@class="designer-avatar"]/img/@src').extract())
        nation = skutils.get_first(response.xpath('//div[@class="designer-avatar"]/div/text()').extract())

        designer['uid'] = uid.strip() if uid else ""
        designer['name'] = name.strip() if name else ""
        designer['url'] = response.url
        designer['desc'] = desc
        designer['img_url'] = (CarnetSpider.DOMAIN_PREFIX + img_url.strip()) if img_url else ""
        designer['nation'] = nation.strip() if nation else ""
        designer['product_detail_urls'] = []
        designer['products'] = []

        # NOTE(review): when no avatar was found this list holds an empty
        # string; confirm the download step tolerates that.
        designer['file_urls'] = [designer['img_url']]  # for download

        uid = designer['uid']
        if not uid:
            # designer has no products
            return designer

        # Register the designer so later callbacks can look it up by uid.
        self.designer_info_dict[uid] = DesignerInfo(uid, designer)

        products_request = self.make_products_list_request(designer, 0, 0)

        return products_request
Example #3
0
    def parse_product_detail(self, response):
        """Parse a product detail page and append the product to its designer.

        Reads ``uid`` and ``detail_url`` from ``response.meta``, builds a
        ``ProductItem`` from the page, takes ``stock`` and ``uid`` from
        ``designer_info.products[detail_url]``, and adds the product and its
        image URLs to the designer item.

        :param response: response for the product detail page.
        :return: whatever ``try_return_designer_if_last_product_detail_page``
            returns for this designer uid.
        """
        designer_info = self.designer_info_dict[response.meta['uid']]
        designer = designer_info.designer
        detail_url = response.meta['detail_url']
        self.logger.info('parse product detail[%s] response, response status: %d', detail_url, response.status)
        product = ProductItem()

        name = skutils.get_first(
            response.xpath('//div[@class="product-info"]/h1[@class="hidden-xs"]/text()').extract())
        price = skutils.get_first(
            response.xpath('//span[@class="price hidden-xs"]/span[@class="cdm-price-1"]/text()').extract())
        if not price:
            # Fall back to the alternative price node.
            price = skutils.get_first(
                response.xpath('//span[@class="price hidden-xs"]/span[@class="cdm-price-2"]/text()').extract())
        original_price = skutils.get_first(
            response.xpath('//span[@class="bottom-price"]/span[@class="real-price cdm-price-3"]/text()').extract())

        size_nodes = response.xpath('//select[@id="size-select"]/option')
        size_info = []
        for s in size_nodes:
            size_text = skutils.get_first(s.xpath('text()').extract())
            size_info.append({
                # Guard like every other field here: an <option> without text
                # must not abort the whole callback.
                'size': size_text.strip() if size_text else "",
                'product_id': skutils.get_first(s.xpath('@data-product-id').extract()),
                'stock': skutils.get_first(s.xpath('@data-stock').extract()),
                'selected': (skutils.get_first(s.xpath('@selected').extract()) == "selected"),
            })

        desc = response.xpath('//div[@class="panel-collapse in hidden-xs"]/div[@class="panel-body"]//text()').extract()
        design_size = skutils.get_first(response.xpath('//table[@class="table table-bordered"]').extract())
        img_url = response.xpath('//a[@data-image]/@data-image').extract()

        # Build a list instead of indexing filter() (an iterator on Python 3),
        # and default to "" when no option is marked as selected rather than
        # raising IndexError.
        selected_sizes = [s['size'] for s in size_info if s['selected']]

        product['uri'] = detail_url
        product['name'] = name.strip() if name else ""
        product['price'] = price.strip() if price else ""
        product['original_price'] = original_price.strip() if original_price else ""
        product['size_info'] = size_info
        product['current_size'] = selected_sizes[0] if selected_sizes else ""
        product['desc'] = " ".join(desc).strip()
        # NOTE(review): design_size may be falsy when the table is absent;
        # confirm skutils.remove_html_attributes accepts that.
        product['design_size'] = skutils.remove_html_attributes(design_size)
        product['img_url'] = [CarnetSpider.DOMAIN_PREFIX + x.strip() for x in img_url]
        product['stock'] = designer_info.products[detail_url]['stock']
        product['uid'] = designer_info.products[detail_url]['uid']

        designer['file_urls'].extend(product['img_url'])  # for download

        designer['products'].append(product)

        return self.try_return_designer_if_last_product_detail_page(response.meta['uid'])
Example #4
0
    def parse_designer(self, response):
        """Parse a designer page and schedule its product detail pages.

        Builds a ``SsenseDesignerItem`` whose ``uid`` is the next value of
        ``self.index``, registers it in ``self.designer_info_dict`` and then
        yields one detail request per product URL kept by
        ``SsenseSpider.filter_product``. When the page lists no products the
        designer item itself is yielded instead.

        :param response: response for the designer page.
        """
        self.logger.info(u'Hi, this is designer page! %s', response.url)
        item = SsenseDesignerItem()

        header_name = skutils.get_first(
            response.xpath('//div[contains(@class, "browsing-designer-header-content")]/h1/text()').extract())
        header_desc = skutils.get_first(
            response.xpath('//div[contains(@class, "browsing-designer-header-content")]/p/text()').extract())

        # Designers get a running counter as their uid.
        self.index += 1
        item['uid'] = self.index
        if header_name:
            item['name'] = header_name.strip()
        else:
            item['name'] = ""
        item['url'] = response.url
        item['desc'] = header_desc
        item['product_detail_urls'] = []
        item['products'] = []

        item['file_urls'] = []

        designer_uid = item['uid']

        info = DesignerInfo(designer_uid, item)
        self.designer_info_dict[designer_uid] = info

        hrefs = response.xpath('//div[@class="browsing-product-item"]/a/@href').extract()
        candidate_urls = [SsenseSpider.DOMAIN_PREFIX + href for href in hrefs]

        if not candidate_urls:
            # designer has no products
            yield item
            return

        # filter_product is handed the item and the candidates; the URLs to
        # crawl are then read back from the item.
        SsenseSpider.filter_product(item, candidate_urls)
        accepted_urls = item['product_detail_urls']
        info.remain_detail_page = len(accepted_urls)
        for accepted_url in accepted_urls:
            yield self.make_products_detail_request(accepted_url, item)
Example #5
0
    def parse(self, response):
        """Parse a product page and append the product to designer ``1``.

        The product ``uid`` is the next value of ``self.index`` as a string,
        its ``uri`` is the last ``/``-separated segment of the response URL,
        and its images are the ``src`` values found under the
        ``product-image`` and ``product-image-bottom`` blocks, in that order.

        :param response: response for the product page.
        :return: whatever ``try_return_designer_if_last_product_detail_page``
            returns for designer ``1``.
        """
        info = self.designer_info_dict[1]
        designer = info.designer
        page_url = response.url
        last_segment = page_url.split('/')[-1]
        self.logger.info(u'parse product detail[%s] response, response status: %d', page_url, response.status)
        item = PortraitProductItem()

        title = skutils.get_first(response.xpath('//div[@class="product-name"]/h1/text()').extract())
        main_images = response.xpath('//div[@class="product-image"]//img/@src').extract()
        bottom_images = response.xpath('//div[@class="product-image-bottom"]//img/@src').extract()
        images = list(main_images)
        images.extend(bottom_images)

        # Products get a running counter (as text) as their uid.
        self.index += 1
        item['uid'] = str(self.index)
        item['uri'] = last_segment
        item['name'] = title
        item['img_url'] = images

        designer['file_urls'].extend(item['img_url'])  # for download

        designer['products'].append(item)

        return self.try_return_designer_if_last_product_detail_page(1)
Example #6
0
    def parse_product_detail(self, response):
        """Parse a product detail page and request its size information.

        Reads ``uid`` and ``detail_url`` from ``response.meta``, builds a
        ``SsenseProductItem`` from the ``data-product-*`` attributes of the
        description container plus the size options, description text and
        image URLs, and appends it to the designer item.

        :param response: response for the product detail page.
        :return: the request built by ``make_products_size_request`` for the
            product id and category id found on the page.
        """
        designer_info = self.designer_info_dict[response.meta['uid']]
        designer = designer_info.designer
        detail_url = response.meta['detail_url']
        self.logger.info(u'parse product detail[%s] response, response status: %d', detail_url, response.status)
        product = SsenseProductItem()

        product_nodes = response.xpath('//div[@class="product-description-container"]')
        uid = skutils.get_first(product_nodes.xpath('@data-product-id').extract())
        name = skutils.get_first(product_nodes.xpath('@data-product-name').extract())
        sku = skutils.get_first(product_nodes.xpath('@data-product-sku').extract())
        category_id = skutils.get_first(product_nodes.xpath('@data-product-category-id').extract())
        price = skutils.get_first(product_nodes.xpath('@data-product-price').extract())

        # position()>1 skips the first <option> of the size selector.
        size_nodes = response.xpath('//select[@id="size"]/option[position()>1]')
        size_info = []
        for s in size_nodes:
            size_text = skutils.get_first(s.xpath('text()').extract())
            size_info.append({
                # Guard like name/price below: an <option> without text must
                # not abort the whole callback.
                'size': size_text.strip() if size_text else "",
                # A disabled option is recorded as stock '0'; otherwise the
                # stock is left as None.
                'stock': '0' if skutils.get_first(s.xpath('@disabled').extract()) == 'disabled' else None,
            })

        desc = response.xpath('//p[contains(@class, "product-description-text")]//text()').extract()
        img_url = response.xpath('//div[@class="image-wrapper"]//img/@data-src').extract()

        product['uri'] = detail_url
        product['name'] = name.strip() if name else ""
        # Parentheses only make the existing grouping explicit.
        product['price'] = ("$" + price.strip()) if price else ""
        product['size_info'] = size_info
        product['desc'] = " ".join(desc).strip()
        product['img_url'] = img_url
        product['uid'] = uid
        product['sku'] = sku
        product['category_id'] = category_id

        designer['file_urls'].extend(product['img_url'])  # for download

        designer['products'].append(product)

        return self.make_products_size_request(designer, uid, category_id)