def parse_product(self, response):
    """Build one ``Product`` item from a product detail page.

    Fields are collected through a ``ProductLoader`` bound to *response*:
    url, name, price, stock, category (breadcrumb entries), brand,
    identifier, sku and image_url.

    Yields nothing when the page has no ``<h1 itemprop="name">`` text,
    otherwise yields the loaded item.
    """
    loader = ProductLoader(item=Product(), response=response)
    loader.add_value('url', response.url)
    loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
    if not loader.get_collected_values('name'):
        # No product name on the page: nothing to emit.
        return

    loader.add_xpath('price', '//span[@class="full-price"]/text()')

    # A "low-stock" or a "no-stock" marker both result in stock being set to 0.
    stock = (
        response.xpath('//div[contains(@class, "low-stock")]')
        or response.xpath('//div[contains(@class, "no-stock")]')
    )
    if stock:
        loader.add_value('stock', 0)

    categories = response.xpath(
        '//ul[@class="the-breadcrumb-list"]//span[@itemprop="title"]/text()'
    ).extract()
    for category in categories:
        # Skip the navigational breadcrumb entries.
        if category.title() not in ('Home', 'Search Results'):
            loader.add_value('category', category)

    # Prefer the brand handed over by the previous callback; fall back to the
    # brand link on the page. A missing brand link no longer raises IndexError
    # (which used to drop the whole item); the brand is simply left unset.
    brand = response.meta.get('brand')
    if not brand:
        brand_links = response.xpath(
            '//div[@class="product-brand"]/a/@href').extract()
        brand = brand_links[0] if brand_links else ''
    if brand:
        # Turns an href-style slug such as "/some-brand/" into "some brand".
        loader.add_value('brand', brand.strip('/').replace('-', ' '))

    # The text after the last "_" in the URL serves as both identifier and
    # sku. It is a literal value, not an XPath expression, so it must be
    # added with add_value (the identifier was previously passed to
    # add_xpath).
    product_id = response.url.rpartition('_')[-1]
    loader.add_value('identifier', product_id)
    loader.add_value('sku', product_id)

    loader.add_xpath('image_url', '//img[@itemprop="image"]/@src')
    yield loader.load_item()
def parse_product(self, response):
    """Build one ``Product`` item from a product detail page.

    Collects image_url, name, url, identifier, sku, price and, where
    applicable, shipping_cost and stock through a ``ProductLoader`` bound
    to *response*, then yields the loaded item.
    """
    product_loader = ProductLoader(item=Product(), response=response)

    # Only join and store the image URL when one was found:
    # response.urljoin(None) returns the page's own base URL, which would
    # otherwise be recorded as the image.
    image_url = response.css('.js-main-image').xpath('@src').extract_first()
    if image_url:
        product_loader.add_value('image_url', response.urljoin(image_url))

    product_name = response.xpath(
        '//h1[@itemprop="name"]/text()').extract_first()
    product_loader.add_value('name', product_name)
    product_loader.add_value('url', response.url)

    identifier = response.xpath('//input[@name="id"]/@value').extract_first()
    product_loader.add_value('identifier', identifier)

    # sku and price are both scraped out of the data-product attribute.
    # NOTE(review): both patterns are greedy; if the attribute holds several
    # quoted values / commas after the key they may capture more than the
    # intended field -- confirm against a real payload.
    product_data = response.css('.js-product-info').xpath('@data-product')

    sku = product_data.re('"erpId":.+"(.+)"')
    sku = sku[0] if sku else ''
    product_loader.add_value('sku', sku)

    price = product_data.re('"priceCurrent":(.+),')
    price = price[0] if price else ''
    product_loader.add_value('price', price)

    # Orders below 1000 carry a flat shipping cost of 49.
    # Assumes the loader's price input processor yields a number that can be
    # compared with an int -- TODO confirm.
    collected_price = product_loader.get_collected_values('price')
    if collected_price and collected_price[0] < 1000:
        product_loader.add_value('shipping_cost', '49')

    # Presence of the "block-monitor-product-form" form is treated as
    # out of stock.
    out_of_stock = response.xpath('//form[@id="block-monitor-product-form"]')
    if out_of_stock:
        product_loader.add_value('stock', 0)

    yield product_loader.load_item()