예제 #1
0
 def parse_product(self, response):
     """Yield one product per row of the page's ``stockMatrix`` script data.

     Fields shared by every option (url, name, sku, category, image and
     brand) are loaded once from the page.  Each ``stockMatrix`` row is then
     consumed in order: one value per ``.skuAttribute`` element (appended to
     the name), followed by the identifier, the stock text and the price.
     Rows whose stock text starts with 'Unavailable' are not yielded.
     """
     loader = ProductLoader(item=Product(), response=response)
     loader.add_xpath('url', '//link[@rel="canonical"]/@href')
     loader.add_xpath('name', '//span[@id="productName"]//text()')
     loader.add_xpath('sku', '//span[@id="productEAN"]/text()[last()]')
     loader.add_xpath('category', '//div[@id="breadcrumb"]/ul/li[position()>1]/a/span/text()')
     loader.add_css('image_url', '.productImageItem ::attr(href)')
     brand = response.css('.brand ::text').extract_first()
     # The literal text "null" is not accepted as a brand name.
     if brand != "null":
         loader.add_value('brand', brand)
     item = loader.load_item()

     stock_matrix_re = re.compile('stockMatrix = (.+?);', re.DOTALL)
     data = response.xpath('//script/text()').re(stock_matrix_re)
     options = json.loads(data[0])
     # Loop-invariant: how many attribute values lead each row.
     attribute_count = len(response.css('.skuAttribute'))
     for option in options:
         loader = ProductLoader(item=Product(), response=response)
         loader.add_value(None, item)
         # next() builtin instead of the Python-2-only iterator.next()
         # method, so the row is consumed the same way on Python 2.6+ and 3.
         opt_iter = iter(option)
         opt_name = ''
         for _ in range(attribute_count):
             opt_name = next(opt_iter)
             loader.add_value('name', opt_name)
         # The colour image is looked up by the last attribute value read.
         colour_url = response.xpath('//input[@class="colourImageUrl"][@name="%s"]/@value' %opt_name).extract_first()
         if colour_url:
             loader.replace_value('image_url', 'http://media.littlewoods.com/i/littlewoods/%s?$1064x1416_standard$' %colour_url)
         loader.replace_value('identifier', next(opt_iter))
         stock = next(opt_iter)
         if stock.startswith('Unavailable'):
             continue
         # 1 unless the stock text contains 'Out of stock'.
         loader.replace_value('stock', int('Out of stock' not in stock))
         loader.replace_value('price', next(opt_iter))
         yield loader.load_item()
예제 #2
0
 def parse_product(self, response):
     """Yield the page's product, or one product per listed variation.

     The base product is built from the page markup and
     ``response.meta['brand']``.  When the page has no ``.variations``
     element the base product is yielded as-is.  Otherwise each entry of the
     form's ``data-product_variations`` JSON produces a copy of the base
     product with the option texts appended to the name and the variation's
     ``display_price`` / ``variation_id`` as price / identifier.
     """
     identifier = response.xpath('//div[@itemscope]/@id').re('product-(.+)')
     loader = ProductLoader(item=Product(), response=response)
     loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
     loader.add_xpath('price', '//meta[@itemprop="price"]/@content')
     loader.add_xpath('url', '//link[@rel="canonical"]/@href')
     # Skip the first breadcrumb link.
     category = response.css('.breadcrumb a::text').extract()[1:]
     loader.add_value('category', category)
     loader.add_value('brand', response.meta['brand'])
     loader.add_xpath('image_url', '//div/@data-original-img')
     loader.add_value('identifier', identifier)
     product = loader.load_item()
     if not response.css('.variations'):
         yield product
         return

     raw_variations = response.xpath('//form/@data-product_variations').extract_first()
     variations = json.loads(raw_variations) if raw_variations else None
     if not isinstance(variations, list):
         # The attribute is missing or does not hold a JSON list, so there
         # is nothing to iterate; fall back to the base product instead of
         # raising TypeError.
         # NOTE(review): the markup looks like WooCommerce, which emits
         # "false" here when variations are loaded via AJAX - confirm.
         yield product
         return

     for variation in variations:
         variation_loader = ProductLoader(item=Product(product), response=response)
         attributes = variation['attributes'].values()
         # Reset the name to the base name before appending option labels.
         variation_loader.replace_value('name', product['name'])
         for attribute in attributes:
             variation_loader.add_xpath('name', '//option[@value="%s"]/text()' %attribute)
         variation_loader.replace_value('price', variation['display_price'])
         variation_loader.replace_value('identifier', variation['variation_id'])
         yield variation_loader.load_item()
예제 #3
0
 def parse_product(self, response):
     """Yield the page's product, or one product per ``stockMatrix`` variant.

     Identifiers come from ``response.meta['row']['PRODUCT_NUMBER']``.  When
     the page lists ``skuAttribute`` options, every ``stockMatrix`` row
     becomes a product named after the base name plus the values that
     precede the row's ``sku...`` element; the stock text and price are read
     from the two positions after it.  Otherwise a single product is built
     from the page's hidden inputs and availability meta tag.
     """
     hxs = HtmlXPathSelector(response)
     loader = ProductLoader(item=Product(), selector=hxs)
     loader.add_xpath('category', '//li[@typeof="v:Breadcrumb"]/a[@href!="/"]/text()')
     brand = hxs.select('//script[@type="text/javascript"]/text()').re('brand: *\"(.+)\"')
     loader.add_value('brand', brand)
     loader.add_xpath('image_url', '//div[@id="amp-originalImage"]/img/@src')
     loader.add_value('url', url_query_cleaner(response.url))
     loader.add_xpath('name', '//input[@name="speedtrapProductDisplayName"]/@value')
     item = loader.load_item()
     if hxs.select('//ul[@class="productOptionsList"]/li[contains(@class, "skuAttribute")]'):
         data = hxs.select('//script[contains(text(),"stockMatrix =")]/text()')[0].extract()
         # Bare null tokens are quoted before parsing - presumably so every
         # element of a row is a string; verify against the site's data.
         data = data.replace('\n', '').replace('null', '"null"')
         match = re.search('stockMatrix = (.*?);', data, re.DOTALL)
         variants = json.loads(match.group(1)) if match else []
         for i, variant in enumerate(variants):
             # The element starting with 'sku' separates the attribute
             # values (before it) from stock and price (after it).
             sku = [elem for elem in variant if elem.startswith('sku')][0]
             sku_idx = variant.index(sku)
             product = Product(item)
             product['name'] = item['name'] + ' - ' + ' '.join(variant[:sku_idx]).title()
             # Variant identifier is the product number plus the row index.
             product['identifier'] = '{}-{}'.format(response.meta.get('row').get('PRODUCT_NUMBER'), i)
             product['sku'] = product['identifier']
             product['price'] = variant[sku_idx + 2]
             product['stock'] = 1 if 'Available#Delivery' in variant[sku_idx + 1] else 0
             yield product
         return
     loader.add_value('identifier', response.meta.get('row').get('PRODUCT_NUMBER'))
     loader.add_value('sku', response.meta.get('row').get('PRODUCT_NUMBER'))
     loader.add_xpath('price', '//input[@name="speedtrapPrice"]/@value')
     stock = 1 if hxs.select('//meta[@property="product:availability"]/@content[.="In Stock"]') else 0
     loader.add_value('stock', stock)
     yield loader.load_item()
예제 #4
0
    def parse_product(self, response):
        """Yield one product per option row, or a zero-priced base product.

        The base identifier/sku is the first run of four digits in the URL.
        When the ``.tbl`` table has no ``.tr`` rows the base item is yielded
        with price 0; otherwise each row overrides identifier, sku, price
        and name on a copy of the base item.
        """
        loader = ProductLoader(Product(), response=response)
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        identifier = re.search(r'\d\d\d\d', response.url).group(0)
        loader.add_value('identifier', identifier)
        loader.add_value('sku', identifier)
        loader.add_value('url', response.url)
        loader.add_xpath('name', '//header[@class="prodCat"]/h1/text()')
        # Breadcrumb links minus the first, plus the text of the last crumb.
        category = response.css('.bread li a::text').extract()[1:]
        category += response.css('.bread li:last-child::text').extract()
        loader.add_value('category', category)
        image_url = response.css('.detimg a::attr(href)').extract_first()
        if image_url:
            loader.add_value('image_url', response.urljoin(image_url))
        item = loader.load_item()

        options = response.css('.tbl').xpath('.//*[@class="tr"]')
        if not options:
            item['price'] = 0
            yield item
            return
        for option in options:
            loader = ProductLoader(Product(), selector=option)
            loader.add_value(None, item)
            identifier = option.xpath('.//input/@name').extract_first()
            loader.replace_value('identifier', identifier)
            loader.replace_value('sku', identifier)
            # The '.pr-now' price is collected first; the plain cell text
            # is added after it as a further candidate.
            loader.replace_css('price', '.tc-price .pr-now::text')
            loader.add_css('price', '.tc-price::text')
            loader.replace_css('name', '.tc-title::text')
            yield loader.load_item()
예제 #5
0
 def parse_product(self, response):
     """Yield one product per row of the page's ``table.product-table``.

     Shared fields are loaded once from the page; stock is set to 0 when no
     ``.in-stock`` element exists.  Each table row supplies its own sku,
     price and option name; the identifier is the sku joined with an MD5 of
     the base name plus the option name.
     """
     loader = ProductLoader(Product(), response=response)
     loader.add_value('url', response.url)
     loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
     sku = response.xpath('//div[@itemprop="description"]/div/div[last()]/text()').extract_first()
     loader.add_value('identifier', sku)
     loader.add_value('sku', sku)
     category = response.css('.breadcrumbs a::text').extract()[1:]
     category += response.css('.breadcrumbs li:last-of-type::text').extract()
     loader.add_value('category', category)
     image_url = response.css('img.gallery-main-image::attr(src)').extract_first()
     if image_url:
         loader.add_value('image_url', response.urljoin(image_url))
     if not response.css('.in-stock'):
         loader.add_value('stock', 0)
     item = loader.load_item()

     options = response.css('table.product-table tbody tr')
     for option in options:
         loader = ProductLoader(Product(), selector=option)
         loader.add_value(None, item)
         sku = option.css('span.product-code::text').re(r'\((.+)\)')[0]
         name = option.css('span.product-name::text').extract_first()
         # md5() needs bytes: encode explicitly so non-ASCII names do not
         # raise.  For ASCII-only names the digest is unchanged.
         name_digest = hashlib.md5((item['name'] + name).encode('utf-8')).hexdigest()
         identifier = '-'.join((sku, name_digest))
         loader.replace_value('identifier', identifier)
         loader.replace_value('sku', sku)
         # The RRP element is collected first; the cell's trailing text
         # node then replaces it when present.
         loader.add_css('price', 'span.product-price-rrp')
         price = option.css('td.product-price').xpath('text()[last()]').extract_first()
         loader.replace_value('price', price)
         # Only append the option name when the base name lacks it.
         if name not in item['name']:
             loader.add_value('name', name)
         yield loader.load_item()
         
예제 #6
0
    def parse_product(self, response):
        """Yield the page's product, or one product per size option.

        The base item is loaded from the page with a fixed shipping cost of
        10; stock is 0 when the page contains 'SOLD OUT' or 'not available
        to buy online'.  For every ``sizeselect`` element with text, the
        matching ``size:<name>`` span text supplies the option's sku and
        quantity, and the size is recorded in the item's metadata.
        """
        loader = ProductLoader(Product(), response=response)
        loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
        loader.add_xpath('url', '//link[@rel="canonical"]/@href')
        loader.add_xpath('price', '//h2[@itemprop="price"]/text()')
        category = response.xpath(
            '//div[@id="breadcrumbs"]/a/text()').extract()
        # Drop the first and last breadcrumb entries.
        loader.add_value('category', category[1:-1])
        image_url = response.css('img.productimage::attr(src)').extract_first()
        if image_url:
            loader.add_value('image_url', response.urljoin(image_url))
        loader.add_value('shipping_cost', 10)

        # Regex literals are raw strings: '\d' and '\#' in plain literals
        # are invalid escape sequences.
        loader.add_xpath('identifier',
                         '//link[@rel="canonical"]/@href',
                         re=r'\d+$')
        loader.add_xpath('sku', '//*/text()', re=r'Product code \#(.+)$')
        if response.xpath(
                "//*[contains(., 'SOLD OUT') or contains(., 'not available to buy online')]"
        ):
            loader.add_value('stock', 0)
        item = loader.load_item()

        options = response.xpath('//*[contains(@class, "sizeselect")]')
        if not options:
            yield item
            return

        for option in options:
            name = option.xpath('text()').extract_first()
            if not name:
                continue
            data = response.xpath('//span/text()[contains(., "size:%s")]' %
                                  name).extract_first().strip()
            sku = re.search(r'sku:(\d+)', data).group(1)
            if option.css('.sizeselectsoldout'):
                stock = 0
            else:
                stock = re.search(r'qty:(\d+)', data).group(1)
                # An option not marked sold out but reporting quantity 0
                # is recorded with stock 1.
                if not stock or not int(stock):
                    stock = 1
            loader = ProductLoader(Product(), response=response)
            loader.add_value(None, item)
            loader.add_value('name', name)
            loader.replace_value('identifier', sku)
            loader.replace_value('sku', sku)
            loader.replace_value('stock', stock)
            pr = loader.load_item()
            pr['metadata'] = {'size': name}
            yield pr
예제 #7
0
    def parse_product(self, response):
        """Yield the page's product, or one product per ``offers`` table row.

        Attribute labels found on the page are matched case-insensitively
        against the variant table's headers to find which cell of each row
        holds which attribute.  Each row's attribute values are appended to
        the name and stored in the item's metadata keyed by the label.
        """
        loader = ProductLoader(response=response, item=Product())
        loader.add_value('url', response.url)
        sku = response.xpath('//input[@id="productSku"]/@value').extract_first()
        loader.add_value('identifier', sku)
        loader.add_value('sku', sku)
        loader.add_xpath('brand', '//span[@itemprop="brand"]/text()')
        # Keep at most the last three breadcrumb links.
        category = response.xpath('//div[@class="breadcrumbs"]//li/a/text()').extract()[-3:]
        loader.add_value('category', category)
        loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
        loader.add_xpath('price', '//span[@id="price-displayed"]/text()')
        image_url = response.xpath('//a[@id="productImage"]/img/@src').extract()
        if image_url:
            loader.add_value('image_url', response.urljoin(image_url[0]))

        item = loader.load_item()

        options = response.xpath('//tr[@itemprop="offers"]')
        if not options:
            yield item
            return

        # Header matching is only needed (and can only fail) when there are
        # option rows, so it happens after the early return above.
        attributes = response.xpath('//br/preceding-sibling::label[@for!="input-quantity"]/text()').extract()
        # A real list (not map()) so .index() also works on Python 3.
        headers = [header.lower() for header in
                   response.xpath('//table[@id="variant-table"]//th/text()').extract()]
        attr_indexes = {headers.index(attr.lower()): attr for attr in attributes}

        for option in options:
            metadata = dict()
            option_name = []
            for idx in sorted(attr_indexes):
                value = option.xpath('.//td')[idx].xpath('.//text()').re_first(r' *\S+.+')
                if value:
                    option_name.append(value.strip())
                    metadata[attr_indexes[idx]] = value.strip()

            loader = ProductLoader(Product(), selector=option)
            loader.add_value(None, item)
            loader.add_value('name', option_name)
            loader.replace_xpath('price', './/span[@itemprop="price"]/text()')
            # 0 is appended as a further price candidate.
            loader.add_value('price', 0)
            loader.replace_xpath('identifier', './/input[contains(@name, "VariantSku")]/@value')
            loader.replace_xpath('sku', './/input[contains(@name, "VariantSku")]/@value')

            option_item = loader.load_item()
            option_item['metadata'] = metadata
            yield option_item


                
                
예제 #8
0
    def parse_product(self, response):
        """Yield the page's product, or one product per option combination.

        A response whose URL contains 'aspxerrorpath' is re-requested from
        the first URL in the request's ``redirect_urls``.  Stock is 0 unless
        the title-cased stock text contains 'Order Now'; a 'Discontinued'
        stock text is recorded in the item's metadata.  When the sidebar has
        select boxes, every combination of their options (other than
        'Select') becomes a product whose identifier is the base identifier
        plus the chosen option values.
        """
        if 'aspxerrorpath' in response.url:
            first_url = response.request.meta['redirect_urls'][0]
            yield Request(first_url, self.parse_product, dont_filter=True)
            return

        base_loader = ProductLoader(Product(), response=response)
        vendor_ref = response.xpath('//@data-feefo-vendor-ref').extract_first()
        base_loader.add_value('identifier', vendor_ref)
        base_loader.add_value('sku', vendor_ref)
        base_loader.add_value('url', response.url)
        base_loader.add_css('name', 'header.page-title h1::text')
        base_loader.add_css('price', 'header.product-sidebar__price h2::text')

        # First and last breadcrumb links are not part of the category.
        breadcrumb = response.css('.breadcrumb a::text').extract()
        base_loader.add_value('category', breadcrumb[1:-1])

        main_image = response.css(
            '.product-gallery__main-image img::attr(src)').extract_first()
        if main_image:
            base_loader.add_value('image_url', response.urljoin(main_image))

        stock_title = response.css('.product-sidebar__stock::text').extract_first().title()
        if 'Order Now' not in stock_title:
            base_loader.add_value('stock', 0)
        item = base_loader.load_item()
        if 'Discontinued' in stock_title:
            item['metadata'] = {"Discontinued?": "Yes"}

        selects = response.css('.product-sidebar select')
        if not selects:
            yield item
            return

        choice_groups = [select.xpath('option[@value!="Select"]')
                         for select in selects]

        for combination in itertools.product(*choice_groups):
            variant_loader = ProductLoader(Product(), response=response)
            variant_loader.add_value(None, item)
            variant_id = item['identifier']
            for choice in combination:
                variant_loader.add_value('name', choice.xpath('text()').extract())
                variant_id = variant_id + '-' + choice.xpath('@value').extract_first()
            variant_loader.replace_value('identifier', variant_id)
            variant_loader.replace_value('sku', variant_id)
            variant_item = variant_loader.load_item()
            variant_item['metadata'] = item.get('metadata', {})
            yield variant_item
예제 #9
0
    def parse_product(self, response):
        """Yield a single product, or nothing when the page has no name.

        Identifier and sku come from the script tag's ``data-flix-ean`` /
        ``data-flix-mpn`` attributes, each falling back to the text next to
        the page's "Model" label.  Stock is 0 unless the Central Warehouse
        text contains 'Available'; an 'Exdisplay' image adds metadata.
        """
        flix = '//script[@type="text/javascript"]/@data-flix-%s'
        name = response.xpath('//td/div[@align="center"]/b/text()').extract()
        if not name:
            return
        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('name', name[0].strip(' ,'))
        loader.add_value('url', response.url)
        # A list comprehension instead of filter(): the result is tested
        # for emptiness, which needs a real list on Python 3.  Blank values
        # are dropped, so a non-empty list always has a usable first entry.
        identifier = [s for s in response.xpath(flix % 'ean').extract()
                      if s.strip()]
        if not identifier:
            identifier = response.xpath(
                '//b[contains(text(), "Model :")]/../text()[1]').extract()
        sku = response.xpath(flix % 'mpn').extract()
        if not sku or not sku[0]:
            sku = response.xpath(
                '//b[contains(text(), "Model")]/../text()[1]').extract()
        loader.add_value('identifier', identifier)
        loader.add_value('sku', sku)
        # The price is taken from the raw body, after the euro entity.
        price = re.findall(u'POST.+?> *&#8364;(.+?) *<', response.body)
        loader.add_value('price', price)
        loader.add_xpath('category', '//h8//a[position()>1]/text()')
        loader.add_xpath('brand', flix % 'brand')
        stock = response.xpath(
            '//button[@value="Central Warehouse"]/../text()').extract_first()
        if not stock or 'Available' not in stock:
            loader.add_value('stock', 0)
        item = loader.load_item()
        if response.xpath('//img[@alt="Exdisplay"]'):
            item['metadata'] = {'Ex Display': 'Ex Display'}

        yield item
예제 #10
0
    def parse_product(self, response):
        """Yield a product read from the page's ``nosto_product`` block.

        Pages without that block are handed to ``self.parse_category``
        instead.  The category is the greatest (by string ordering) of the
        listed category paths, split on '/'.  Shipping cost is fixed at
        29.99; stock is 0 unless an availability span reads 'InStock'.
        """
        xpath = '//div[@class="nosto_product"]/span[@class="%s"]/text()'
        if not response.xpath('//div[@class="nosto_product"]'):
            for product in self.parse_category(response):
                yield product
            return

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('url', response.url)
        for s in ('name', 'price', 'image_url', 'brand'):
            loader.add_xpath(s, xpath % s)
        loader.add_xpath('identifier', xpath % 'product_id')
        loader.add_xpath('sku', '//h6[@class="product-model"]/text()')
        # response.xpath is used throughout rather than a separate
        # deprecated HtmlXPathSelector, matching the lookup above.
        category = response.xpath(xpath % 'category').extract()
        if category:
            # Equivalent to sorting and taking the last entry.
            loader.add_value('category', max(category).strip('/').split('/'))
        loader.add_value('shipping_cost', 29.99)
        if 'InStock' not in response.xpath(xpath % 'availability').extract():
            loader.add_value('stock', 0)
        item = loader.load_item()
        if 'Ex Display' in item['name']:
            item['metadata'] = {'Ex Display': 'Ex Display'}
        yield item
예제 #11
0
 def parse_product(self, response):
     """Yield the product described by ``window.universal_variable``.

     The JSON object assigned to ``window.universal_variable`` supplies url,
     name, price, identifier, sku, stock and breadcrumb.  Items priced below
     30 get a shipping cost of 3.50.  Every ``<option>`` value on the page
     is then requested with this same callback.
     """
     hxs = HtmlXPathSelector(response)
     base_url = get_base_url(response)
     data = hxs.select(
         '//script[@type="text/javascript"]/text()[contains(., "window.universal_variable")]'
     ).extract()[0]
     # Join the script onto one line so the greedy pattern spans the object.
     data = data.replace('\r\n', '')
     data = re.findall('window.universal_variable = ({.+})', data)[0]
     data = json.loads(data)
     product = data['product']
     loader = ProductLoader(item=Product(), selector=hxs)
     loader.add_value('url', product['url'])
     loader.add_value('name', product['name'])
     loader.add_value('price', product['unit_price'])
     loader.add_value('identifier', product['sku_code'])
     loader.add_value('sku', product['id'])
     loader.add_value('stock', int(product['stock']))
     # Drop the first and last breadcrumb entries.
     loader.add_value('category', data['page']['breadcrumb'][1:-1])
     # The image is optional: a page without the image anchor still yields
     # its product instead of raising IndexError.
     image_href = hxs.select(
         '//a[@id="ctl00_con1_ctl00_prodimg1_imglnk1"]/@href').extract()
     if image_href:
         loader.add_value('image_url', urljoin(base_url, image_href[0]))
     item = loader.load_item()
     if item['price'] < 30:
         item['shipping_cost'] = 3.50
     yield item
     for url in hxs.select('//option/@value').extract():
         yield Request(url, callback=self.parse_product)
예제 #12
0
 def parse_doors(self, response):
     """Yield one product per ``offers`` block on a listing page.

     Identifiers are taken, by position, from the list literal following
     ``ecomm_prodid`` in the page's script text.  Each block supplies its
     own name, price, url and image; the page's canonical URL is used when
     a block has no link.  Stock is 0 unless the block has an InStock
     availability link, and shipping cost 36 is added below a price of 750.
     """
     import ast

     url = response.xpath('//link[@rel="canonical"]/@href').extract()
     category = response.xpath(
         '//p[@class="breadcrumbs"]/a[position()>1]/text()').extract()
     ids = response.xpath('//script/text()').re('ecomm_prodid.*(\[.+\])')
     # The list comes from a remote page: parse it as a literal rather than
     # executing it with eval().
     ids = ast.literal_eval(ids[0])
     for i, product in enumerate(
             response.xpath('//div[@itemprop="offers"]')):
         loader = ProductLoader(item=Product(), selector=product)
         loader.add_xpath('name', './/h3[@itemprop="name"]/a/text()[1]')
         loader.add_value('identifier', ids[i])
         loader.add_value('sku', ids[i])
         loader.add_xpath('price', './/span[@itemprop="price"]/text()')
         local_url = product.xpath(
             './/h3[@itemprop="name"]/a/@href').extract()
         if local_url:
             local_url = response.urljoin(local_url[0])
         else:
             local_url = url
         loader.add_value('url', local_url)
         image_url = product.xpath('.//a/img/@src').extract()
         # A block without an image still yields its product.
         if image_url:
             loader.add_value('image_url', response.urljoin(image_url[0]))
         loader.add_value('category', category)
         if not product.xpath(
                 'link[@itemprop="availability"][@href="http://schema.org/InStock"]'
         ):
             loader.add_value('stock', 0)
         if loader.get_output_value('price') < 750:
             loader.add_value('shipping_cost', 36)
         yield loader.load_item()
예제 #13
0
    def parse_product_base(self, response):
        """Return the product item for a page, or None when it has no title.

        The category is the last breadcrumb entry.  The brand is the first
        of ``self.brands`` found in the category or at the start of the
        name.  The identifier comes from the ``id_product`` script variable,
        falling back to the form's hidden input.  Stock is set to 0 when an
        availability text exists and is not 'In Stock'.
        """
        base_url = get_base_url(response)

        image_xpath = '//div[@id="image-block"]//img[@itemprop="image"]/@src'

        breadcrumb = response.css('div.breadcrumb a span::text').extract()
        category = breadcrumb.pop().strip() if breadcrumb else ''

        # No title means there is no product to build; this replaces a bare
        # ``except:`` that would also have hidden unrelated errors.
        name = response.css('div.primary_block h1::text').extract_first()
        if name is None:
            return
        name = name.strip()

        product_brand = ''
        for brand in self.brands:
            if brand.lower() in category.lower() or name.lower().startswith(brand.lower()):
                product_brand = brand
                break

        # The last matching image is used.
        image = response.xpath(image_xpath).extract().pop()
        product_url = urljoin_rfc(base_url, response.url)
        image_url = urljoin_rfc(base_url, image)

        product_id = re.search(r'var id_product\D+(\d+)', response.body)

        price = response.xpath('//span[@id="our_price_display"]//text()').extract()
        price = price.pop() if price else '0.00'

        loader = ProductLoader(response=response, item=Product())
        loader.add_value('url', product_url)
        loader.add_value('name', name)
        loader.add_value('brand', product_brand)
        loader.add_value('image_url', image_url)
        # Strip spaces and use '.' as the decimal separator.
        loader.add_value('price', price.replace(' ', '').replace(',', '.'))
        loader.add_value('category', category)
        loader.add_xpath('sku', '//p[@id="product_reference"]/span/text()')

        if product_id:
            loader.add_value('identifier', product_id.group(1))
        else:
            loader.add_xpath('identifier', '//form//input[@name="id_product"]/@value')

        stock = response.xpath('//span[@id="availability_value"]/text()').extract_first()

        if stock and stock.title() != 'In Stock':
            loader.add_value('stock', 0)

        return loader.load_item()
예제 #14
0
    def parse_product(self, response):
        """Yield the page's product when its loaded price is above zero.

        The price candidates are the non-blank text nodes of the
        ``regular-price`` span, excluding the first one.  Shipping cost is
        fixed at 0.  The first gallery image whose URL is shorter than 1024
        characters is used as the image.
        """
        hxs = HtmlXPathSelector(response)

        # A list comprehension instead of filter(): the result is sliced,
        # which needs a real list on Python 3.
        price_texts = [
            text for text in
            hxs.select("//span[@class='regular-price']//text()").extract()
            if text.strip()
        ]
        price = price_texts[1:]

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('price', price)
        loader.add_value('url', response.url)
        loader.add_xpath('name', "//div[@class='product-name']//h1//text()")
        loader.add_xpath(
            'category',
            "//div[@class='breadcrumbs']//li[position() > 1 and position() < last()]/a/text()"
        )
        brand = hxs.select(
            "//div[@class='product-shop']/div[@class='product-name']/a[@class='brand']/text()"
        ).extract()
        loader.add_value('brand', brand)
        loader.add_value('shipping_cost', 0)
        loader.add_xpath('sku', '//li/span[text()="SKU:"]/../text()')
        loader.add_xpath(
            'identifier',
            "//div[@class='product-view']//input[@name='product']/@value")
        image_urls = hxs.select(
            '//img[contains(@class, "gallery-image")]/@src').extract()
        for image_url in image_urls:
            if len(image_url) < 1024:
                loader.add_value('image_url', image_url)
                break
        product = loader.load_item()
        if product['price'] > 0:
            yield product
예제 #15
0
    def parse(self, response):
        """Download the feed over SFTP and yield one product per CSV row.

        The remote file 'bearmach_feed.csv' is saved next to this module as
        'bearmach_products.csv' and then read with ``csv.DictReader``.
        Header names are stripped of surrounding whitespace.  Cost price and
        supplier details are attached to each item as ``BearmachMeta``.
        """
        transport = paramiko.Transport((CLIENTS_SFTP_HOST, CLIENTS_SFTP_PORT))
        password = "******"
        username = "******"

        file_path = os.path.join(HERE, 'bearmach_products.csv')
        # Close the SFTP session and its transport once the download is
        # done (or has failed) so the connection is not leaked.
        try:
            transport.connect(username=username, password=password)
            sftp = paramiko.SFTPClient.from_transport(transport)
            try:
                sftp.get('bearmach_feed.csv', file_path)
            finally:
                sftp.close()
        finally:
            transport.close()

        with open(file_path) as f:
            reader = csv.DictReader(f)
            reader.fieldnames = [field.strip() for field in reader.fieldnames]
            for row in reader:
                loader = ProductLoader(Product(), response=None)
                loader.add_value('identifier', row['Bearmach Part Number'].decode('latin-1'))
                loader.add_value('sku', row['Bearmach Part Number'].decode('latin-1'))
                loader.add_value('name', row['Description'].decode('latin-1'))
                loader.add_value('brand', row['Brand'].decode('latin-1'))
                loader.add_value('price', row['Retail'].decode('latin-1'))
                loader.add_value('category', row['Product Group'])
                item = loader.load_item()

                metadata = BearmachMeta()
                metadata['cost_price'] = str(extract_price(row['Cost'].decode('latin-1')))
                metadata['supplier_code'] = row['Supplier Code'].strip()
                metadata['supplier_name'] = row['Supplier Name'].strip()

                item['metadata'] = metadata
                yield item
예제 #16
0
    def parse_product(self, response):
        """Yield a single product with promotion metadata attached.

        The name is the space-joined text under the ``itemprop="name"``
        div.  Price and brand each have a primary XPath and a fallback; the
        brand is only stored when its first value is not all digits.  The
        promotion is copied from ``response.meta['promotional_data']``.
        """
        name_parts = response.xpath('//div[@itemprop="name"]/*//text()').extract()
        image_srcs = response.xpath('//img[@class="left-image"]/@src').extract()

        # Each fallback query only runs when the primary one finds nothing.
        price_values = (
            response.xpath('//div[@itemprop="offers"]/p[@class="box-price"]/b/text()').extract()
            or response.xpath('//div[@itemprop="offers"]/span[@itemprop="price"]/text()').extract()
        )
        brand_values = (
            response.xpath('//img[@class="brand"]/@alt').extract()
            or response.xpath('//div[@itemprop="name"]/h1/text()').extract()
        )
        product_ids = response.xpath('//input[@type="hidden" and @name="productIdAnalytics"]/@value').extract()

        product_loader = ProductLoader(item=Product(), response=response)
        product_loader.add_value('name', ' '.join(name_parts))
        product_loader.add_value('url', response.url)
        if image_srcs:
            product_loader.add_value('image_url', response.urljoin(image_srcs[0]))
        product_loader.add_value('price', price_values)
        if brand_values and not brand_values[0].isdigit():
            product_loader.add_value('brand', brand_values)
        product_loader.add_value('sku', product_ids)
        product_loader.add_value('identifier', product_ids)
        product_item = product_loader.load_item()

        promotion_meta = SpecSaversMeta()
        promotion_meta['promotion'] = response.meta['promotional_data']
        product_item['metadata'] = promotion_meta
        yield product_item
예제 #17
0
 def parse_product(self, response):
     """Yield the page's product, retrying while it has no identifier.

     When ``span#thisstkcode`` is empty the same request is re-issued with
     an incremented ``retries`` counter in its meta.  Once the counter
     exceeds 9 a warning is logged and the page is dropped.  No item is
     produced for a response that lacks an identifier.
     """
     identifier = response.css('span#thisstkcode::text').extract_first()
     if not identifier:
         retries = response.meta.get('retries', 0)
         if retries > 9:
             # Retry budget exhausted: stop re-requesting the page.
             self.logger.warning('No identifier found on %s' % response.url)
             return
         self.logger.debug('Retry %s to get identifier' % response.url)
         # Copy the meta so the current response's dict is not mutated.
         meta = dict(response.meta)
         meta['retries'] = retries + 1
         # dont_filter is passed as a keyword so the duplicate filter lets
         # the repeated URL through.
         yield response.request.replace(dont_filter=True, meta=meta)
         return

     loader = ProductLoader(Product(), response=response)
     loader.add_value('identifier', identifier)
     loader.add_value('sku', identifier)
     loader.add_value('url', response.url)
     loader.add_xpath('name', '//h1/text()')
     price = response.css('span.prodPrice').xpath(
         './/span[@itemprop="price"]/text()').extract_first()
     loader.add_value('price', price)
     # Skip the first breadcrumb entry.
     category = response.css('.breadcrumbs span::text').extract()[1:]
     loader.add_value('category', category)
     loader.add_css('image_url', '.main-product-photo::attr(href)')
     loader.add_css('brand', 'span#thisbrand::text')
     loader.add_css('stock', 'input#data-stock-qty::attr(value)')
     yield loader.load_item()
예제 #18
0
    def parse_node(self, response, node):
        """Yield one product built from a single feed node.

        The name is the node's ``parent_title`` (or ``title`` when absent)
        followed by its material, color and size, each separated by a space
        and included only when present.  Stock is set to 0 when the
        availability text is exactly 'out of stock'.
        """
        title = node.xpath('./*[local-name()="parent_title"]/text()').extract()
        if not title:
            title = node.xpath('./title/text()').extract()

        # Suffixes are appended in this order: material, color, size.
        suffix_sources = (
            node.xpath('./*[local-name()="material"]/text()').extract(),
            node.xpath('./*[local-name()="color"]/text()').extract(),
            node.xpath('./*[local-name()="size"]/text()').extract(),
        )
        full_name = title[0]
        for values in suffix_sources:
            if values:
                full_name = full_name + u' {}'.format(values[0])

        item_loader = ProductLoader(item=Product(), selector=node)
        item_loader.add_value('name', full_name)
        item_loader.add_xpath('url', './link/text()')
        item_loader.add_xpath('image_url', './*[local-name()="image_link"]/text()')
        item_loader.add_xpath('identifier', './*[local-name()="id"]/text()')
        item_loader.add_xpath('price', './*[local-name()="price"]/text()')
        item_loader.add_xpath('shipping_cost', './*[local-name()="shipping"]/*[local-name()="price"]/text()')
        item_loader.add_xpath('brand', './*[local-name()="brand"]/text()')
        item_loader.add_xpath('category', './*[local-name()="google_product_category"]/text()')
        item_loader.add_xpath('sku', './*[local-name()="mpn"]/text()')

        availability = node.xpath('./*[local-name()="availability"]/text()').extract()
        is_out_of_stock = bool(availability) and availability[0] == 'out of stock'
        if is_out_of_stock:
            item_loader.add_value('stock', 0)

        yield item_loader.load_item()
예제 #19
0
 def parse_product(self, response):
     """Yield the page's product with a heuristically chosen sku.

     The sku defaults to the product id and is then replaced by the first
     rule that applies:

     1. the shortest keyword equals the last word of the name: that word;
     2. the shortest keyword is all upper case: that keyword;
     3. the last word of the name is all upper case: that word;
     4. the name has tokens containing digits: the longest such token,
        unless it contains '(' (then the product id is kept).
     """
     loader = ProductLoader(Product(), response=response)
     identifier = response.xpath('//input[@name="product_id"]/@value').extract_first()
     loader.add_value('identifier', identifier)
     loader.add_value('sku', identifier)
     loader.add_value('url', response.url)
     loader.add_xpath('name', '//h1[@itemprop="name"]/text()')
     loader.add_xpath('price', '//meta[@itemprop="price"]/@content')
     category = response.xpath('//div[@id="ProductBreadcrumb"]//a/text()').extract()[1:]
     loader.add_value('category', category)
     loader.add_xpath('image_url', '//img[@itemprop="image"]/@src')
     loader.add_xpath('brand', '//div[@itemtype="http://schema.org/Organization"]/meta[@itemprop="name"]/@content')
     if not response.xpath('//link[@itemprop="availability"]/@href[contains(., "InStock")]'):
         loader.add_value('stock', 0)

     sku = identifier
     name = loader.get_output_value('name')
     # Last whitespace-delimited word of the name, without parentheses.
     name_end = re.search(r'\S+$', name).group(0).strip(' ()')
     # A page without a keywords meta tag is treated as having no keywords.
     keywords_content = response.xpath('//meta[@name="keywords"]/@content').extract_first() or ''
     # Strip before filtering so whitespace-only entries are dropped and
     # cannot become an empty "shortest keyword" (and thus an empty sku).
     keywords = [word.strip() for word in keywords_content.split(',')]
     keywords = [word for word in keywords if word]
     shortest_keyword = min(keywords, key=len) if keywords else 'none'
     from_name = re.findall(r'\S*\d+\S*', name)
     if shortest_keyword.lower() == name_end.lower():
         sku = name_end
     elif shortest_keyword.upper() == shortest_keyword:
         sku = shortest_keyword
     elif name_end.upper() == name_end:
         sku = name_end
     elif from_name:
         sku = max(from_name, key=len)
         if '(' in sku:
             sku = identifier
     loader.replace_value('sku', sku)
     yield loader.load_item()
예제 #20
0
    def parse_product(self, response):
        """Yield a single product with its promotion text as metadata.

        The name is the stripped title spans joined by spaces.  The
        identifier/sku is the ``ecomm_prodid`` number found in the raw body.
        The category is fixed to 'Kontaktlinser'.  The promotion is the
        non-blank splash caption text joined by spaces, or '' when absent.
        """
        name_parts = response.xpath('//h1[@class="product-view__title"]/span/text()').extract()
        name = ' '.join([part.strip() for part in name_parts])

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('name', name)
        loader.add_xpath('price', '//div[contains(@class, "product-view__total-price")]/@data-price')
        # NOTE(review): the image URL is read from @alt rather than @src -
        # confirm this is intentional for this site.
        image_url = response.xpath('//img[@itemprop="image"]/@alt').extract()
        if image_url:
            loader.add_value('image_url', 'http:' + image_url[0])
        loader.add_xpath('brand', '//div[@class="product-view__brand brand"]/img[@class="brand__image"]/@alt')
        loader.add_value('category', 'Kontaktlinser')
        loader.add_value('url', response.url)
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        identifier = re.findall(r'"ecomm_prodid":"(\d+)","', response.body)[0]
        loader.add_value('identifier', identifier)
        loader.add_value('sku', identifier)

        metadata = SpecSaversMeta()
        promotion = response.xpath('//section[contains(@class, "product-view--product-page")]//figcaption[@class="splash__inner"]//text()').extract()
        if promotion:
            promotion = [s for s in map(lambda x: x.strip(), promotion) if s != '']
            promotion = ' '.join(promotion)
        else:
            promotion = ''
        metadata['promotion'] = promotion

        item = loader.load_item()
        item['metadata'] = metadata
        yield item
예제 #21
0
    def parse_product(self, response):
        """Yield one product built from the page's embedded ``"products"`` JSON.

        Nothing is yielded when the response body contains no such JSON.
        Brand, category and price come from the first entry of the decoded
        JSON; the name, image and identifier come from the markup and URL.
        """
        blobs = re.findall('"products":(.*)}}}', response.body)
        if not blobs:
            return
        details = json.loads(blobs[0])[0]

        # The display name is the title, lens type and supply text, in order.
        name_queries = (
            '//div[contains(@class,"field-name-title")]/h1/text()',
            '//div[contains(@class,"field-name-field-cl-lens-type")]/div/span/text()',
            '//div[contains(@class,"form-item-cl-supply")]/text()',
        )
        name_parts = []
        for query in name_queries:
            name_parts.extend(response.xpath(query).extract())
        full_name = u' '.join(part.strip() for part in name_parts)

        images = response.xpath(
            '//img[contains(@class, "img-responsive")]/@src').extract()

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('name', full_name)
        # The last path segment of the URL serves as the identifier.
        loader.add_value('identifier', response.url.split('/')[-1])
        loader.add_value('url', response.url)
        loader.add_value('brand', details['brand'])
        loader.add_value('category', details['category'])
        if images:
            loader.add_value('image_url', images)
        loader.add_value('price', details['price'])

        yield loader.load_item()
예제 #22
0
    def parse_product(self, response):
        """Yield the product on this page if it matches the searched SKU and brand.

        ``response.meta`` must carry ``brand`` (stored on the item),
        ``brands`` (the accepted page brand names) and ``sku`` (the SKU that
        was searched for).  The page is skipped, yielding nothing, when:

        * the part number is missing or differs from the searched SKU
          (compared stripped and upper-cased), or
        * the brand row is missing or its value (upper-cased and stripped)
          is not in ``brands``.

        Items priced below 50 get a ``shipping_cost`` of 5.
        """
        brand = response.meta['brand']
        brands = response.meta['brands']
        sku_searched = response.meta['sku']

        sku = response.css('.part-number strong::text').extract_first()
        if not sku or sku.strip().upper() != sku_searched:
            return

        # extract_first() returns None when the row is absent, so a page
        # without a brand row is skipped like any other non-matching page
        # instead of raising IndexError.
        product_brand = response.xpath(
            '//tr[th[contains(text(), "Brand")]]/td[contains(@class, "data")]/text()'
        ).extract_first()
        if not product_brand or product_brand.upper().strip() not in brands:
            return

        loader = ProductLoader(Product(), response=response)
        loader.add_value('identifier', sku)
        loader.add_value('url', response.url)
        loader.add_css('name', '.product-name .h1::text')
        loader.add_xpath(
            'price', '//span[contains(@id, "price-excluding-tax")]/text()')
        loader.add_value('sku', sku)
        # Everything after the first breadcrumb link is the category path.
        category = response.css('.breadcrumbs a::text').extract()[1:]
        loader.add_value('category', category)
        loader.add_css('image_url', 'img#image-main::attr(src)')
        loader.add_value('brand', brand)
        if response.css('.availability .out-of-stock'):
            loader.add_value('stock', 0)
        item = loader.load_item()
        if item['price'] < 50:
            item['shipping_cost'] = 5
        yield item
예제 #23
0
 def parse_product(self, response):
     """Load a product, then either yield it or hand it to ``parse_shipping``.

     The stock figure is the ``product_avail`` number found in the quantity
     script.  When it is ``'0'`` the item is yielded as is.  Otherwise the
     page's ``orderform`` form is submitted with ``self.parse_shipping`` as
     the callback, carrying a copy of the item in the request meta and using
     the item's identifier as the cookie jar key.
     """
     loader = ProductLoader(Product(), response=response)
     loader.add_xpath('identifier', '//input[@name="productid"]/@value')
     loader.add_value('url', response.url)
     loader.add_css('name', '.descr::text')
     loader.add_css('price', 'span.currency::text')
     loader.add_value('sku', response.meta['sku'])

     thumbnail = response.css(
         'img#product_thumbnail::attr(src)').extract_first()
     if thumbnail:
         loader.add_value('image_url', response.urljoin(thumbnail))
     loader.add_value('brand', response.meta['brand'])

     availability = response.css('.quantity script::text').re(
         'product_avail = (\d+);')
     stock = availability[0]
     loader.add_value('stock', stock)
     item = loader.load_item()

     if stock != '0':
         request_meta = {
             'cookiejar': item['identifier'],
             'item': Product(item),
         }
         yield FormRequest.from_response(response,
                                         formname='orderform',
                                         meta=request_meta,
                                         cookies=self.cookies,
                                         callback=self.parse_shipping,
                                         dont_filter=True)
     else:
         yield item
예제 #24
0
    def parse_product(self, response):
        """Yield the product on this page unless its identifier was already seen.

        The identifier is the list of ``itemprop="sku"`` texts when any are
        found, otherwise the file-name stem of the URL.  Identifiers are
        recorded in ``self.seen_ids`` and repeated ones are skipped.  A
        product whose loaded price is empty is marked with stock 0.

        Raises:
            IndexError: if the page has no product name or no price element.
        """
        hxs = HtmlXPathSelector(response)
        base_url = get_base_url(response)

        sku = hxs.select('//p/span[@itemprop="sku"]/text()').extract()
        # When a SKU is present the identifier is the extracted list itself,
        # not its first element; the fallback is a plain string.
        identifier = sku
        if not sku:
            identifier = response.url.split('/')[-1].split('.')[0]
        # Check for duplicates before doing any further work on the page.
        if identifier in self.seen_ids:
            return
        self.seen_ids.append(identifier)

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('identifier', identifier)
        loader.add_value('sku', sku)
        name = hxs.select('//h1[@class="first"]/span[@itemprop="name"]/text()'
                          ).extract()[0].strip()
        try:
            loader.add_value('name', name)
        except Exception:
            # Retry with a lossy UTF-8 decode.  Catching Exception rather
            # than using a bare except lets KeyboardInterrupt, SystemExit
            # and GeneratorExit propagate.
            loader.add_value('name', name.decode('utf-8', 'replace'))
        category = hxs.select('//ol[@class="breadcrumb"]//a/text()').extract()
        # Drop the first breadcrumb and keep at most the last three levels.
        loader.add_value('category', ' > '.join(category[1:][-3:]))
        image_url = hxs.select('//a[@class="lightbox"]/img/@src').extract()
        if image_url:
            loader.add_value('image_url', urljoin_rfc(base_url, image_url[0]))
        loader.add_value('url', response.url)

        price = hxs.select(
            '//span[@class="price-big orange"]/text()').extract()[0]
        loader.add_value('price', price)
        if not loader.get_output_value('price'):
            loader.add_value('stock', 0)
        yield loader.load_item()
예제 #25
0
 def parse_product(self, response):
     """Load one product from its detail page.

     Identifier, SKU and price are pulled out of inline script text with
     regular expressions.  The image is the main product image or, when
     that is missing, the second thumbnail with ``XSmall`` replaced by
     ``Large`` in its URL; a page with neither simply gets no image.  The
     product is marked with stock 0 when the basket "tick" link is absent.

     Yields:
         The loaded product item.
     """
     loader = ProductLoader(item=Product(), response=response)
     loader.add_xpath('name', '//div[@class="detailstitle"]/text()')
     loader.add_xpath('identifier',
                      '//script/text()',
                      re=r"'productID':'(\w+?)'")
     loader.add_xpath('sku', '//script/text()', re=r"'productID':'(\w+?)'")
     loader.add_value('url', response.url)
     loader.add_xpath('price',
                      '//script/text()',
                      re=r"'productValue':'([\d\.]+?)'")
     loader.add_xpath('category',
                      '//div[@class="breadcrumb"]/a[position()>1]/text()')
     image_url = response.xpath(
         '//div[@class="mainProductImage"]//img/@src').extract()
     if not image_url:
         thumbnails = response.xpath(
             '(//div[@class="thumbnail"])[2]//input[@type="image"]/@src'
         ).extract()
         # Slice rather than index: when the fallback thumbnail is missing
         # too this leaves an empty list instead of raising IndexError, so
         # the check below can skip the image.
         image_url = [src.replace('XSmall', 'Large') for src in thumbnails[:1]]
     if image_url:
         loader.add_value('image_url', response.urljoin(image_url[0]))
     loader.add_xpath(
         'brand',
         '(//td[contains(h5/text(), "Brand")])[1]/following-sibling::td[1]/span/text()'
     )
     if not response.xpath(
             '//div[@id="availDelTick"]//a[@class="BasketTickOn"]'):
         loader.add_value('stock', 0)
     yield loader.load_item()
예제 #26
0
    def parse_products(self, response):
        """Follow the product links of a listing, or emit a placeholder item.

        Each product link found is requested with ``self.parse_product`` as
        the callback and the current ``response.meta`` passed along.  When
        the page lists no products, a single item with price 0 and stock 0
        is built from ``response.meta['row']`` instead.
        """
        links = response.xpath(
            '//div[@class="productListItem"]/div[@class="productListLink"]/a/@href'
        ).extract()
        if links:
            for href in links:
                request = Request(response.urljoin(href),
                                  dont_filter=True,
                                  callback=self.parse_product,
                                  meta=response.meta)
                # NOTE(review): 'Set-Cookie' is normally a response header;
                # confirm this is the header the site expects on requests.
                request.headers['Set-Cookie'] = self.cookies
                yield request
            return

        # No listing results: describe the product from the input row alone.
        row = response.meta['row']
        description = ' '.join((
            row['Product Description'],
            row['Size Description'],
            row['HERO NAME'],
            row['HERO NUMBER'],
        ))
        sku_vid = row['SKU VID'].decode('unicode_escape')

        loader = ProductLoader(item=Product(), response=response)
        loader.add_value('url', '')
        loader.add_value('name', description)
        loader.add_value('image_url', '')
        loader.add_value('category', '')
        loader.add_value('brand', row['Merret Department'])
        loader.add_value('price', 0)
        loader.add_value('stock', 0)
        loader.add_value('identifier', sku_vid)
        loader.add_value('sku', sku_vid)
        yield loader.load_item()
예제 #27
0
    def parse_product(self, response):
        """Yield the product on this page once per identifier.

        The price text is the concatenated "price including tax" text with
        commas turned into dots and non-breaking spaces removed.  Stock is
        always set to 1.  Identifiers already present in
        ``self.identifiers_collected`` are skipped; new ones are added to it.

        Raises:
            IndexError: if the product input or the ``h1`` heading is missing.
        """
        hxs = HtmlXPathSelector(response)
        breadcrumb = response.xpath(
            '//div[@class="breadcrumbs"]//a/span/text()').extract()[1:]
        identifier = hxs.select('//input[@name="product"]/@value').extract()[0]
        images = hxs.select(
            '//div[@class="product-img-box"]/a[@id="main-image"]/img/@src'
        ).extract()
        name = normalize_name(hxs.select('//h1/text()').extract()[0])
        price_fragments = hxs.select(
            '//div[@class="product-view"]//div[@class="price-box"]//span[contains(@id, "price-including-tax-")]//text()'
        ).extract()
        price = "".join(price_fragments)
        price = price.replace(',', '.').replace(u'\xa0', "").strip()
        sku = hxs.select('//*[@itemprop="sku"]/text()').extract()

        loader = ProductLoader(item=Product(), selector=hxs)
        loader.add_value('identifier', identifier)
        loader.add_value('name', name)
        loader.add_value('price', price)
        loader.add_value('sku', sku)
        loader.add_value('url', response.url)
        if breadcrumb:
            # Only the first remaining breadcrumb is used as the category.
            loader.add_value('category', breadcrumb[0])
        if images:
            loader.add_value('image_url', images[0])
        loader.add_value('stock', 1)

        item = loader.load_item()

        if item['identifier'] in self.identifiers_collected:
            return
        self.identifiers_collected.add(item['identifier'])
        yield item
예제 #28
0
    def parse_product(self, response):
        """Yield the product on this page, or re-dispatch pages without an image.

        A page with no image under ``section#one`` is requested again with
        ``self.parse_category`` as the callback.  Otherwise the product is
        loaded from the ``prod-box`` element; breadcrumb entries containing
        "Brand" are left out of the category.  Shipping is 2 for prices
        below 20 and 4.99 for prices below 40; no shipping cost is set from
        40 upwards.

        Raises:
            IndexError: if the page has no breadcrumbs list.
        """
        hxs = HtmlXPathSelector(response)
        base_url = get_base_url(response)

        product_box = hxs.select('//div[@class="prod-box"]')
        breadcrumbs = hxs.select('//ul[@class="breadcrumbs"]')[0]

        images = hxs.select('//section[@id="one"]//@src').extract()
        if not images:
            yield Request(response.url, callback=self.parse_category, dont_filter=True)
            return

        # The brand is the breadcrumb entry that follows the "Brands" link.
        brand = breadcrumbs.select('.//a[contains(text(), "Brands")]/../following-sibling::li[1]/a/text()').extract()
        categories = []
        for crumb in breadcrumbs.select('.//a/text()').extract():
            if "Brand" not in crumb:
                categories.append(crumb)

        loader = ProductLoader(selector=product_box, item=Product())
        loader.add_value('url', response.url)
        loader.add_value('brand', brand)
        loader.add_value('category', categories)
        loader.add_value('image_url', urljoin(base_url, images[0]))
        loader.add_xpath('name', './h1/text()')
        loader.add_xpath('identifier', '//*/@prodref')
        loader.add_xpath('sku', '//*/@prodref')
        in_stock = product_box.select('//*[text()="In Stock" or text()="Low Stock"]')
        if not in_stock:
            loader.add_value('stock', 0)
        loader.add_xpath('price', './/span[@class="product-price"]/text()')
        product = loader.load_item()

        # Price tiers, checked in ascending order; the first match wins.
        for upper_bound, shipping in ((20, 2), (40, 4.99)):
            if product['price'] < upper_bound:
                product['shipping_cost'] = shipping
                break
        yield product
예제 #29
0
    def parse(self, response):
        """Yield one product per ``<item>`` element of a product feed.

        The ``g`` prefix is registered for the
        ``http://base.google.com/ns/1.0`` namespace before the items are
        read.  Missing image, category and price fall back to ``''``, ``''``
        and ``0``.  Stock is set to 0 only when the availability text is
        exactly ``out of stock``.

        Raises:
            IndexError: if an item has no link or no availability element.
        """
        response.selector.register_namespace(
            "g", "http://base.google.com/ns/1.0")

        for entry in response.xpath('//item'):
            images = entry.xpath('g:image_link/text()').extract()
            image_url = images[0] if images else ''

            product_types = entry.xpath('g:product_type/text()').extract()
            if product_types:
                # Drop the first level of the '>'-separated category path.
                category = product_types[0].split('>')[1:]
            else:
                category = ''

            brand = entry.xpath('g:brand/text()').extract()
            identifier = entry.xpath('g:id/text()').extract()

            title = entry.xpath('title/text()').extract_first()
            if title:
                title = title.replace('...', '').strip()

            prices = entry.xpath('g:price/text()').extract()
            price = extract_price(prices[0]) if prices else 0

            url = entry.xpath('link/text()').extract()[0]
            availability = entry.xpath('g:availability/text()').extract()[0]
            out_of_stock = availability == 'out of stock'

            loader = ProductLoader(item=Product(), response=response)
            loader.add_value('identifier', identifier)
            loader.add_value('sku', identifier)
            loader.add_value('name', title)
            loader.add_value('image_url', image_url)
            loader.add_value('price', price)
            loader.add_value('url', url)
            loader.add_value('brand', brand)
            loader.add_value('category', category)
            if out_of_stock:
                loader.add_value('stock', 0)

            yield loader.load_item()
예제 #30
0
 def parse_product(self, response):
     """Request every variant of the product, then yield the product itself.

     Variant data is decoded from escaped JSON found in a script block.  A
     request handled by this same method is yielded for each entry of its
     ``Variants`` list.  The current page's item gets each selected variant
     value (other than empty or ``N/A``) appended to its name and stored in
     ``item['metadata']`` under the variant's header; a value containing
     "size" also fills ``metadata['size']`` with the text after its first
     five characters.  Shipping cost is fixed at ``'7.95'``.

     Raises:
         IndexError: if the variant script, the price or the stock widget
             data is missing from the page.
     """
     raw_json = response.xpath('//script/text()').re('{\\\\"Variants.+}')[0]
     data = json.loads(raw_json.replace('\\"', '"'))
     for variant in data['Variants']:
         variant_url = response.urljoin(variant['ProductPLU'])
         yield Request(make_variant_url(variant_url), self.parse_product)

     plu = response.xpath('//input[@id="ProductPLU"]/@value').extract_first()
     loader = ProductLoader(item=Product(), response=response)
     loader.add_value('identifier', plu)
     loader.add_value('sku', plu)
     loader.add_value('url', response.url)
     loader.add_xpath('name', '(//h1[@itemprop="name"]/text())[1]')

     metadata = {}
     for slot in (1, 2, 3):
         selected = data['Variant%dSelected' % slot]
         if not selected or selected == 'N/A':
             continue
         loader.add_value('name', selected)
         metadata[data['Variant%dHeader' % slot]] = selected
         if 'size' in selected.lower():
             metadata['size'] = selected[5:].strip()

     # The last matching price text on the page is the one used.
     prices = response.css('.price-value .currency::text').extract()
     loader.add_value('price', prices[-1])
     breadcrumb = response.css('.breadcrumb a::text').extract()
     loader.add_value('category', breadcrumb[1:])
     loader.add_css('image_url', '.product-image::attr(src)')
     loader.add_xpath('brand', '//meta[@itemprop="brand"]/@content')
     loader.add_value('shipping_cost',  '7.95')

     available = response.css('.product-stock-widget::attr(ng-init)').re('AvailableOnline: (\w+)')[0]
     if available != 'true':
         loader.add_value('stock', 0)

     item = loader.load_item()
     item['metadata'] = metadata
     yield item