Beispiel #1
0
    def parse(self, response):
        """Yield a product item for every non-winter tyre listed on the page.

        Each listing block is loaded through a ``ProductLoader``; the tyre
        dimensions are parsed from the "tyre size ..." text and stored in a
        ``MicheldeverMeta`` that is attached to the item under ``metadata``.

        A listing is skipped (without aborting the rest of the page) when:

        * it carries a "Winter Tyre" image,
        * no identifier or no parsable tyre size is found (logged), or
        * ``is_product_correct`` rejects the loaded item.

        :param response: the listing page response to scrape.
        :returns: generator of loaded product items.
        """
        hxs = HtmlXPathSelector(response)

        listings = hxs.select('//div[contains(@id,"Tyre") and contains(@class, "tyre-list-tyre")]')

        for listing in listings:
            loader = ProductLoader(item=Product(), selector=listing)
            loader.add_xpath('name', 'div//div[@class="manufacturerText"]/p/strong/text()')
            brand = ''.join(
                listing.select('div//div[@class="manufacturerImage"]/img/@alt').extract()
            ).split(' - ')[0]

            # Listings flagged with a "Winter Tyre" image are not exported.
            if listing.select('div//img[@alt="Winter Tyre"]'):
                continue

            loader.add_value('brand', unify_brand(brand))
            loader.add_value('category', find_brand_segment(loader.get_output_value('brand')))

            identifier = listing.select(
                'div//div[@class="pricingAddToOrder clearfix"]/input/@value').extract()
            if not identifier:
                # Previously an IndexError here aborted the whole page.
                self.log('Skipping listing without identifier [%s]' % response.url)
                continue

            loader.add_value('url', '')

            image_url = listing.select('div[@class="image"]/img/@src').extract()
            if image_url:
                loader.add_value('image_url', urljoin(get_base_url(response), image_url[0]))

            loader.add_value('identifier', identifier[0])

            price = listing.select(
                'div//div[contains(@class, "pricingSelection")]//a/strong/text()').extract()
            # NOTE(review): the "." in this pattern is unescaped and therefore
            # matches any character; kept as-is to preserve existing output.
            price = re.findall(r"\d+.\d+", price[0]) if price else '0.0'
            loader.add_value('price', price)

            tyresize_text = listing.select(
                './/div[contains(@class, "manufacturerText")]/p/span/text()').extract()
            size_match = None
            if tyresize_text:
                size_match = re.search(r'tyre size (\d+)\/(\d+)(\w{1})(\d+)',
                                       tyresize_text[0].strip(), re.I)
            if not size_match:
                # Without the size the metadata cannot be built; skip the
                # listing instead of raising and losing the remaining ones.
                self.log('Skipping listing with unparsable tyre size %r [%s]'
                         % (tyresize_text, response.url))
                continue
            width, aspect, speed_rating, rim = size_match.groups()

            metadata = MicheldeverMeta()
            metadata['aspect_ratio'] = aspect
            metadata['rim'] = rim
            metadata['speed_rating'] = speed_rating
            metadata['width'] = width
            metadata['fitting_method'] = 'Fitted'

            load_rating = listing.select(
                'div//li/a[@rel="load-index-description"]/text()').extract()
            metadata['load_rating'] = load_rating[0].split(': ')[-1] if load_rating else ''
            metadata['alternative_speed_rating'] = ''

            xl = listing.select('div//img[@title="Reinforced"]/@title').extract()
            metadata['xl'] = 'Yes' if xl else 'No'

            run_flat = listing.select('div//img[@title="Run Flat"]').extract()
            metadata['run_flat'] = 'Yes' if run_flat else 'No'

            manufacturer_mark = listing.select(
                'div//img[contains(@title, "Homologated for fitment to certai")]/@title').extract()
            if manufacturer_mark:
                # Reduce the tooltip sentence to the bare make name.
                manufacturer_mark = manufacturer_mark[0] \
                    .replace('Homologated for fitment to certain ', '') \
                    .replace(' cars.', '')
            else:
                manufacturer_mark = ''
            metadata['manufacturer_mark'] = find_man_mark(manufacturer_mark) if manufacturer_mark else ''

            metadata['full_tyre_size'] = '/'.join((metadata['width'],
                                                   metadata['aspect_ratio'],
                                                   metadata['rim'],
                                                   metadata['load_rating'],
                                                   metadata['speed_rating']))

            # Use a distinct name so the loop's selector is not shadowed.
            item = loader.load_item()
            item['metadata'] = metadata

            if not is_product_correct(item):
                continue

            item['metadata']['mts_stock_code'] = find_mts_stock_code(
                item, spider_name=self.name, log=self.log)

            new_speed_rating = get_speed_rating(item)
            new_alt_speed = get_alt_speed(item)
            # Prefer the looked-up alternative rating; otherwise keep the
            # scraped rating as the alternative when the lookup replaced it.
            if new_alt_speed:
                alternative = new_alt_speed
            elif item['metadata']['speed_rating'] != new_speed_rating:
                alternative = item['metadata']['speed_rating']
            else:
                alternative = ''
            item['metadata']['alternative_speed_rating'] = alternative
            item['metadata']['speed_rating'] = new_speed_rating

            yield item
Beispiel #2
0
    def parse(self, response):
        """Scrape the tyre search result rows of a page.

        For every non-winter row a product item with ``MicheldeverMeta``
        metadata is built. Items whose URL is already present in
        ``self.images`` are yielded directly with that image; for the others
        a request to the product URL is yielded with ``self.parse_image`` as
        callback and the item passed in ``meta['product']``.

        Rows are skipped when the title contains "winter", when no identifier
        or pattern text is found, when the pattern text cannot be parsed
        (logged), or when ``is_product_correct`` rejects the item (logged).

        :param response: the search results page response.
        :returns: generator of product items and image requests.
        """
        hxs = HtmlXPathSelector(response)
        rows = hxs.select('//tr[contains(@class,"tyre-search-row")]')

        not_found_count = 0

        for row in rows:
            url = row.select('.//td/b/a/@href')[0].extract()
            title = row.select('.//td/b/a/text()')[0].extract()
            if 'winter' in title.lower():
                continue

            # The brand is derived from the logo image file name.
            brand = row.select('.//a/img/@src')[0].extract()
            brand = re.search(r'/public/brands/(.*?)(-tyres)?\.',
                              brand).group(1).replace('-', ' ').title()
            # Escape the brand: it is literal text, not a regex pattern.
            product_name = re.sub(re.escape(brand), '', title).strip()
            fitting_method = 'Delivered'

            identifier = row.select(
                './/input[@name="item_id"]/@value').extract()
            if not identifier:
                identifier = row.select('.//a/@href').re(
                    'email_me_stock/(.*)')
            if not identifier:
                continue

            try:
                fuel, grip, noise = map(
                    unicode.strip,
                    row.select(
                        './/img[contains(@alt, "Tyre Label")]/following-sibling::text()'
                    ).extract())
            except (ValueError, TypeError):
                # Not exactly three label values: leave the label data empty.
                fuel = ''
                grip = ''
                noise = ''

            price = row.select("td[3]/b/text()").extract()
            loader = ProductLoader(item=Product(), selector=hxs)
            loader.add_value('identifier', identifier[0])
            loader.add_value('name', product_name)
            loader.add_value('brand', unify_brand(brand))
            loader.add_value(
                'category',
                find_brand_segment(loader.get_output_value('brand')))
            loader.add_value('url', url)
            if price:
                loader.add_value('price', price[0])
            else:
                # No price shown: record a zero price and no stock.
                loader.add_value('price', '0.00')
                loader.add_value('stock', 0)

            pattern_name = row.select('.//i/text()').extract()
            if not pattern_name:
                continue
            pattern_name = pattern_name[0]

            data = re.search(
                r'(?P<Width>\d+)/(?P<Aspect_Ratio>\d+) R(?P<Rim>\d+) (?P<Speed_Rating>[A-Za-z]{1,2}) \((?P<Load_Rating>\d+).*?\)',
                pattern_name)
            if not data:
                msg = 'ERROR parsing "{}" [{}]'.format(
                    pattern_name, response.url)
                self.log(msg)
                continue
            data = data.groupdict()

            metadata = MicheldeverMeta()
            metadata['aspect_ratio'] = data['Aspect_Ratio']
            metadata['rim'] = data['Rim']
            metadata['speed_rating'] = data['Speed_Rating'].upper()

            metadata['width'] = data['Width']
            metadata['fitting_method'] = fitting_method
            metadata['load_rating'] = data['Load_Rating'] or ''
            metadata['alternative_speed_rating'] = ''
            metadata['xl'] = 'Yes' if 'XL' in pattern_name else 'No'

            run_flat_found = is_run_flat(pattern_name)
            lowered_pattern = pattern_name.lower()
            run_flat = ('run flat' in lowered_pattern
                        or 'runflat' in lowered_pattern
                        or run_flat_found)
            metadata['run_flat'] = 'Yes' if run_flat else 'No'

            # Split once instead of once per known manufacturer mark.
            pattern_tokens = pattern_name.split(' ')
            matched_marks = [
                mark for mark in self.all_man_marks.keys()
                if mark in pattern_tokens
            ]
            manufacturer_mark = matched_marks[0].strip() if matched_marks else ''
            metadata['manufacturer_mark'] = find_man_mark(
                manufacturer_mark) if manufacturer_mark else ''

            metadata['full_tyre_size'] = '/'.join(
                (metadata['width'], metadata['aspect_ratio'],
                 metadata['rim'], metadata['load_rating'],
                 metadata['speed_rating']))

            metadata['fuel'] = fuel
            metadata['grip'] = grip
            metadata['noise'] = noise

            item = loader.load_item()
            item['metadata'] = metadata

            if not is_product_correct(item):
                not_found_count += 1
                self.log('%s - PRODUCT IS NOT CORRECT: %r' %
                         (not_found_count, item))
                continue

            item['metadata']['mts_stock_code'] = find_mts_stock_code(
                item, spider_name=self.name, log=self.log)

            if item['url'] in self.images:
                # Image already known for this URL: no extra request needed.
                item['image_url'] = self.images[item['url']]
                yield item
            else:
                yield Request(item['url'],
                              callback=self.parse_image,
                              meta={'product': item},
                              dont_filter=True)
Beispiel #3
0
    def parse_product(self, response):
        """Yield one product item per purchasable option on a product page.

        The page heading (with the brand from ``response.meta['brand']``
        removed) is used as the product name, while each option's pattern
        text is parsed for width, aspect ratio, rim, speed and load rating
        and scanned for "XL", run-flat wording and known manufacturer marks.

        Options are skipped when no identifier is found, when the pattern
        text is missing or unparsable (logged and appended to
        ``self.errors``), or when ``is_product_correct`` rejects the item.

        :param response: the product page response; ``meta`` may carry
            ``brand`` and a fallback ``price``.
        :returns: generator of loaded product items.
        """
        hxs = HtmlXPathSelector(response)
        brand = response.meta.get('brand') or ''
        product_name = hxs.select('//h2[@class="heading black"]/text()')[0].extract().strip()
        # Escape the brand: it is literal text, not a regex pattern.
        product_name = re.sub(re.escape(brand), '', product_name).strip()
        fitting_method = 'Delivered'

        image_url = hxs.select('//div[@class="item"]/a/img/@src').extract()
        options = hxs.select('//div[@style="background: #fff; padding: 6px; "]')
        for option in options:
            loader = ProductLoader(item=Product(), selector=hxs)
            loader.add_value('name', product_name)
            loader.add_value('brand', unify_brand(brand))
            loader.add_value('category', find_brand_segment(loader.get_output_value('brand')))
            loader.add_value('url', response.url)
            if image_url:
                loader.add_value('image_url', urljoin(get_base_url(response), image_url[0]))

            identifier = option.select('../input[@type="hidden" and @name="item_id"]/@value').extract()
            if not identifier:
                identifier = option.select('./a/@href').re('email_me_stock/(.*)')
            if not identifier:
                continue
            loader.add_value('identifier', identifier[0])

            price = option.select(
                './strong[@class="price" and not(contains(text(),"On Backorder"))]/text()').extract()
            if price:
                loader.add_value('price', price[0])
            else:
                # No usable price on the page: fall back to the price passed
                # in the request meta (if any) and record no stock.
                if response.meta.get('price'):
                    loader.add_value('price', response.meta['price'])
                else:
                    loader.add_value('price', '0.00')
                loader.add_value('stock', 0)

            pattern_name = option.select('./p/strong/text()').extract()
            if not pattern_name:
                pattern_name = option.select('./strong/text()').extract()
            if not pattern_name:
                # Previously an IndexError here aborted the remaining options.
                msg = 'ERROR no pattern text found [{}]'.format(response.url)
                log.msg(msg)
                self.errors.append(msg)
                continue
            pattern_name = pattern_name[0]

            data = re.search(
                r'(?P<Width>\d+)/(?P<Aspect_Ratio>\d+) R(?P<Rim>\d+) (?P<Speed_Rating>[A-Za-z]{1,2}) \((?P<Load_Rating>\d+).*?\)',
                pattern_name)
            if not data:
                msg = 'ERROR parsing "{}" [{}]'.format(pattern_name, response.url)
                log.msg(msg)
                self.errors.append(msg)
                continue
            data = data.groupdict()

            metadata = MicheldeverMeta()
            metadata['aspect_ratio'] = data['Aspect_Ratio']
            metadata['rim'] = data['Rim']
            metadata['speed_rating'] = data['Speed_Rating'].upper()

            metadata['width'] = data['Width']
            metadata['fitting_method'] = fitting_method
            metadata['load_rating'] = data['Load_Rating'] or ''
            metadata['alternative_speed_rating'] = ''
            metadata['xl'] = 'Yes' if 'XL' in pattern_name else 'No'

            lowered_pattern = pattern_name.lower()
            run_flat = 'run flat' in lowered_pattern or 'runflat' in lowered_pattern
            metadata['run_flat'] = 'Yes' if run_flat else 'No'

            # Split once instead of once per known manufacturer mark.
            pattern_tokens = pattern_name.split(' ')
            matched_marks = [mark for mark in self.all_man_marks.keys()
                             if mark in pattern_tokens]
            manufacturer_mark = matched_marks[0].strip() if matched_marks else ''
            metadata['manufacturer_mark'] = find_man_mark(manufacturer_mark) if manufacturer_mark else ''

            metadata['full_tyre_size'] = '/'.join((metadata['width'],
                                                   metadata['aspect_ratio'],
                                                   metadata['rim'],
                                                   metadata['load_rating'],
                                                   metadata['speed_rating']))

            product = loader.load_item()
            product['metadata'] = metadata

            if not is_product_correct(product):
                continue

            product['metadata']['mts_stock_code'] = find_mts_stock_code(
                product, spider_name=self.name, log=self.log)

            new_speed_rating = get_speed_rating(product)
            new_alt_speed = get_alt_speed(product)
            # Prefer the looked-up alternative rating; otherwise keep the
            # scraped rating as the alternative when the lookup replaced it.
            if new_alt_speed:
                alternative = new_alt_speed
            elif product['metadata']['speed_rating'] != new_speed_rating:
                alternative = product['metadata']['speed_rating']
            else:
                alternative = ''
            product['metadata']['alternative_speed_rating'] = alternative
            product['metadata']['speed_rating'] = new_speed_rating

            yield product
    def parse(self, response):
        """Yield a product item for every non-winter tyre in the grid view.

        Most values are read from ``data-*`` attributes on each result
        element; the tyre size text is parsed for width, aspect ratio, speed
        rating, rim and (when present in parentheses) load rating.

        Results are skipped when ``data-filter-season`` is "Winter", when the
        size text cannot be parsed (logged), or when ``is_product_correct``
        rejects the item.

        :param response: the search results page response.
        :returns: generator of loaded product items.
        """
        results = response.xpath(
            '//div[contains(@class, "tyres_search_results_tyre") and @data-viewtype="grid"]'
        )

        for result in results:
            if result.xpath('@data-filter-season').extract()[0] == 'Winter':
                continue

            name = result.xpath(
                './/div[contains(@class, "tyre-model text-center")]/text()'
            ).extract()[0]
            brand = result.xpath('@data-filter-brand').extract()[0]

            loader = ProductLoader(item=Product(), selector=result)
            loader.add_value('name', brand + ' ' + name)
            loader.add_value('brand', unify_brand(brand))
            loader.add_value(
                'category',
                find_brand_segment(loader.get_output_value('brand')))
            identifier = result.xpath('@data-tyreid').extract()[0]
            loader.add_value('identifier', identifier)
            loader.add_value('url', response.url)

            image_url = result.xpath(
                './/div[contains(@class, "tyre-image")]//img/@src'
            ).extract()
            if image_url:
                loader.add_value(
                    'image_url',
                    urljoin(get_base_url(response), image_url[0]))

            price = result.xpath(
                './/div[contains(@class, "tyre-pricing-information")]/div/text()'
            ).re(r'[\d,.]+')
            loader.add_value('price', price[0] if price else '0.00')

            tyresize_text = result.xpath(
                './/div[contains(@class, "tyre-size")]/text()'
            ).extract()[0].strip()

            # Try the form with a parenthesised load rating first, then the
            # shorter form without one.
            size_match = re.search(
                r'(\d+)\/(\d+)(\w{1})(\d+)\s\((\d+)\)', tyresize_text, re.I)
            if size_match:
                width, aspect, speed_rating, rim, load_rating = size_match.groups()
            else:
                size_match = re.search(
                    r'(\d+)\/(\d+)(\w{1})(\d+)', tyresize_text, re.I)
                if not size_match:
                    # Skip this tyre rather than abort the whole page.
                    self.log('Skipping tyre with unparsable size "%s" [%s]'
                             % (tyresize_text, response.url))
                    continue
                width, aspect, speed_rating, rim = size_match.groups()
                load_rating = ''

            metadata = MicheldeverMeta()
            metadata['aspect_ratio'] = aspect
            metadata['rim'] = rim
            metadata['speed_rating'] = speed_rating
            metadata['width'] = width
            metadata['fitting_method'] = 'Fitted'
            metadata['load_rating'] = load_rating
            metadata['alternative_speed_rating'] = ''

            xl = result.xpath('@data-filter-reinforced').extract()[0] == 'Y'
            metadata['xl'] = 'Yes' if xl else 'No'

            run_flat_found = is_run_flat(loader.get_output_value('name'))
            run_flat = result.xpath('@data-filter-runflat').extract()[0] == 'Y'
            metadata['run_flat'] = 'Yes' if run_flat or run_flat_found else 'No'

            manufacturer_mark = result.xpath(
                './/span[contains(@title, "Homologated for fitment to certai")]/@title'
            ).re(r'Homologated for fitment to certain (.*) cars\.')
            metadata['manufacturer_mark'] = find_man_mark(
                manufacturer_mark[0]) if manufacturer_mark else ''

            metadata['full_tyre_size'] = '/'.join(
                (metadata['width'], metadata['aspect_ratio'],
                 metadata['rim'], metadata['load_rating'],
                 metadata['speed_rating']))

            # Read each label attribute by name. A single union query returns
            # the attributes in document order, so unpacking it depended on
            # the markup's attribute order and raised if one was absent.
            metadata['fuel'] = ''.join(
                result.xpath('@data-filter-tyreefficiencyr').extract())
            metadata['grip'] = ''.join(
                result.xpath('@data-filter-tyreefficiencyg').extract())
            metadata['noise'] = ''.join(
                result.xpath('@data-filter-tyreefficiencyd').extract())

            # Use a distinct name so the loop's selector is not shadowed.
            item = loader.load_item()
            item['metadata'] = metadata

            if not is_product_correct(item):
                continue

            item['metadata']['mts_stock_code'] = find_mts_stock_code(
                item, spider_name=self.name, log=self.log)

            yield item
Beispiel #5
0
    def parse_products(self, response):
        """Yield product items from a JSON response.

        The response body is a JSON object whose ``d`` member is itself a
        JSON-encoded list of product dicts. Each non-winter entry is turned
        into a product item with ``MicheldeverMeta`` metadata built from the
        entry's ``ProductAttributes``.

        A missing manufacturer or tread-pattern name falls back to an empty
        string. Entries flagged ``IsWinter`` and items rejected by
        ``is_product_correct`` are skipped.

        :param response: the JSON response.
        :returns: generator of loaded product items.
        """
        json_data = json.loads(response.body)
        products = json.loads(json_data.get('d'))

        for product_el in products:
            loader = ProductLoader(item=Product(), selector=product_el)

            try:
                brand = product_el[u'ProductManufacturer'][
                    u'TyreManufacturerName']
            except (KeyError, TypeError):
                # Manufacturer block absent or null.
                brand = ''

            attributes = product_el[u'ProductAttributes']

            # skip winter tyres
            if attributes[u'IsWinter']:
                continue

            # Normalise the brand to the spelling used in self.brands.
            for tyre_brand in self.brands:
                if tyre_brand.upper() == brand.strip().upper():
                    brand = tyre_brand

            try:
                full_name = product_el[u'ProductTreadPattern'][u'TreadName']
            except (KeyError, TypeError):
                # Tread pattern block absent or null.
                full_name = ''
            # Fix name changes
            if full_name in self.new_old_names:
                full_name = self.new_old_names[full_name]

            loader.add_value('name', full_name)
            loader.add_value('brand', unify_brand(brand))
            loader.add_value(
                'category',
                find_brand_segment(loader.get_output_value('brand')))
            identifier = product_el.get('TyreID')
            loader.add_value('url', 'http://www.tyresonthedrive.com')
            image_url = 'http://www.tyresonthedrive.com/img/treads/' + product_el[
                u'ProductTreadPattern'][u'TreadPatternImage'] + '.jpg'
            loader.add_value('image_url', image_url)
            loader.add_value('identifier', identifier)

            price = product_el[u'CheapestPriceTwoDay'][u'OneTyrePriceIncVat']
            if not price:
                # A falsy price is recorded together with zero stock.
                loader.add_value('stock', 0)
            loader.add_value('price', price)

            metadata = MicheldeverMeta()
            metadata['aspect_ratio'] = str(attributes[u'Profile'])
            metadata['rim'] = str(attributes[u'Rim'])
            metadata['speed_rating'] = str(attributes[u'Speed'])
            metadata['load_rating'] = str(attributes[u'Load'])
            metadata['width'] = str(attributes[u'Section'])
            metadata['fitting_method'] = 'Fitted'
            metadata['alternative_speed_rating'] = ''
            metadata['xl'] = 'Yes' if attributes[u'IsExLoad'] else 'No'
            metadata['run_flat'] = 'Yes' if attributes[u'IsRunFlat'] else 'No'

            man_mark = attributes[u'OEMFitment']
            metadata['manufacturer_mark'] = find_man_mark(
                man_mark) if man_mark else ''

            metadata['full_tyre_size'] = '/'.join(
                (metadata['width'], metadata['aspect_ratio'], metadata['rim'],
                 metadata['load_rating'], metadata['speed_rating']))

            product = loader.load_item()
            product['metadata'] = metadata

            if not is_product_correct(product):
                continue

            product['metadata']['mts_stock_code'] = find_mts_stock_code(
                product, spider_name=self.name, log=self.log)

            new_speed_rating = get_speed_rating(product)
            new_alt_speed = get_alt_speed(product)
            # Prefer the looked-up alternative rating; otherwise keep the
            # scraped rating as the alternative when the lookup replaced it.
            if new_alt_speed:
                alternative = new_alt_speed
            elif product['metadata']['speed_rating'] != new_speed_rating:
                alternative = product['metadata']['speed_rating']
            else:
                alternative = ''
            product['metadata']['alternative_speed_rating'] = alternative
            product['metadata']['speed_rating'] = new_speed_rating

            yield product
Beispiel #6
0
    def parse(self, response):
        """Yield the cheapest item per distinct tyre found in the page's JSON.

        The product data is read from the line of the response body that
        contains ``JsonObject = ``; its ``Rest`` and ``Deals`` lists are
        processed together. Width, aspect ratio and rim come from
        ``response.meta['row']``; the remaining values come from each product
        entry.

        Winter tyres, identifiers already in ``self.seen_ids`` and items
        rejected by ``is_product_correct`` are skipped. The surviving items
        are grouped by brand, name, fitting method, full tyre size, XL,
        run-flat and manufacturer mark; only the lowest-priced item of each
        group is yielded, and its identifier is added to ``self.seen_ids``.

        :param response: the search results response; ``meta['row']`` must
            provide 'Width', 'Aspect Ratio' and 'Rim'.
        :returns: generator of product items.
        """
        row = response.meta['row']

        json_data = None
        for line in response.body.split('\n'):
            if "JsonObject = " in line:
                json_data = json.loads(
                    line.replace('JsonObject = ', '').replace('; \r', ''))

        if json_data is None:
            # No embedded data on the page; previously this raised TypeError
            # when subscripting None.
            self.log('No JsonObject found in response {}'.format(response.url))
            return

        products = json_data['Rest'] + json_data['Deals']

        collected_products = []
        fitting_method = 'Fitted'

        self.log('Results found {} {}'.format(len(products), response.meta))
        for product_info in products:
            # skip winter tyres
            if product_info['WinterTyre']:
                continue

            loader = ProductLoader(item=Product(), selector=product_info)
            loader.add_value('name', product_info['ModelName'])
            brand = product_info['Manufacturer']

            loader.add_value('brand', unify_brand(brand))
            loader.add_value(
                'category',
                find_brand_segment(loader.get_output_value('brand')))

            identifier = product_info['PrimaryId']
            # Identifiers are stored with the fitting method as a suffix.
            full_identifier = str(identifier) + '-' + fitting_method
            if full_identifier in self.seen_ids:
                continue

            url = '/catalogue' + product_info[
                'CatalogueUrl'] + '/f?tyre=' + str(product_info['PrimaryId'])
            loader.add_value('url', response.urljoin(url))

            image_url = product_info.get('ModelImageLarge')
            if not image_url:
                image_url = product_info.get('ModelImage')

            if image_url:
                # Take the value of the src="..." attribute when the field
                # holds markup; a plain URL passes through unchanged.
                image_url = image_url.split('src="')[-1].split('"')[0]
                loader.add_value('image_url', response.urljoin(image_url))

            spec = product_info['SpecificationName']
            metadata = MicheldeverMeta()
            metadata['aspect_ratio'] = row['Aspect Ratio']
            metadata['rim'] = row['Rim']
            # The speed rating is the last whitespace-separated token.
            metadata['speed_rating'] = spec.split()[-1]
            metadata['width'] = row['Width']

            metadata['load_rating'] = product_info['LoadRatingName']
            metadata['alternative_speed_rating'] = ''
            metadata['xl'] = 'Yes' if product_info['Reinforced'] else 'No'
            run_flat_found = is_run_flat(product_info['ModelName'])
            run_flat = product_info['RunFlat']
            metadata['run_flat'] = 'Yes' if run_flat or run_flat_found else 'No'

            manufacturer_mark = product_info['Variant']
            if manufacturer_mark:
                manufacturer_mark = manufacturer_mark.split()[0].strip()

            full_tyre_size = '/'.join(
                (row['Width'], row['Aspect Ratio'], row['Rim'],
                 metadata['load_rating'], metadata['speed_rating']))
            # MOE Exception for this product
            if manufacturer_mark and 'MO EXTENDED' in product_info['Variant'].upper()\
               and product_info['ModelName'] == 'Potenza S001' and full_tyre_size == '245/40/18/97/Y':
                metadata['manufacturer_mark'] = 'MOE'
            else:
                metadata['manufacturer_mark'] = find_man_mark(
                    manufacturer_mark) if manufacturer_mark else ''

            metadata['full_tyre_size'] = full_tyre_size

            # Label fields may be missing or null; default each to ''.
            try:
                metadata['fuel'] = product_info['TyreLabelFuel']['Score']
            except Exception:
                metadata['fuel'] = ''

            try:
                metadata['grip'] = product_info['TyreLabelWet']['Score']
            except Exception:
                metadata['grip'] = ''

            try:
                metadata['noise'] = product_info['TyreLabelNoise'][
                    'NoiseLevel']
            except Exception:
                metadata['noise'] = ''

            product = loader.load_item()
            product['metadata'] = metadata

            product['price'] = product_info['FullyFittedPrice']
            product['identifier'] = full_identifier
            product['metadata']['fitting_method'] = fitting_method

            t1 = time.time()
            if not is_product_correct(product):
                self.log('Search: {}'.format(str(response.meta)))
                # Remember the rejected id so it is not re-checked later.
                self.seen_ids.add(full_identifier)
                self.log('PRODUCT IS NOT CORRECT => %r' % product)
                continue
            t2 = time.time()
            self.log('Time taken by product correct: {}'.format(t2 - t1))

            t1 = time.time()
            product['metadata']['mts_stock_code'] = find_mts_stock_code(
                product, spider_name=self.name, log=self.log)
            t2 = time.time()
            self.log('Time taken by mts stock: {}'.format(t2 - t1))

            collected_products.append(product)

        # Keep only the cheapest item among those describing the same tyre.
        min_price_products = {}
        for product in collected_products:
            key = "%s-%s-%s-%s-%s-%s-%s" % (
                product['brand'], product['name'],
                product['metadata']['fitting_method'],
                product['metadata']['full_tyre_size'],
                product['metadata']['xl'], product['metadata']['run_flat'],
                product['metadata']['manufacturer_mark'])
            cheapest = min_price_products.get(key)
            if cheapest is None or product['price'] < cheapest['price']:
                min_price_products[key] = product

        for product in min_price_products.values():
            self.seen_ids.add(product['identifier'])
            yield product
Beispiel #7
0
    def parse(self, response):
        products = response.xpath('//div[@class="results"]')

        pages = response.xpath(
            '//p[contains(text(),"Page")]//a/@href').extract()
        for page in pages:
            yield Request(response.urljoin(page), meta=response.meta)

        for product in products:
            loader = ProductLoader(item=Product(), selector=product)
            # the full name of the tyre (name variable) is used to extract metadata (i.e. run flat, xl),
            # the pattern should be set as the product's name
            name = ' '.join(
                map(
                    unicode.strip,
                    product.select('.//div[@class="resultsLeft"]/div'
                                   '//text()[normalize-space()]').extract()))
            name += name + ' %s' % ' '.join(
                map(
                    unicode.strip,
                    product.select(
                        './/div[@class="t_size"]//text()[normalize-space()]').
                    extract()))
            loader.add_xpath(
                'name',
                './/div[@class="resultsLeft"]/div//a/i/b/text()[normalize-space()]'
            )
            brand = product.select(
                './/div[@class="resultsLeft"]/div/b//text()[normalize-space()]'
            ).extract()[0].strip()

            # skip winter tyres
            if product.select(
                    './/img[contains(@alt,"Winter / cold weather tyres")]'):
                continue
            if product.select(
                    './/img[contains(@alt,"Wi") or contains(@src,"/simg/hiver.png")]'
            ):
                continue
            loader.add_value('brand', unify_brand(brand))
            loader.add_value(
                'category',
                find_brand_segment(loader.get_output_value('brand')))
            fitting_method = 'Fitted'

            url = product.select('.//a[i[b]]/@href')[0].extract()
            url = response.urljoin(url)
            url = re.sub('cart_id=[^&]*', '', url)
            loader.add_value('url', url)

            image_url = product.select(
                './/a/img[@align="left"]/@src').extract()
            if image_url:
                loader.add_value('image_url', response.urljoin(image_url[0]))

            identifier = urlparse.parse_qs(
                urlparse.urlparse(url).query)['typ'][0]
            loader.add_value('identifier', identifier)
            price = ''.join(
                product.select(
                    './/div[@class="price"]/font/b//text()[normalize-space()]'
                ).extract())
            price = re.findall(r"\d+.\d+", price) if price else '0.0'
            loader.add_value('price', price)

            data = parse_pattern(name)
            if not data:
                # log.msg("ERROR %s [%s]" % (name, response.url))
                # self.errors.append("Error parsing: %s. URL: %s" % (name, response.url))
                continue

            metadata = MicheldeverMeta()
            metadata['aspect_ratio'] = data['Aspect_Ratio']
            metadata['rim'] = data['Rim']
            metadata['speed_rating'] = data['Speed_Rating']

            metadata['width'] = data['Width']
            metadata['fitting_method'] = fitting_method
            metadata['load_rating'] = data['Load_Rating']
            metadata['alternative_speed_rating'] = ''
            xl = 'XL' in name
            metadata['xl'] = 'Yes' if xl else 'No'

            run_flat_found = is_run_flat(name)
            run_flat = 'run flat' in name.lower() or 'runflat' in name.lower(
            ) or run_flat_found
            metadata['run_flat'] = 'Yes' if run_flat else 'No'
            manufacturer_mark = product.select(
                './/div[@class="t_size"]/b/a[contains(@onmouseover,"Original") or '
                'contains(@onmouseover,"BMW") or contains(@onmouseover,"Porsche")]'
                '/@name[normalize-space()]').extract()
            manufacturer_mark = manufacturer_mark[0].strip(
            ) if manufacturer_mark else []
            metadata['manufacturer_mark'] = find_man_mark(
                manufacturer_mark) if manufacturer_mark else ''
            metadata['full_tyre_size'] = '/'.join(
                (metadata['width'], metadata['aspect_ratio'], metadata['rim'],
                 metadata['load_rating'], metadata['speed_rating']))

            try:
                fuel, grip, noise = map(
                    unicode.strip,
                    product.select(
                        './/div[@class="tyre_label_short"]//text()').extract())
                metadata['fuel'] = fuel
                metadata['grip'] = grip
                metadata['noise'] = noise.replace('dB', '').strip()
            except:
                metadata['fuel'] = ''
                metadata['grip'] = ''
                metadata['noise'] = ''

            product = loader.load_item()
            product['metadata'] = metadata

            if not is_product_correct(product):
                continue

            if product['identifier'] in self.ip_codes:
                ip_code = self.ip_codes[product['identifier']]
                product['sku'] = ip_code
                product['metadata']['mts_stock_code'] = find_mts_stock_code(
                    product,
                    spider_name=self.name,
                    log=self.log,
                    ip_code=ip_code)
                yield product
            else:
                # We can't found IP code on products list, unfortunatelly we must extract it from product page
                yield Request(product['url'],
                              meta={'product': product},
                              callback=self.parse_ipcode)
Beispiel #8
0
    def parse(self, response):
        """Parse a results page and yield the cheapest "Fitted" tyre products.

        The page embeds its results in a ``JsonObject = {...}`` JavaScript
        assignment. The ``Rest`` and ``Deals`` lists of that object are turned
        into ``Product`` items; entries flagged ``WinterTyre`` and items
        rejected by ``is_product_correct`` are skipped. Each remaining item is
        collected with the ``FullyFittedPrice`` and a ``-Fitted`` identifier.
        When several items share brand, name, fitting method, full tyre size,
        XL flag, run-flat flag and manufacturer mark, only the one with the
        lowest price is yielded.

        Failure to build a selector or to locate the embedded JSON is logged
        and appended to ``self.errors`` instead of raising.

        :param response: response whose ``meta['row']`` supplies the
            ``Width``, ``Aspect Ratio`` and ``Rim`` values used for metadata.
        """
        try:
            hxs = HtmlXPathSelector(response)
        except AttributeError:
            msg = 'Error getting selector on page for row: %s' % response.meta[
                'row']
            self.log('[ERROR] %s' % msg)
            self.errors.append(msg)
            return

        row = response.meta['row']

        # Locate the embedded JSON. As before, the last matching line wins.
        marker = 'JsonObject = '
        json_data = None
        for line in hxs.extract().split('\n'):
            if marker in line:
                # Tolerate any trailing whitespace / ';' combination rather
                # than relying on the exact '; \r' line ending.
                payload = line.split(marker, 1)[1].strip().rstrip(';').strip()
                json_data = json.loads(payload)

        if json_data is None:
            # Without this guard the subscript below raised a TypeError.
            msg = 'Error getting JSON data on page for row: %s' % row
            self.log('[ERROR] %s' % msg)
            self.errors.append(msg)
            return

        # Missing or null lists are treated as empty.
        products = (json_data.get('Rest') or []) + \
            (json_data.get('Deals') or [])

        collected_products = []

        for product_info in products:
            # skip winter tyres
            if product_info['WinterTyre']:
                continue

            loader = ProductLoader(item=Product(), selector=product_info)
            loader.add_value('name', product_info['ModelName'])
            brand = product_info['Manufacturer']

            loader.add_value('brand', unify_brand(brand))
            loader.add_value(
                'category',
                find_brand_segment(loader.get_output_value('brand')))
            identifier = product_info['PrimaryId']
            fitting_method = 'Delivered'

            url = '/catalogue' + product_info[
                'CatalogueUrl'] + '/f?tyre=' + str(product_info['PrimaryId'])
            loader.add_value('url', urljoin(get_base_url(response), url))

            # Prefer the large image, fall back to the regular one.
            image_url = product_info.get('ModelImageLarge')
            if not image_url:
                image_url = product_info.get('ModelImage')

            if image_url:
                # The value may be an <img> tag; keep only its src attribute.
                image_url = image_url.split('src="')[-1].split('"')[0]
                loader.add_value('image_url',
                                 urljoin(get_base_url(response), image_url))

            loader.add_value('identifier',
                             str(identifier) + '-' + fitting_method)
            price = product_info['SellingPrice']
            loader.add_value('price', price)

            spec = product_info['SpecificationName']

            metadata = MicheldeverMeta()
            metadata['aspect_ratio'] = row['Aspect Ratio']
            metadata['rim'] = row['Rim']
            # The speed rating is the last token of the specification name.
            metadata['speed_rating'] = spec.split()[-1]
            metadata['width'] = row['Width']

            metadata['fitting_method'] = fitting_method
            load_rating = product_info['LoadRatingName']
            metadata['load_rating'] = load_rating
            metadata['alternative_speed_rating'] = ''
            xl = product_info['Reinforced']
            metadata['xl'] = 'Yes' if xl else 'No'
            run_flat = product_info['RunFlat']
            metadata['run_flat'] = 'Yes' if run_flat else 'No'

            manufacturer_mark = product_info['Variant']
            if manufacturer_mark:
                # Only the first word is used; a whitespace-only value yields
                # no words and is treated as "no mark".
                words = manufacturer_mark.split()
                manufacturer_mark = words[0] if words else ''

            metadata['manufacturer_mark'] = find_man_mark(
                manufacturer_mark) if manufacturer_mark else ''

            metadata['full_tyre_size'] = '/'.join(
                (row['Width'], row['Aspect Ratio'], row['Rim'],
                 metadata['load_rating'], metadata['speed_rating']))

            product = loader.load_item()
            product['metadata'] = metadata

            if not is_product_correct(product):
                continue

            product['metadata']['mts_stock_code'] = find_mts_stock_code(
                product, spider_name=self.name, log=self.log)

            new_speed_rating = get_speed_rating(product)
            new_alt_speed = get_alt_speed(product)
            current_speed_rating = product['metadata']['speed_rating']
            if new_alt_speed:
                alternative_speed_rating = new_alt_speed
            elif current_speed_rating != new_speed_rating:
                # Keep the scraped rating as the alternative when it is
                # replaced by a different one.
                alternative_speed_rating = current_speed_rating
            else:
                alternative_speed_rating = ''
            product['metadata'][
                'alternative_speed_rating'] = alternative_speed_rating
            product['metadata']['speed_rating'] = new_speed_rating

            # "Delivered" tyres are not collected; the item is converted to
            # its "Fitted" variant instead.
            product['price'] = product_info['FullyFittedPrice']
            fitting_method = 'Fitted'
            product['identifier'] = str(identifier) + '-' + fitting_method
            product['metadata']['fitting_method'] = fitting_method
            collected_products.append(product)

        # Keep only the lowest-priced item among otherwise identical tyres.
        min_price_products = {}
        for product in collected_products:
            key = "%s-%s-%s-%s-%s-%s-%s" % (
                product['brand'], product['name'],
                product['metadata']['fitting_method'],
                product['metadata']['full_tyre_size'],
                product['metadata']['xl'], product['metadata']['run_flat'],
                product['metadata']['manufacturer_mark'])
            cheapest = min_price_products.get(key)
            if cheapest is None or product['price'] < cheapest['price']:
                min_price_products[key] = product

        for product in min_price_products.values():
            yield product