def extract_run_flat(pattern):
    """
    Detect the run-flat marker in a tyre description and strip it out.

    The description is normalised with ``fix_spaces`` and searched with
    ``run_flat_regex``.  When the regex matches, every occurrence of the
    matched text is removed and the remainder is stripped and normalised
    again.

    Returns a ``(flag, cleaned_pattern)`` tuple.  ``flag`` is the *string*
    "Yes" when the marker was found and "No" otherwise (it is not a
    boolean).

    Example (assuming ``run_flat_regex`` matches the text 'RUN FLAT'):
    u'205/55R16 91V Economy RUN FLAT' gives
    ("Yes", u'205/55R16 91V Economy').
    """
    cleaned = fix_spaces(pattern)
    found = run_flat_regex.search(cleaned)
    if not found:
        # Nothing to strip: hand back the normalised description as-is.
        return "No", cleaned

    without_marker = cleaned.replace(found.group(0), '')
    return "Yes", fix_spaces(without_marker.strip())
def extract_reinforced(pattern):
    """
    Detect the reinforced (XL) marker in a tyre description and strip it out.

    The description is normalised with ``fix_spaces`` and searched with
    ``reinforced_regex``.  When the regex matches, every occurrence of the
    matched text is removed and the remainder is stripped and normalised
    again.

    Returns a ``(flag, cleaned_pattern)`` tuple.  ``flag`` is the *string*
    "Yes" when the marker was found and "No" otherwise (it is not a
    boolean).

    Example (assuming ``reinforced_regex`` matches the text 'REINFORCED'):
    u'205/55R16 94V Wet: Performance REINFORCED' gives
    ("Yes", u'205/55R16 94V Wet: Performance').
    """
    cleaned = fix_spaces(pattern)
    found = reinforced_regex.search(cleaned)
    if not found:
        # Nothing to strip: hand back the normalised description as-is.
        return "No", cleaned

    without_marker = cleaned.replace(found.group(0), '')
    return "Yes", fix_spaces(without_marker.strip())
def parse_pattern(pattern):
    """
    Split a tyre description into its size, rating and name components.

    The description is normalised with ``fix_spaces`` and tried against
    ``tyre_pattern_regex1`` (which also captures a name) and then against
    ``tyre_pattern_regex2`` (size and ratings only).

    Returns a 6-tuple ``(width, ratio, rim, load_rating, speed_rating,
    name)``; ``name`` is '' when only the second regex matched.  Returns
    ``None`` when neither regex matches.

    >>> parse_pattern(u'205/55R16\xa091V')
    (u'205', u'55', u'16', u'91', u'V', '')
    >>> parse_pattern(u'205/55R16\xa091V\xa0Proxes CF2')
    (u'205', u'55', u'16', u'91', u'V', u'Proxes CF2')
    """
    normalised = fix_spaces(pattern)

    named_match = tyre_pattern_regex1.search(normalised)
    if named_match:
        (width, ratio, rim,
         load_rating, speed_rating, name) = named_match.groups()
        return width, ratio, rim, load_rating, speed_rating, name

    size_match = tyre_pattern_regex2.search(normalised)
    if size_match:
        (width, ratio, rim,
         load_rating, speed_rating) = size_match.groups()
        # No name was captured, so report it as an empty string.
        return width, ratio, rim, load_rating, speed_rating, ''

    return None
def extract_products(self, response):
    """
    Yield a product item for every usable tyre listed in ``response``.

    For each product container on the page this method:

    * skips entries whose description contains 'winter';
    * strips the reinforced and run-flat markers from the description and
      records them as the ``xl`` / ``run_flat`` metadata flags;
    * parses the remaining description with ``parse_pattern``.  Entries
      that do not parse are skipped; unless they contain one of a few
      known fragments, the failure is logged and appended to
      ``self.errors``;
    * skips (and reports through ``self.log`` / ``self.errors``) entries
      that lack a brand, identifier, price or image, so that one
      malformed listing does not abort extraction of the rest of the page;
    * builds the item through ``ProductLoader`` plus a ``MicheldeverMeta``
      metadata object, and drops it if ``is_product_correct`` rejects it;
    * fills in the MTS stock code and the (alternative) speed ratings
      before yielding the item.

    The item URL is always ``self.start_urls[0]``.
    """
    # Lower-cased fragments of descriptions that are known not to parse;
    # such entries are skipped without being reported as errors.
    known_unparseable = (
        'sport contact', 'advantage sport', 'expedia s02', 'zero rosso')

    hxs = HtmlXPathSelector(response)
    products = hxs.select(
        '//div[@class="listcontPART"]//div[@class="conprcbx"]')

    for el in products:
        brand_nodes = el.select(
            './div[@class="dec_tyrebnt"]/p/b/text()').extract()
        pattern = "".join(
            el.select('./div[@class="dec_tyrebnt"]/p/text()').extract()
        ).strip()

        # skip winter tyres
        if 'winter' in pattern.lower():
            continue

        xl, pattern = extract_reinforced(pattern)
        run_flat, pattern = extract_run_flat(pattern)

        res = parse_pattern(pattern)
        if not res:
            lowered = pattern.lower()
            if not any(fragment in lowered
                       for fragment in known_unparseable):
                msg = 'Could not parse pattern: %s' % fix_spaces(
                    pattern).encode('utf-8')
                self.log('[CARTYRES] %s' % msg)
                self.errors.append(msg)
            continue
        width, ratio, rim, load_rating, speed_rating, name = res

        # Raw string: "\(" is a regex escape, not a Python string escape.
        identifiers = el.select(".//p/@onclick").re(
            r"AddCarToShortList\('([^']*)',")
        price_nodes = el.select(
            './/div[@class="dec_fittdbnt"]/h1/text()').extract()
        image_nodes = el.select(
            '../..//div[@class="uptyre_prt"]/img/@src').extract()

        # Report and skip an incomplete listing instead of letting an
        # IndexError terminate the generator for the whole page.
        required = (
            ('brand', brand_nodes),
            ('identifier', identifiers),
            ('price', price_nodes),
            ('image', image_nodes),
        )
        missing = [label for label, nodes in required if not nodes]
        if missing:
            msg = 'Missing %s for pattern: %s' % (
                ', '.join(missing), fix_spaces(pattern).encode('utf-8'))
            self.log('[CARTYRES] %s' % msg)
            self.errors.append(msg)
            continue

        # The last extracted node is used for brand, identifier and price,
        # the first one for the image.
        brand = brand_nodes[-1].strip()
        identifier = identifiers[-1]
        price = fix_spaces(price_nodes[-1])
        image_url = image_nodes[0]
        url = self.start_urls[0]

        marks = el.select('.//div[@class="bndLGO1"]/img/@title').extract()
        if marks:
            man_mark = marks[0]
            # Remember every manufacturer mark seen during the crawl.
            if man_mark not in self.man_marks:
                self.man_marks.add(man_mark)
        else:
            man_mark = ''

        loader = ProductLoader(Product(), selector=hxs)
        loader.add_value('name', name)
        loader.add_value('identifier', identifier)
        loader.add_value('price', price)
        loader.add_value('url', url)
        loader.add_value('image_url', image_url)
        loader.add_value('brand', unify_brand(brand))
        loader.add_value(
            'category',
            find_brand_segment(loader.get_output_value('brand')))

        metadata = MicheldeverMeta()
        metadata['width'] = width
        metadata['aspect_ratio'] = ratio
        metadata['rim'] = rim
        metadata['load_rating'] = load_rating
        metadata['speed_rating'] = speed_rating
        metadata['fitting_method'] = 'Fitted'
        metadata['run_flat'] = run_flat
        metadata['xl'] = xl

        # Marks without a mapping entry are stored as an empty code.
        if man_mark and man_mark in man_mark_mapping:
            man_code = man_mark_mapping[man_mark]
        else:
            man_code = ''
        metadata['manufacturer_mark'] = man_code
        metadata['full_tyre_size'] = '/'.join(
            (width, ratio, rim, load_rating, speed_rating))

        product = loader.load_item()
        product['metadata'] = metadata

        if not is_product_correct(product):
            continue

        product['metadata']['mts_stock_code'] = find_mts_stock_code(
            product, spider_name=self.name, log=self.log)

        new_speed_rating = get_speed_rating(product)
        new_alt_speed = get_alt_speed(product)

        # Prefer the computed alternative rating; otherwise keep the
        # scraped rating as the alternative when it differs from the
        # computed one, and leave the field empty when they agree.
        scraped_speed_rating = product['metadata']['speed_rating']
        if new_alt_speed:
            alt_speed_rating = new_alt_speed
        elif scraped_speed_rating != new_speed_rating:
            alt_speed_rating = scraped_speed_rating
        else:
            alt_speed_rating = ''
        product['metadata']['alternative_speed_rating'] = alt_speed_rating
        product['metadata']['speed_rating'] = new_speed_rating

        yield product