def childes(
    selector: SelectorList,
    parent_tag: str,
) -> SelectorList:
    """Gather the successive ``:nth-child`` matches found under *parent_tag*.

    The selector is queried with ``"<parent_tag> > :nth-child(N)"`` for
    N = 1, 2, 3, ... and every truthy query result is appended, exactly as
    returned by ``selector.css``, to a fresh ``SelectorList``.  The walk
    stops at the first N whose query result is falsy.

    Args:
        selector: Object whose ``css`` method is used to run the queries.
        parent_tag: CSS selector text placed in front of ``> :nth-child(N)``.

    Returns:
        A new ``SelectorList`` holding one appended entry per matching N,
        in ascending order of N.

    Raises:
        TypeError: If *parent_tag* is not a ``str``.
    """
    if not isinstance(parent_tag, str):
        raise TypeError('Given `parent_tag` is not `str` object.')

    collected = SelectorList()
    # NOTE: the query is produced with ``str.format``, so literal braces in
    # *parent_tag* would be interpreted as replacement fields.
    css_template = parent_tag + ' > :nth-child({i})'

    # CSS ``:nth-child`` positions are 1-based.
    position = 1
    match = selector.css(css_template.format(i=position))
    while match:
        collected.append(match)
        position += 1
        match = selector.css(css_template.format(i=position))
    return collected
def parse_product(self, response):
    """Parse one product page and yield a populated ``MTSGetdataItem``.

    The page is read through ``response.xpath``: thumbnail image sources,
    the ``attributes`` block, the sale-property ``<li>`` entries and the
    third ``<script>`` inside ``div.tm-clear`` (the text following
    ``TShop.Setup(`` is handed to ``self.python_getter``).  Sale-property
    entries whose label is not one of the colour-swatch labels are treated
    as sizes and passed to ``self.get_pricing`` together with the label ->
    ``data-value`` map.

    Args:
        response: Page response.  ``response.meta`` must provide
            ``'product_url'``, ``'item_id'`` and ``'title'``; ``'price'``
            is read once per colour swatch.

    Yields:
        A single ``MTSGetdataItem``, and only when at least one colour
        ended up with a non-empty pricing list.

    Raises:
        IndexError: If the setup script or the ``attributes`` block is
            missing, if a colour swatch has no ``title`` attribute, or if
            a swatch has no ``style`` and there is no thumbnail to fall
            back on.
        KeyError: If a required ``response.meta`` entry or a key of the
            parsed setup data is missing.
    """
    pic = response.xpath('//ul[@id="J_UlThumb"]/li/a/img/@src').extract()
    detail = response.xpath(
        '//div[@class="attributes" and @id="attributes"]').extract()
    color_pics = response.xpath(
        '//dd/ul[contains(@class,"tm-clear J_TSaleProp tb-img")]/li')
    total_list = response.xpath(
        '//dd/ul[contains(@class,"tm-clear J_TSaleProp")]/li')
    json_getter = response.xpath(
        '//div[@class="tm-clear"]/script[3]').extract()

    # Extract the colour labels once instead of once per sale-property entry.
    color_names = set(color_pics.xpath('a/span/text()').extract())

    size_list = SelectorList()
    str_val_map = {}
    for entry in total_list:
        names = entry.xpath('a/span/text()').extract()
        values = entry.xpath('@data-value').extract()
        if names and values:
            str_val_map[names[0]] = values[0]
        else:
            print("Value Error")
        # An entry without a label can be classified neither as a colour nor
        # as a size, so it is left out rather than aborting the whole page.
        if names and names[0] not in color_names:
            size_list.append(entry)

    st = json_getter[0].split('TShop.Setup(')[1]
    info_dict = self.python_getter(st)
    skuMap = info_dict["valItemInfo"]["skuMap"]

    product_img = [self.resize_pic(src) for src in pic]

    color_set = []
    for swatch in color_pics:
        color = {}
        color['color'] = swatch.xpath('@title').extract()[0]

        styles = swatch.xpath('a/@style').extract()
        if styles:
            # Keep the text between the first '(' (skipping 3 characters)
            # and the last ')', then drop everything from the last '_' on.
            style = styles[0]
            style = style[style.find('(') + 3:style.rfind(')')]
            color['image_url'] = style[0:style.rfind('_')]
        else:
            # No inline style on the swatch: fall back to the first thumbnail.
            color['image_url'] = product_img[0]

        color['alternative_image_urls'] = product_img
        color['pricing_list'] = self.get_pricing(
            skuMap, str_val_map, swatch, size_list, response.meta['price'])
        if len(color['pricing_list']) > 0:
            color_set.append(color)

    item = MTSGetdataItem()
    item['product_url'] = response.meta['product_url']
    item['item_id'] = response.meta['item_id']
    item['title'] = response.meta['title']
    item['brand'] = 'Midi'
    item['merchant'] = 'Tmall'
    item['product_description'] = ''
    item['product_detail'] = detail[0]
    item['colors'] = color_set
    item['categories'] = info_dict['itemDO']['categoryId']

    # Products without any priced colour are dropped.
    if len(item['colors']) > 0:
        yield item