def parse(self, response):
    """Parse a search results page and yield one item per unseen product.

    Every ``<li class="gl-item">`` node becomes a ``goodsItem`` whose ``ID``,
    ``name``, ``shop_name`` and ``link`` fields hold the raw lists returned by
    ``extract()``. A product whose ID list is already in ``self.allids`` is
    skipped; otherwise its ID list is appended to ``self.allids``.

    The follow-up request to ``self.parse_detail`` is disabled in this
    version, so items are yielded directly and no detail URL is built.

    Args:
        response: The downloaded search page.

    Yields:
        goodsItem: One item for each product not seen before.
    """
    sel = Selector(response)  # XPath selector over the page
    goods = sel.xpath('//li[@class="gl-item"]')

    # The running number covers every product node, skipped duplicates
    # included, so it starts at 1 and advances before the duplicate check.
    for i, good in enumerate(goods, 1):
        item1 = goodsItem()
        item1['ID'] = good.xpath('./div/@data-sku').extract()
        if item1['ID'] in self.allids:
            continue
        self.allids.append(item1['ID'])

        item1['name'] = good.xpath(
            './div/div[@class="p-name"]/a/em/text()').extract()
        item1['shop_name'] = good.xpath(
            './div/div[@class="p-shop"]/@data-shop_name').extract()
        # extract() returns a (possibly empty) list and does not raise, so
        # no blanket try/except is needed around the link lookup.
        item1['link'] = good.xpath(
            './div/div[@class="p-img"]/a/@href').extract()

        print("goodNum is", i)
        yield item1

    # NOTE(review): the counter is assumed to advance once per parsed page
    # (after the loop); the original layout was lost — confirm.
    self.count = self.count + 1
    print('count is ', self.count, response)
def parse(self, response):
    """Parse a search results page and request each product's detail page.

    For every ``<li class="gl-item">`` node a ``goodsItem`` is filled with the
    first match for ID, name, shop name, link, price and comment count, then
    a ``scrapy.Request`` for the product link is yielded with the item
    attached under ``meta['item']`` and ``self.parse_detail`` as callback.

    Missing name, shop name, price or comment count fall back to ``''`` (the
    same tolerance the shop name already had). A product without an ID or a
    link is skipped instead of raising IndexError, so one incomplete node no
    longer aborts the rest of the page.

    Args:
        response: The downloaded search page.

    Yields:
        scrapy.Request: One detail-page request per usable product.
    """

    def first(node, query, default=''):
        """Return the first string matched by *query* under *node*."""
        values = node.xpath(query).extract()
        return values[0] if values else default

    sel = Selector(response)  # XPath selector over the page
    goods = sel.xpath('//li[@class="gl-item"]')
    for good in goods:
        sku = first(good, './@data-sku')
        link = first(good, './div/div[@class="p-img"]/a/@href')
        if not sku or not link:
            # Without an ID or a link there is nothing to request.
            print('skipping product without ID or link:', response.url)
            continue

        item1 = goodsItem()
        item1['ID'] = sku
        item1['name'] = first(
            good, './div/div[@class="p-name p-name-type-2"]/a/em/text()')
        # The shop XPath is evaluated once; it used to run twice.
        item1['shop_name'] = first(
            good, './div/div[contains(@class,"p-shop")]/span/a/text()')
        item1['link'] = link
        item1['price'] = first(
            good, './div//div[@class="p-price"]/strong/i/text()')
        item1['comment_num'] = first(
            good, './div//div[@class="p-commit"]/strong/a/text()')
        print(item1['ID'], item1['name'], item1['shop_name'], item1['price'])

        # Links without a leading "http" get the "http:" prefix.
        url = link if link.startswith('http') else "http:" + link
        yield scrapy.Request(url, meta={'item': item1},
                             callback=self.parse_detail)
def parse(self, response):
    """Parse a search results page and request each product's comment list.

    Every ``<li class="gl-item">`` node becomes a ``goodsItem`` whose ``ID``,
    ``name``, ``shop_name`` and ``link`` fields hold the raw lists returned by
    ``extract()``. A ``scrapy.Request`` for the first link, with
    ``#comments-list`` appended, is yielded with the item attached under
    ``meta['item']`` and ``self.parse_detail`` as callback.

    A product with no link is skipped instead of raising IndexError, so one
    incomplete node no longer aborts the rest of the page. A link that
    already starts with ``http`` is used as-is rather than being prefixed
    with ``http:`` a second time.

    Args:
        response: The downloaded search page.

    Yields:
        scrapy.Request: One detail-page request per product with a link.
    """
    sel = Selector(response)  # XPath selector over the page
    goods = sel.xpath('//li[@class="gl-item"]')
    for good in goods:
        item1 = goodsItem()
        item1['ID'] = good.xpath('./div/@data-sku').extract()
        item1['name'] = good.xpath(
            './div/div[@class="p-name"]/a/em/text()').extract()
        item1['shop_name'] = good.xpath(
            './div/div[@class="p-shop"]/@data-shop_name').extract()
        links = good.xpath('./div/div[@class="p-img"]/a/@href').extract()
        item1['link'] = links

        if not links:
            # No link means no detail page to request for this product.
            print('skipping product without link:', response.url)
            continue

        link = links[0]
        scheme = '' if link.startswith('http') else "http:"
        url = scheme + link + "#comments-list"
        yield scrapy.Request(url, meta={'item': item1},
                             callback=self.parse_detail)
def parse(self, response):
    """Parse a search results page and request each product's comment list.

    For every ``<li class="gl-item">`` node a ``goodsItem`` is filled with the
    ID, the whitespace-stripped name and the shop name (each the
    concatenation of all XPath matches) plus the first image link. A
    ``scrapy.Request`` for that link, with ``#comments-list`` appended, is
    yielded with the item attached under ``meta['item']`` and
    ``self.parse_detail`` as callback.

    A product with no link is skipped instead of raising IndexError, so one
    incomplete node no longer aborts the rest of the page. A link that
    already starts with ``http`` is used as-is rather than being prefixed
    with ``http:`` a second time.

    Args:
        response: The downloaded search page.

    Yields:
        scrapy.Request: One detail-page request per product with a link.
    """
    print('1,==========', response.url)
    sel = Selector(response)  # XPath selector over the page
    goods = sel.xpath('//li[@class="gl-item"]')
    for good in goods:
        item1 = goodsItem()
        item1['ID'] = ''.join(good.xpath('./div/@data-sku').extract())

        name_parts = good.xpath(
            './div/div[@class="p-name"]/a/em/text()').extract()
        item1['name'] = ''.join(name_parts).strip()

        shop_parts = good.xpath(
            './div/div[@class="p-shop"]/@data-shop_name').extract()
        item1['shop_name'] = ''.join(shop_parts)

        links = good.xpath('./div/div[@class="p-img"]/a/@href').extract()
        if not links:
            # No link means no detail page to request for this product.
            print('skipping product without link:', response.url)
            continue
        item1['link'] = links[0]

        scheme = '' if item1['link'].startswith('http') else "http:"
        url = scheme + item1['link'] + "#comments-list"
        yield scrapy.Request(url, meta={'item': item1},
                             callback=self.parse_detail)