예제 #1
0
    def parse(self, response):
        """Parse a search-result page and yield one item per unseen product.

        Every ``<li class="gl-item">`` node is turned into a ``goodsItem``
        whose fields hold the raw ``extract()`` lists.  A product whose ID
        list is already present in ``self.allids`` is skipped; otherwise the
        ID is recorded there and the item is yielded.  ``self.count`` is
        incremented once the whole page has been consumed.

        Args:
            response: the downloaded search page.

        Yields:
            goodsItem: with ``ID``, ``name``, ``shop_name`` and ``link`` set.
        """
        sel = Selector(response)  # XPath selector over the page
        goods = sel.xpath('//li[@class="gl-item"]')
        # The position counts every listing on the page, skipped ones too.
        for i, good in enumerate(goods, start=1):
            item1 = goodsItem()
            item1['ID'] = good.xpath('./div/@data-sku').extract()
            if item1['ID'] in self.allids:
                continue
            self.allids.append(item1['ID'])
            item1['name'] = good.xpath(
                './div/div[@class="p-name"]/a/em/text()').extract()
            item1['shop_name'] = good.xpath(
                './div/div[@class="p-shop"]/@data-shop_name').extract()
            # Items are yielded directly rather than followed to a detail
            # page, so the raw link list is stored as-is; an empty list is a
            # valid value and nothing here needs a catch-all handler.
            item1['link'] = good.xpath(
                './div/div[@class="p-img"]/a/@href').extract()

            print("goodNum is", i)
            yield item1
        self.count = self.count + 1
        print('count is ', self.count, response)
예제 #2
0
    def parse(self, response):
        """Parse a search-result page and request each product's detail page.

        For every ``<li class="gl-item">`` node a ``goodsItem`` is filled
        with the first match of each field's XPath and handed to
        ``self.parse_detail`` through the request's ``meta['item']``.

        A listing with no SKU or no link is skipped, because there is no
        detail page to follow.  Any other missing field is stored as an
        empty string, so one incomplete listing no longer aborts the rest of
        the page.

        Args:
            response: the downloaded search page.

        Yields:
            scrapy.Request: one per listing that has both an ID and a link.
        """

        def first(node, query, default=''):
            """Return the first result of *query* under *node*, or *default*."""
            matches = node.xpath(query).extract()
            return matches[0] if matches else default

        sel = Selector(response)  # XPath selector over the page
        for good in sel.xpath('//li[@class="gl-item"]'):
            sku = first(good, './@data-sku', default=None)
            link = first(good, './div/div[@class="p-img"]/a/@href',
                         default=None)
            if sku is None or link is None:
                # Nothing to identify or follow; move on to the next listing.
                continue

            item1 = goodsItem()
            item1['ID'] = sku
            item1['name'] = first(
                good, './div/div[@class="p-name p-name-type-2"]/a/em/text()')
            item1['shop_name'] = first(
                good, './div/div[contains(@class,"p-shop")]/span/a/text()')
            item1['link'] = link
            item1['price'] = first(
                good, './div//div[@class="p-price"]/strong/i/text()')
            item1['comment_num'] = first(
                good, './div//div[@class="p-commit"]/strong/a/text()')
            print(item1['ID'], item1['name'], item1['shop_name'],
                  item1['price'])

            # Links that do not already start with "http" get the scheme
            # prepended; absolute links are used unchanged.
            url = link if link.startswith('http') else "http:" + link
            yield scrapy.Request(url,
                                 meta={'item': item1},
                                 callback=self.parse_detail)
예제 #3
0
 def parse(self, response):
     """Parse a search-result page and request each product's comment section.

     Every ``<li class="gl-item">`` node becomes a ``goodsItem`` whose fields
     hold the raw ``extract()`` lists.  The first link is turned into a
     detail-page URL ending in ``#comments-list`` and the item travels to
     ``self.parse_detail`` in the request's ``meta['item']``.

     A listing without any link is skipped instead of raising IndexError,
     and a link that already starts with ``http`` is not given a second
     scheme prefix.

     Args:
         response: the downloaded search page.

     Yields:
         scrapy.Request: one per listing that has a link.
     """
     sel = Selector(response)  # XPath selector over the page
     for good in sel.xpath('//li[@class="gl-item"]'):
         item1 = goodsItem()
         item1['ID'] = good.xpath('./div/@data-sku').extract()
         item1['name'] = good.xpath('./div/div[@class="p-name"]/a/em/text()').extract()
         item1['shop_name'] = good.xpath('./div/div[@class="p-shop"]/@data-shop_name').extract()
         links = good.xpath('./div/div[@class="p-img"]/a/@href').extract()
         item1['link'] = links
         if not links:
             # No detail page to follow for this listing.
             continue
         href = links[0]
         if not href.startswith('http'):
             href = "http:" + href
         url = href + "#comments-list"
         yield scrapy.Request(url, meta={'item': item1}, callback=self.parse_detail)
예제 #4
0
    def parse(self, response):
        """Parse a search-result page and request each product's comment section.

        For every ``<li class="gl-item">`` node the ID, name and shop name
        are each joined into a single string (the name is also stripped) and
        stored on a ``goodsItem`` together with the first image link.  The
        item is passed to ``self.parse_detail`` via the request's
        ``meta['item']``.

        A listing without a link is skipped instead of raising IndexError,
        and a link that already starts with ``http`` is not given a second
        scheme prefix.

        Args:
            response: the downloaded search page.

        Yields:
            scrapy.Request: one per listing that has a link, targeting the
            ``#comments-list`` anchor of its detail page.
        """
        print('1,==========', response.url)
        sel = Selector(response)  # XPath selector over the page
        for good in sel.xpath('//li[@class="gl-item"]'):
            links = good.xpath('./div/div[@class="p-img"]/a/@href').extract()
            if not links:
                # No detail page to follow for this listing.
                continue

            item1 = goodsItem()
            item1['ID'] = ''.join(good.xpath('./div/@data-sku').extract())
            item1['name'] = ''.join(good.xpath(
                './div/div[@class="p-name"]/a/em/text()').extract()).strip()
            item1['shop_name'] = ''.join(good.xpath(
                './div/div[@class="p-shop"]/@data-shop_name').extract())
            item1['link'] = links[0]

            href = item1['link']
            if not href.startswith('http'):
                href = "http:" + href
            url = href + "#comments-list"

            yield scrapy.Request(url,
                                 meta={'item': item1},
                                 callback=self.parse_detail)