Example No. 1
    def parse(self, response):
        # build one item per product row; each field is pulled from the i-th
        # <li> with an absolute, index-based XPath
        for i in range(1, 21):
            item = CrawlerwebItem()
            name = response.xpath('/html/body/div[8]/ul/li[%d]/div[3]/text()' %
                                  i).extract()
            cj = response.xpath('/html/body/div[8]/ul/li[%d]/p[1]/text()' %
                                i).extract()
            gg = response.xpath('/html/body/div[8]/ul/li[%d]/p[2]/text()' %
                                i).extract()
            xq = response.xpath(
                '/html/body/div[8]/ul/li[%d]/p[3]/span[1]/text()' %
                i).extract()
            # extract_first() returns None when the node is missing, so fall
            # back to '' before the regex (needs `import re` at module level)
            price = response.xpath(
                '/html/body/div[8]/ul/li[%d]/div[2]/text()' %
                i).extract_first()
            price_1 = re.findall(r"\d+\.?\d*", price or '')
            price2 = response.xpath(
                '/html/body/div[8]/ul/li[%d]/div[2]/span[1]/text()' %
                i).extract()
            price3 = response.xpath(
                '/html/body/div[8]/ul/li[%d]/div[1]/p/span[2]/text()' %
                i).extract()

            item['name'] = name
            item['cj'] = cj
            item['gg'] = gg
            item['xq'] = xq
            item['price'] = price_1
            item['price2'] = price2
            item['price3'] = price3
            yield item
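The XPaths in Example No. 1 address each <li> by absolute position, which is why the loop has to hard-code range(1, 21). Below is a minimal sketch of the same extraction that iterates over the row selectors and uses relative paths instead; the import path, spider name, and start URL are assumptions for illustration, not taken from the original project.

import re

import scrapy

from crawlerweb.items import CrawlerwebItem  # assumed item module path


class ProductSketchSpider(scrapy.Spider):
    """Sketch: the same fields as Example No. 1, one item per row selector."""
    name = 'product_sketch'                   # hypothetical spider name
    start_urls = ['http://example.com/list']  # placeholder URL

    def parse(self, response):
        # select every product row once, then use relative XPaths per row
        for row in response.xpath('/html/body/div[8]/ul/li'):
            item = CrawlerwebItem()
            item['name'] = row.xpath('./div[3]/text()').getall()
            item['cj'] = row.xpath('./p[1]/text()').getall()
            item['gg'] = row.xpath('./p[2]/text()').getall()
            item['xq'] = row.xpath('./p[3]/span[1]/text()').getall()
            price = row.xpath('./div[2]/text()').get() or ''
            # keep only the numeric part of the price, as Example No. 1 does
            item['price'] = re.findall(r"\d+\.?\d*", price)
            item['price2'] = row.xpath('./div[2]/span[1]/text()').getall()
            item['price3'] = row.xpath('./div[1]/p/span[2]/text()').getall()
            yield item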
Example No. 2
 def parse(self, response):
     # print(response.text)
     time.sleep(1)
     for i in range(1, 41):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[1]/a/text()' %
             i).extract()
         cj = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[2]/text()' %
             i).extract()
         gg = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[3]/span/text()' %
             i).extract()
         xq = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[6]/text()' %
             i).extract()
         price = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[7]/span[1]/text()' %
             i).extract()
         # price2 = response.xpath('/html/body/div[4]/div/div[4]/ul/li[%d]/p[8]/span[2]/text()' % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         # item['price2'] = price2
         yield item
Example No. 3
 def parse(self, response):
     for i in range(1, 5):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath(
             '//*[@id="special-zoneT"]/div[%d]/a/h1/span/text()' %
             i).extract()
         cj = response.xpath(
             '//*[@id="special-zoneT"]/div[%d]/a/section/p[1]/text()' %
             i).extract()
         gg = response.xpath(
             '//*[@id="special-zoneT"]/div[%d]/a/section/p[2]/text()' %
             i).extract()
         xq = response.xpath(
             '//*[@id="special-zoneT"]/div[%d]/a/section/div[1]/p[1]/text()'
             % i).extract()
         price = response.xpath(
             '//*[@id="special-zoneT"]/div[%d]/a/section/div[2]/p/span/text()'
             % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         yield item
Example No. 4
 def parse(self, response):
     # print(response.url)
     # print(response.body.decode('utf-8'))
     for i in range(1, 21):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath(
             '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/div[1]/text()'
             % i).extract()
         cj = response.xpath(
             '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/p[2]/text()'
             % i).extract()
         gg = response.xpath(
             '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/p[3]/span/text()'
             % i).extract()
         xq = response.xpath(
             '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/p[4]/span[1]/text()'
             % i).extract()
         price = response.xpath(
             '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/p[1]/text()'
             % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         yield item
Example No. 5
 def parse_profile(self, response):
     # read the captcha image URL from the @src attribute and download it
     # (needs `import urllib.request` at module level)
     yzm = response.xpath('//*[@id="captchaImg"]/@src').extract()
     location = 'F:/pyhcarm/ScrapyPage/ScrapyPage/yzm/yzm.jpg'
     # urljoin handles the case where the src is a relative URL
     urllib.request.urlretrieve(response.urljoin(yzm[0]), filename=location)
     # the captcha text is typed in by hand; this snippet does not use it further
     captcha_value = input()
     # print(response.text)
     for i in range(1, 21):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath(
             '//*[@id="wrapper"]/div[4]/div/div[%d]/div[2]/div[2]/span[4]/text()'
             % i).extract()
         cj = response.xpath(
             '//*[@id="wrapper"]/div[4]/div/div[%d]/div[2]/div[4]/text()' %
             i).extract()
         xq = response.xpath(
             '//*[@id="wrapper"]/div[4]/div/div[%d]/div[1]/div[2]/div/text()'
             % i).extract()
         price = response.xpath(
             # the row index goes into the third div, matching the fields above
             '//*[@id="wrapper"]/div[4]/div/div[%d]/div[2]/div[1]/strong/text()'
             % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['xq'] = xq
         item['price'] = price
         yield item
Example No. 6
 def parse(self, response):
     # print(response.text)
     for i in range(1, 4):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.css(
             '#datu > div > ul > li:nth-child(%d) > div.datu-mingzi::text' %
             i).extract()
         cj = response.xpath('//*[@id="datu"]/div/ul/li[%d]/div[4]/text()' %
                             i).extract()
         gg = response.xpath(
             '//*[@id="datu"]/div/ul/li[%d]/div[5]/span/text()' %
             i).extract()
         xq = response.xpath(
             '//*[@id="datu"]/div/ul/li[%d]/div[6]/span[1]/text()' %
             i).extract()
         price = response.xpath(
             '//*[@id="datu"]/div/ul/li[%d]/div[2]/div[1]/text()' %
             i).extract()
         price2 = response.xpath(
             '//*[@id="datu"]/div/ul/li[%d]/div[2]/div[2]/text()' %
             i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         item['price2'] = price2
         yield item
Example No. 7
 def parse_profile(self, response):
     # print(response.text)
     for i in range(1, 5):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath('//*[@id="pro_list1"]/li[%d]/p[2]/a/text()' %
                               i).extract()
         cj = response.xpath('//*[@id="pro_list1"]/li[%d]/p[3]/text()' %
                             i).extract()
         gg = response.xpath('//*[@id="pro_list1"]/li[%d]/p[4]/text()' %
                             i).extract()
         xq = response.xpath('//*[@id="pro_list1"]/li[%d]/p[5]/text()' %
                             i).extract()
         price = response.xpath(
             '//*[@id="pro_list1"]/li[%d]/p[1]/span[1]/span/text()' %
             i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         yield item
     # follow the "下一页" ("next page") link when one exists
     next_page = response.xpath("//*[text()='下一页']/@href").extract_first()
     if next_page is not None:
         next_page1 = response.urljoin(next_page)
         yield scrapy.Request(next_page1, callback=self.parse_profile)
Example No. 8
 def parse(self, response):
     # print(response.text)
     for i in range(1, 41):
         # time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[1]/a/text()' %
             i).extract()
         cj = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[2]/text()' %
             i).extract()
         gg = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[3]/span/text()' %
             i).extract()
         xq = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[6]/text()' %
             i).extract()
         price = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[7]/span[1]/text()' %
             i).extract()
         price2 = response.xpath(
             '/html/body/div[4]/div/div[4]/ul/li[%d]/p[8]/span[2]/text()' %
             i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         item['price2'] = price2
         yield item
     next_page = response.xpath("//*[text()='下一页']/@href").extract_first()
     if next_page is not None:
         next_page = response.urljoin(next_page)
         yield scrapy.Request(next_page, callback=self.parse)
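Examples No. 7 and No. 8 page through results by extracting the "下一页" ("next page") href and rebuilding an absolute URL with response.urljoin before yielding a new Request. Scrapy's response.follow accepts a relative href directly, so the same tail can be written more compactly; a minimal sketch, with the spider name and start URL as placeholders and the per-row extraction elided:

import scrapy


class PagingSketchSpider(scrapy.Spider):
    """Sketch: the pagination tail of Example No. 8 via response.follow."""
    name = 'paging_sketch'                    # hypothetical spider name
    start_urls = ['http://example.com/list']  # placeholder URL

    def parse(self, response):
        # (per-row item extraction would go here, as in Example No. 8)
        # response.follow resolves a relative href against the current page,
        # so no explicit response.urljoin call is needed
        next_page = response.xpath("//*[text()='下一页']/@href").get()
        if next_page is not None:
            yield response.follow(next_page, callback=self.parse)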
Example No. 9
 def parse(self, response):
     # print(response.url)
     # print(response.body.decode('utf-8'))
     for i in range(1, 7):
         item = CrawlerwebItem()
         name = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[1]/text()' % i).extract()
         cj = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[2]/text()' % i).extract()
         price = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[3]/span/text()' % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['price'] = price
         yield item
Example No. 10
 def parse(self, response):
     for i in range(1, 41):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath('//*[@id="pageContent"]/div/div[%d]/h1/text()' % i).extract()
         cj = response.xpath('//*[@id="pageContent"]/div/div[%d]/p/text()' % i).extract()
         gg = response.xpath('//*[@id="pageContent"]/div/div[%d]/section[1]/p[1]/text()' % i).extract()
         xq = response.xpath('//*[@id="pageContent"]/div/div[%d]/section[2]/p[1]/text()' % i).extract()
         price = response.xpath('//*[@id="pageContent"]/div/div[%d]/h2/span/text()' % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         yield item
Example No. 11
 def parse(self, response):
     for i in range(1, 21):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath('/html/body/div[8]/ul/li[%d]/div[3]/text()' % i).extract()
         cj = response.xpath('/html/body/div[8]/ul/li[%d]/p[1]/text()' % i).extract()
         gg = response.xpath('/html/body/div[8]/ul/li[%d]/p[2]/text()' % i).extract()
         xq = response.xpath('/html/body/div[8]/ul/li[%d]/p[3]/span[1]/text()' % i).extract()
         price = response.xpath('/html/body/div[8]/ul/li[%d]/div[1]/p/span[2]/text()' % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         yield item
Example No. 12
 def parse_profile(self, response):
     # print(response.text)
     for i in range(1, 21):
         time.sleep(5)
         item = CrawlerwebItem()
         name = response.xpath('//*[@id="pro_list1"]/li[%d]/p[1]/a/text()' % i).extract()
         cj = response.xpath('//*[@id="pro_list1"]/li[%d]/p[3]/text()' % i).extract()
         gg = response.xpath('//*[@id="pro_list1"]/li[%d]/p[4]/span/text()' % i).extract()
         xq = response.xpath('//*[@id="pro_list1"]/li[%d]/p[6]/span[1]/i/text()' % i).extract()
         price = response.xpath('//*[@id="pro_list1"]/li[%d]/p[2]/span[2]/text()' % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         yield item
Example No. 13
 def parse(self, response):
     # print(response.url)
     # print(response.body.decode('utf-8'))
     for i in range(1, 5):
         item = CrawlerwebItem()
         name = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[1]/text()' % i).extract()
         cj = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[2]/text()' % i).extract()
         price = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[3]/span/text()' % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['price'] = price
         yield item
     next_page = response.xpath('/html/body/div[2]/div[2]/div[2]/span[12]/a/@href').extract_first()
     if next_page is not None:
         next_page1 = response.urljoin(next_page)
         yield scrapy.Request(url=next_page1, callback=self.parse, dont_filter=True)
Example No. 14
 def parse(self, response):
     # print("<<<<<<<<" + response.text)
     for i in range(1, 5):
         time.sleep(1)
         item = CrawlerwebItem()
         name = response.xpath('/html/body/ul/li[%d]/p[2]/a/text()' % i).extract()
         cj = response.xpath('/html/body/ul/li[%d]/p[3]/text()' % i).extract()
         gg = response.xpath('/html/body/ul/li[%d]/p[4]/text()' % i).extract()
         xq = response.xpath('/html/body/ul/li[%d]/p[5]/span[1]/text()' % i).extract()
         price = response.xpath('/html/body/ul/li[%d]/p[1]/span[1]/text()' % i).extract()
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         yield item
Example No. 15
 def parse(self, response):
     print("<<<<<<<<" + response.text)
     res = json.loads(response.text)
     for i in range(1, 10):
         time.sleep(1)
         item = CrawlerwebItem()
         name = res[int(i)]['name']
         cj = res[int(i)]['production']
         gg = res[int(i)]['norms']
         xq = res[int(i)]['exp']
         price = res[int(i)]['wholesale']
         item['name'] = name
         item['cj'] = cj
         item['gg'] = gg
         item['xq'] = xq
         item['price'] = price
         yield item
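Example No. 15 indexes the decoded JSON with a fixed range(1, 10), which skips the entry at index 0 and raises IndexError whenever the API returns fewer than ten entries. Below is a minimal sketch that walks whatever list the response actually contains, assuming the same keys ('name', 'production', 'norms', 'exp', 'wholesale'); the import path, spider name, and endpoint URL are placeholders, not taken from the original project.

import json

import scrapy

from crawlerweb.items import CrawlerwebItem  # assumed item module path


class JsonSketchSpider(scrapy.Spider):
    """Sketch: consume the JSON list from Example No. 15 defensively."""
    name = 'json_sketch'                     # hypothetical spider name
    start_urls = ['http://example.com/api']  # placeholder endpoint

    def parse(self, response):
        res = json.loads(response.text)  # response.json() also works on Scrapy >= 2.2
        for entry in res:
            item = CrawlerwebItem()
            # dict.get() tolerates a missing key instead of raising KeyError
            item['name'] = entry.get('name')
            item['cj'] = entry.get('production')
            item['gg'] = entry.get('norms')
            item['xq'] = entry.get('exp')
            item['price'] = entry.get('wholesale')
            yield item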