def parse(self, response):
    # Needs `import re` at module level; the regex pulls the numeric part out of the price text.
    for i in range(1, 21):
        item = CrawlerwebItem()
        name = response.xpath('/html/body/div[8]/ul/li[%d]/div[3]/text()' % i).extract()
        cj = response.xpath('/html/body/div[8]/ul/li[%d]/p[1]/text()' % i).extract()
        gg = response.xpath('/html/body/div[8]/ul/li[%d]/p[2]/text()' % i).extract()
        xq = response.xpath(
            '/html/body/div[8]/ul/li[%d]/p[3]/span[1]/text()' % i).extract()
        price = response.xpath(
            '/html/body/div[8]/ul/li[%d]/div[2]/text()' % i).extract_first()
        # extract_first() can return None, so guard before running the regex.
        price_1 = re.findall(r"\d+\.?\d*", price) if price else []
        price2 = response.xpath(
            '/html/body/div[8]/ul/li[%d]/div[2]/span[1]/text()' % i).extract()
        price3 = response.xpath(
            '/html/body/div[8]/ul/li[%d]/div[1]/p/span[2]/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price_1
        item['price2'] = price2
        item['price3'] = price3
        yield item
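# Every parse method in this listing fills a CrawlerwebItem, which is defined
# elsewhere in the project. A minimal sketch of the assumed items.py, using
# exactly the field names the spiders write (price2/price3 are only populated
# by some of them):
import scrapy


class CrawlerwebItem(scrapy.Item):
    name = scrapy.Field()    # product name
    cj = scrapy.Field()      # 厂家 (manufacturer)
    gg = scrapy.Field()      # 规格 (specification)
    xq = scrapy.Field()      # 效期 (expiry date)
    price = scrapy.Field()
    price2 = scrapy.Field()
    price3 = scrapy.Field()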
def parse(self, response):
    # print(response.text)
    time.sleep(1)
    for i in range(1, 41):
        time.sleep(1)
        item = CrawlerwebItem()
        name = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[1]/a/text()' % i).extract()
        cj = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[2]/text()' % i).extract()
        gg = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[3]/span/text()' % i).extract()
        xq = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[6]/text()' % i).extract()
        price = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[7]/span[1]/text()' % i).extract()
        # price2 = response.xpath('/html/body/div[4]/div/div[4]/ul/li[%d]/p[8]/span[2]/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        # item['price2'] = price2
        yield item
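# Note: time.sleep() inside parse() blocks Scrapy's event loop and stalls every
# other in-flight request. If the intent is only to throttle politely, the
# idiomatic place is settings.py; a minimal sketch (values are illustrative,
# not taken from the original project):
DOWNLOAD_DELAY = 1            # seconds between requests to the same site
AUTOTHROTTLE_ENABLED = True   # let Scrapy adapt the delay to server latency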
def parse(self, response):
    for i in range(1, 5):
        time.sleep(1)
        item = CrawlerwebItem()
        name = response.xpath(
            '//*[@id="special-zoneT"]/div[%d]/a/h1/span/text()' % i).extract()
        cj = response.xpath(
            '//*[@id="special-zoneT"]/div[%d]/a/section/p[1]/text()' % i).extract()
        gg = response.xpath(
            '//*[@id="special-zoneT"]/div[%d]/a/section/p[2]/text()' % i).extract()
        xq = response.xpath(
            '//*[@id="special-zoneT"]/div[%d]/a/section/div[1]/p[1]/text()' % i).extract()
        price = response.xpath(
            '//*[@id="special-zoneT"]/div[%d]/a/section/div[2]/p/span/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        yield item
def parse(self, response):
    # print(response.url)
    # print(response.body.decode('utf-8'))
    for i in range(1, 21):
        time.sleep(1)
        item = CrawlerwebItem()
        name = response.xpath(
            '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/div[1]/text()' % i).extract()
        cj = response.xpath(
            '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/p[2]/text()' % i).extract()
        gg = response.xpath(
            '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/p[3]/span/text()' % i).extract()
        xq = response.xpath(
            '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/p[4]/span[1]/text()' % i).extract()
        price = response.xpath(
            '/html/body/div[7]/div[4]/div/ul/li[%d]/div[2]/div[1]/p[1]/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        yield item
def parse_profile(self, response): yzm = response.xpath('//*[@id="captchaImg"]/src').extract() location = 'F:/pyhcarm/ScrapyPage/ScrapyPage/yzm/yzm.jpg' urllib.request.urlretrieve(yzm[0], filename=location) captcha_value = input() # print(response.text) for i in range(1, 21): time.sleep(1) item = CrawlerwebItem() name = response.xpath( '//*[@id="wrapper"]/div[4]/div/div[%d]/div[2]/div[2]/span[4]/text()' % i).extract() cj = response.xpath( '//*[@id="wrapper"]/div[4]/div/div[%d]/div[2]/div[4]/text()' % i).extract() xq = response.xpath( '//*[@id="wrapper"]/div[4]/div/div[%d]/div[1]/div[2]/div/text()' % i).extract() price = response.xpath( '//*[@id="wrapper"]/div[4]/div/div[1]/div[2]/div[1]/strong/text()' % i).extract() item['name'] = name item['cj'] = cj item['xq'] = xq item['price'] = price yield item
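# captcha_value above is read from stdin but never sent back to the site in
# this snippet. If the site expects the captcha with a form submission, the
# usual Scrapy pattern is a FormRequest. A hedged sketch only: the endpoint
# and form field name below are hypothetical placeholders, not taken from the
# target site.
def submit_captcha(self, response, captcha_value):
    return scrapy.FormRequest(
        url=response.urljoin('/checkCaptcha'),   # hypothetical endpoint
        formdata={'captcha': captcha_value},     # hypothetical field name
        callback=self.parse_profile,
    )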
def parse(self, response):
    # print(response.text)
    for i in range(1, 4):
        time.sleep(1)
        item = CrawlerwebItem()
        name = response.css(
            '#datu > div > ul > li:nth-child(%d) > div.datu-mingzi::text' % i).extract()
        cj = response.xpath('//*[@id="datu"]/div/ul/li[%d]/div[4]/text()' % i).extract()
        gg = response.xpath(
            '//*[@id="datu"]/div/ul/li[%d]/div[5]/span/text()' % i).extract()
        xq = response.xpath(
            '//*[@id="datu"]/div/ul/li[%d]/div[6]/span[1]/text()' % i).extract()
        price = response.xpath(
            '//*[@id="datu"]/div/ul/li[%d]/div[2]/div[1]/text()' % i).extract()
        price2 = response.xpath(
            '//*[@id="datu"]/div/ul/li[%d]/div[2]/div[2]/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        item['price2'] = price2
        yield item
def parse_profile(self, response):
    # print(response.text)
    for i in range(1, 5):
        time.sleep(1)
        item = CrawlerwebItem()
        name = response.xpath('//*[@id="pro_list1"]/li[%d]/p[2]/a/text()' % i).extract()
        cj = response.xpath('//*[@id="pro_list1"]/li[%d]/p[3]/text()' % i).extract()
        gg = response.xpath('//*[@id="pro_list1"]/li[%d]/p[4]/text()' % i).extract()
        xq = response.xpath('//*[@id="pro_list1"]/li[%d]/p[5]/text()' % i).extract()
        price = response.xpath(
            '//*[@id="pro_list1"]/li[%d]/p[1]/span[1]/span/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        yield item
    # Follow the link whose text is "下一页" ("next page"), if one exists.
    next_page = response.xpath("//*[text()='下一页']/@href").extract_first()
    if next_page is not None:
        next_page1 = response.urljoin(next_page)
        yield scrapy.Request(next_page1, callback=self.parse_profile)
def parse(self, response):
    # print(response.text)
    for i in range(1, 41):
        # time.sleep(1)
        item = CrawlerwebItem()
        name = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[1]/a/text()' % i).extract()
        cj = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[2]/text()' % i).extract()
        gg = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[3]/span/text()' % i).extract()
        xq = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[6]/text()' % i).extract()
        price = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[7]/span[1]/text()' % i).extract()
        price2 = response.xpath(
            '/html/body/div[4]/div/div[4]/ul/li[%d]/p[8]/span[2]/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        item['price2'] = price2
        yield item
    next_page = response.xpath("//*[text()='下一页']/@href").extract_first()
    if next_page is not None:
        next_page = response.urljoin(next_page)
        yield scrapy.Request(next_page, callback=self.parse)
def parse(self, response):
    # print(response.url)
    # print(response.body.decode('utf-8'))
    for i in range(1, 7):
        item = CrawlerwebItem()
        name = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[1]/text()' % i).extract()
        cj = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[2]/text()' % i).extract()
        price = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[3]/span/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['price'] = price
        yield item
def parse(self, response):
    for i in range(1, 41):
        time.sleep(1)
        item = CrawlerwebItem()
        name = response.xpath('//*[@id="pageContent"]/div/div[%d]/h1/text()' % i).extract()
        cj = response.xpath('//*[@id="pageContent"]/div/div[%d]/p/text()' % i).extract()
        gg = response.xpath('//*[@id="pageContent"]/div/div[%d]/section[1]/p[1]/text()' % i).extract()
        xq = response.xpath('//*[@id="pageContent"]/div/div[%d]/section[2]/p[1]/text()' % i).extract()
        price = response.xpath('//*[@id="pageContent"]/div/div[%d]/h2/span/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        yield item
def parse(self, response):
    for i in range(1, 21):
        time.sleep(1)
        item = CrawlerwebItem()
        name = response.xpath('/html/body/div[8]/ul/li[%d]/div[3]/text()' % i).extract()
        cj = response.xpath('/html/body/div[8]/ul/li[%d]/p[1]/text()' % i).extract()
        gg = response.xpath('/html/body/div[8]/ul/li[%d]/p[2]/text()' % i).extract()
        xq = response.xpath('/html/body/div[8]/ul/li[%d]/p[3]/span[1]/text()' % i).extract()
        price = response.xpath('/html/body/div[8]/ul/li[%d]/div[1]/p/span[2]/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        yield item
def parse_profile(self, response):
    # print(response.text)
    for i in range(1, 21):
        time.sleep(5)
        item = CrawlerwebItem()
        name = response.xpath('//*[@id="pro_list1"]/li[%d]/p[1]/a/text()' % i).extract()
        cj = response.xpath('//*[@id="pro_list1"]/li[%d]/p[3]/text()' % i).extract()
        gg = response.xpath('//*[@id="pro_list1"]/li[%d]/p[4]/span/text()' % i).extract()
        xq = response.xpath('//*[@id="pro_list1"]/li[%d]/p[6]/span[1]/i/text()' % i).extract()
        price = response.xpath('//*[@id="pro_list1"]/li[%d]/p[2]/span[2]/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['gg'] = gg
        item['xq'] = xq
        item['price'] = price
        yield item
def parse(self, response):
    # print(response.url)
    # print(response.body.decode('utf-8'))
    for i in range(1, 5):
        item = CrawlerwebItem()
        name = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[1]/text()' % i).extract()
        cj = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[2]/text()' % i).extract()
        price = response.xpath('/html/body/div[2]/div[2]/div[1]/ul/li[%d]/p[3]/span/text()' % i).extract()
        item['name'] = name
        item['cj'] = cj
        item['price'] = price
        yield item
    next_page = response.xpath('/html/body/div[2]/div[2]/div[2]/span[12]/a/@href').extract_first()
    if next_page is not None:
        next_page1 = response.urljoin(next_page)
        yield scrapy.Request(url=next_page1, callback=self.parse, dont_filter=True)
def parse(self, response): # print("<<<<<<<<" + response.text) for i in range(1, 5): time.sleep(1) item = CrawlerwebItem() name = response.xpath('/html/body/ul/li[%d]/p[2]/a/text()' % i).extract() cj = response.xpath('/html/body/ul/li[%d]/p[3]/text()' % i).extract() gg = response.xpath('/html/body/ul/li[%d]/p[4]/text()' % i).extract() xq = response.xpath('/html/body/ul/li[%d]/p[5]/span[1]/text()' % i).extract() price = response.xpath('/html/body/ul/li[%d]/p[1]/span[1]/text()' % i).extract() item['name'] = name item['cj'] = cj item['gg'] = gg item['xq'] = xq item['price'] = price yield item
def parse(self, response): print("<<<<<<<<" + response.text) res = json.loads(response.text) for i in range(1, 10): time.sleep(1) item = CrawlerwebItem() name = res[int(i)]['name'] cj = res[int(i)]['production'] gg = res[int(i)]['norms'] xq = res[int(i)]['exp'] price = res[int(i)]['wholesale'] item['name'] = name item['cj'] = cj item['gg'] = gg item['xq'] = xq item['price'] = price yield item