Esempio n. 1
0
 def qingqiu(self):
     """Collect the parsed links from every first-level listing page.

     Pages are requested through ``ktgg.request(self.url, page_no)``
     starting at page 1. Paging stops at the first page whose HTML does
     not contain the text '上一页' ("previous page"); that page is not
     parsed. Note that if page 1 itself lacks that text, nothing is
     collected.

     Returns:
         A list with the results of ``ktgg.parse`` for each accepted
         page, in page order (presumably the detail-page link elements
         selected by the XPath -- confirm against ``ktgg.parse``).
     """
     link_xpath = '//div[@class="paginationControl"]/preceding-sibling::ul//a'
     collected = []
     print('开始请求一级页面.....')

     page_no = 1
     page_html = ktgg.request(self.url, page_no)
     # A page without the "previous page" marker is treated as being
     # past the last real page, which ends the crawl.
     while re.search('上一页', page_html) is not None:
         # Pull the links (and their text) out of the page with XPath.
         collected += ktgg.parse(page_html, link_xpath)
         page_no += 1
         page_html = ktgg.request(self.url, page_no)

     total = len(collected)
     print('请求到详情页数量是: ' + str(total))
     return collected
Esempio n. 2
0
 def qingqiu(self):
     """Collect the parsed links from every first-level listing page.

     Pages are requested through ``ktgg.request(self.url, page_no)``
     starting at page 1. Paging stops at the first page whose HTML does
     not contain the text '上一页' ("previous page"); that page is not
     parsed. Note that if page 1 itself lacks that text, nothing is
     collected.

     Returns:
         A list with the results of ``ktgg.parse`` for each accepted
         page, in page order (presumably the detail-page link elements
         selected by the XPath -- confirm against ``ktgg.parse``).
     """
     link_xpath = '//div[@id="list"]//li/span/a'
     collected = []
     print('开始请求一级页面.....')

     page_no = 1
     page_html = ktgg.request(self.url, page_no)
     # A page without the "previous page" marker is treated as being
     # past the last real page, which ends the crawl.
     while re.search('上一页', page_html) is not None:
         # Pull the links (and their text) out of the page with XPath.
         collected += ktgg.parse(page_html, link_xpath)
         page_no += 1
         page_html = ktgg.request(self.url, page_no)

     total = len(collected)
     print('请求到详情页数量是: ' + str(total))
     return collected