def qingqiu(self):
    """Collect the entries matched on every first-level listing page.

    Pages are fetched one after another with ``ktgg.request(self.url, n)``,
    starting at ``n = 1`` (the second argument is presumably the page
    number -- confirm against ``ktgg.request``). Each response is run
    through ``ktgg.parse`` with a fixed XPath and the results are
    accumulated. The loop ends at the first response that does not
    contain the text '上一页'.

    Returns:
        A list holding everything ``ktgg.parse`` returned, in page order.
    """
    link_xpath = '//div[@class="paginationControl"]/preceding-sibling::ul//a'
    collected = []
    page_no = 1
    print('开始请求一级页面.....')
    while True:
        page_html = ktgg.request(self.url, page_no)
        # A response without the '上一页' marker is treated as a request
        # past the last page, so stop paging here.
        if re.search('上一页', page_html) is None:
            break
        # Use the XPath to pull the links (and their text) out of the page.
        collected += ktgg.parse(page_html, link_xpath)
        page_no += 1
    total = len(collected)
    print('请求到详情页数量是: ' + str(total))
    return collected
def qingqiu(self):
    """Collect the entries matched on every first-level listing page.

    Pages are fetched one after another with ``ktgg.request(self.url, n)``,
    starting at ``n = 1`` (the second argument is presumably the page
    number -- confirm against ``ktgg.request``). Each response is run
    through ``ktgg.parse`` with a fixed XPath and the results are
    accumulated. The loop ends at the first response that does not
    contain the text '上一页'.

    Returns:
        A list holding everything ``ktgg.parse`` returned, in page order.
    """
    link_xpath = '//div[@id="list"]//li/span/a'
    collected = []
    page_no = 1
    print('开始请求一级页面.....')
    while True:
        page_html = ktgg.request(self.url, page_no)
        # A response without the '上一页' marker is treated as a request
        # past the last page, so stop paging here.
        if re.search('上一页', page_html) is None:
            break
        # Use the XPath to pull the links (and their text) out of the page.
        collected += ktgg.parse(page_html, link_xpath)
        page_no += 1
    total = len(collected)
    print('请求到详情页数量是: ' + str(total))
    return collected