def reptileNovelByClassify(classify):
    """Scrape one category page and bulk-insert its novels into gysw_novel.

    Args:
        classify: Mapping with a ``'path'`` entry, appended to the site root
            to build the page URL, and an ``'id'`` entry, stored as
            ``classify_id`` on every inserted row.

    Errors raised while fetching, parsing or inserting are printed and
    swallowed (best-effort scrape). A missing ``'path'`` key still raises
    ``KeyError`` to the caller. The database handle is closed in every case.
    """
    db = Db()
    try:
        target_url = 'https://www.biquge5200.cc/' + classify['path']
        try:
            # A timeout keeps an unresponsive server from hanging the
            # scraper forever; the resulting exception is reported below.
            r = requests.get(target_url, timeout=30)
            root = etree.HTML(r.text)
            novel_list = root.xpath('//div[@class="r"]//li')
            arr = []
            for novel in novel_list:
                # Each <li> carries the book link/title in span.s2 and the
                # author in span.s5; [0] raises IndexError on an unexpected
                # layout, which aborts this page via the handler below.
                url = novel.xpath('span[@class="s2"]/a/@href')[0]
                book_name = novel.xpath('span[@class="s2"]/a/text()')[0]
                author_name = novel.xpath('span[@class="s5"]/text()')[0]
                arr.append((url, book_name, author_name, classify['id']))
                print('抓取 %s' % book_name)
            print('开始保存数据....')
            db.insertMany(
                'insert into gysw_novel (`book_url`, `book_name`, `author_name`, `classify_id`) values (%s, %s, %s, %s)',
                tuple(arr))
        except Exception as e:
            print(e)
        print('操作结束')
    finally:
        # Release the connection even when an error propagates (e.g. a
        # missing 'path' key or a KeyboardInterrupt mid-scrape).
        db.close()
def novel():
    """Scrape the novel list of every stored category into gysw_novel.

    Reads all rows from ``gysw_classify``, fetches each category page from
    the site, and bulk-inserts the ``(book_url, book_name, author_name,
    classify_id)`` tuples found on it. A failure while fetching, parsing or
    inserting one category is printed and the loop moves on to the next
    category. The database handle is closed in every case.
    """
    db = Db()
    try:
        classifies = db.selectAll('select * from gysw_classify')
        for classify in classifies:
            target_url = 'https://www.biquge5200.cc/' + classify['path']
            try:
                # A timeout keeps one unresponsive page from stalling the
                # whole run; the exception is reported and the loop goes on.
                r = requests.get(target_url, timeout=30)
                root = etree.HTML(r.text)
                novel_list = root.xpath('//div[@class="r"]//li')
                arr = []
                # Loop variable is deliberately not called ``novel`` so it
                # does not shadow this function's own name.
                for item in novel_list:
                    url = item.xpath('span[@class="s2"]/a/@href')[0]
                    book_name = item.xpath('span[@class="s2"]/a/text()')[0]
                    author_name = item.xpath('span[@class="s5"]/text()')[0]
                    arr.append((url, book_name, author_name, classify['id']))
                print('开始保存数据....')
                db.insertMany(
                    'insert into gysw_novel (`book_url`, `book_name`, `author_name`, `classify_id`) values (%s, %s, %s, %s)',
                    tuple(arr))
            except Exception as e:
                print('cuowu chu xian ', e)
        print('操作结束')
    finally:
        # The single connection is shared by every iteration, so it is
        # released once here, including when an error propagates.
        db.close()
def reptileIndexClassify(self):
    """Scrape the category links from the site navigation bar into gysw_classify.

    Args:
        self: Not used by the body; kept so existing callers keep working.

    Each navigation entry becomes a ``(path, desc)`` row inserted with
    ``insert ignore``. Any failure is printed, not raised.
    """
    print('爬取首页分类数据:开始:(classify/reptileIndexClassify)...')
    target_url = 'https://www.biquge5200.com/modules/article/search.php'
    try:
        # A timeout keeps an unresponsive server from hanging the scraper.
        r = requests.get(target_url, timeout=30)
        root = etree.HTML(r.text)
        # position()>2 skips the first two <li> entries of the nav bar
        # (presumably non-category links -- confirm against the live page).
        classifies = root.xpath('//div[@class="nav"]//li[position()>2]')
        # The accumulator must exist before the loop; without it the first
        # append raises NameError and the whole scrape is reported as failed.
        arr1 = []
        for classify in classifies:
            # Second-to-last '/'-separated segment of the href; assumes the
            # link ends with a trailing slash -- TODO confirm.
            path = classify.xpath('a/@href')[0].split('/')[-2]
            desc = classify.xpath('a/text()')[0]
            arr1.append((path, desc))
        db = Db()
        try:
            db.insertMany(
                'insert ignore into gysw_classify (`path`, `desc`) values (%s, %s)',
                tuple(arr1))
        finally:
            # Close the connection even if the insert fails.
            db.close()
        print('爬取首页分类数据:成功:(classify/reptileIndexClassify)...')
    except Exception as e:
        print('爬取首页分类数据:失败:(classify/reptileIndexClassify)...')
        print(e)