def get_book_msg(book_id):
    """Scrape a book's index page and store its metadata and chapter list.

    The page at ``url0 + get_real_book_id(book_id)`` is fetched with ``mget``.
    On HTTP 200 the ``div#info``, ``div#intro`` and ``div#list`` sections are
    parsed; one row is inserted into ``books`` and one row per chapter link
    into ``chapters``. Any other status code is only reported via ``print``.

    Args:
        book_id: Identifier of the book; stored as-is in both tables.

    Returns:
        None. Results are written through ``DBPool.insert_item``.
    """

    def field_value(text):
        # Keep only what follows the last ':' in a "label:value" text node.
        return text.split(':')[-1].strip()

    response = mget(url0 + get_real_book_id(book_id))
    if response.status_code != 200:
        print('book_id: ', book_id, '--------------------响应失败_', response.status_code)
        return

    page = etree.HTML(response.content)
    info_node = page.xpath('//div[@id="info"]')[0]
    book_name = info_node.xpath('.//h1/text()')[0].strip()

    # Text nodes of the <p> children; the bare ',' node is dropped so that
    # the first three remaining entries are author, state and update time.
    fields = info_node.xpath('./p/text()')
    fields.remove(',')
    author = field_value(fields[0])
    state = field_value(fields[1])
    update_time = field_value(fields[2])

    # Book introduction: cut at the '各位书友' marker, fall back to '暂无'
    # when nothing is left.
    intro_text = page.xpath('string(//div[@id="intro"])').strip()
    intro = intro_text.split('各位书友')[0] or '暂无'

    DBPool.insert_item(
        table_name='books',
        cols_tuple=books_cols,
        values_tuple=(book_id, book_name, author, state, update_time, intro),
    )

    chapter_list = page.xpath('//div[@id="list"]')[0]
    hrefs = chapter_list.xpath('.//dd//a/@href')  # chapter links
    titles = chapter_list.xpath('.//dd//a/text()')  # chapter titles
    if len(hrefs) != len(titles):
        # Links and titles must pair up one-to-one; otherwise skip the insert.
        print('book_id: ', book_id, 'book_name:', book_name, '--------------------解析错误')
        return

    rows = []
    for href, title in zip(hrefs, titles):
        link_id = findall(pattern_for_chapter_link_id, href)[0]
        # Keep only the part of the title after its first space, if any.
        # NOTE(review): the trailing 0 and '' presumably map to the state and
        # content columns of chapters_cols -- confirm against that tuple.
        rows.append([link_id, book_id, title.split(' ', 1)[-1], 0, ''])
    DBPool.insert_item(table_name='chapters', cols_tuple=chapters_cols, values_tuple=rows)
def download_chapter(book_id=6513, chapter_id=1443774):
    """Fetch one chapter page and save its text into the ``chapters`` table.

    On HTTP 200 the string value of ``div#content`` is stripped, single
    quotes are doubled, and an UPDATE sets ``state=1`` and ``content`` for
    the given ``chapter_id``. A failing UPDATE is caught and printed together
    with its traceback; a non-200 response is only printed.

    Args:
        book_id: Book the chapter belongs to; used for the URL and messages.
        chapter_id: Chapter identifier; used for the URL and the WHERE clause.

    Returns:
        None.
    """
    book_url = url0 + get_real_book_id(book_id)
    page_url = book_url + '/' + str(chapter_id) + '.html'
    response = mget(page_url)
    if response.status_code != 200:
        print('book_id: ', book_id, 'chapter_id', chapter_id, '--------------------响应失败_', response.status_code)
        return

    tree = etree.HTML(response.content)
    raw_text = tree.xpath('string(//div[@id="content"])')
    # Double single quotes so the text can sit inside a quoted SQL literal.
    escaped = raw_text.strip().replace("'", "''")
    # NOTE(review): the statement is assembled by string interpolation from
    # scraped text; switch to bind parameters if DBPool.exe_sql supports them.
    statement = "update chapters set state=1, content='%s' where chapter_id=%s " % (escaped, chapter_id)
    try:
        DBPool.exe_sql(statement)
        print('book_id: ', book_id, 'chapter_id', chapter_id, '已保存')
    except Exception as err:
        print('book_id: ', book_id, 'chapter_id', chapter_id, '--------------------content保存失败')
        print(format_exc(), err)
def download_book(book_id=6513):
    """Download, one after another, every chapter of a book with ``state=0``.

    Args:
        book_id: Integer book identifier (formatted into the query with
            ``%d``).

    Returns:
        None. Errors raised for a single chapter are printed with their
        traceback and do not stop the remaining chapters.
    """
    query = "select chapter_id from chapters where book_id=%d and state=0" % book_id
    pending = DBPool.exe_sql(query)  # rows of one-element tuples
    if not pending:
        return
    for row in pending:
        try:
            download_chapter(book_id, row[0])
        except Exception as err:
            print(format_exc(), err)
def download_chapters(chapters_n=100):
    """Download up to ``chapters_n`` chapters that still have ``state=0``.

    Args:
        chapters_n: Integer row limit placed in the query's ``limit`` clause.

    Returns:
        None. Errors raised for a single chapter are printed with their
        traceback and do not stop the remaining chapters.
    """
    query = "select chapter_id, book_id from chapters where state=0 limit %d" % chapters_n
    pending = DBPool.exe_sql(query)  # rows of (chapter_id, book_id)
    if not pending:
        return
    for row in pending:
        try:
            # download_chapter takes the book id first, then the chapter id.
            download_chapter(row[1], row[0])
        except Exception as err:
            print(format_exc(), err)
def download_chapters_in_thread(chapters_n=100, thread_n=5):
    """Download up to ``chapters_n`` pending chapters using ``thread_n`` threads.

    Rows with ``state=0`` are selected from ``chapters`` and split into
    ``thread_n`` consecutive slices; each slice is handed to
    ``download_chapter_cell`` in its own thread. The threads are started but
    not joined, so this function returns as soon as they are launched.

    Args:
        chapters_n: Integer row limit placed in the query's ``limit`` clause.
        thread_n: Number of worker threads to create.

    Returns:
        None.
    """
    sql = "select chapter_id, book_id from chapters where state=0 limit %d" % chapters_n
    res = DBPool.exe_sql(sql)  # rows of (chapter_id, book_id)
    if not res:
        return

    # Size the slices from the rows actually returned, not from the requested
    # limit: when fewer than chapters_n chapters are pending, sizing from
    # chapters_n would give all of them to the first thread(s) and leave the
    # others with empty slices.
    step = ceil(len(res) / thread_n)
    threads = [
        Thread(name=str(i), target=download_chapter_cell, args=(res[i * step:(i + 1) * step],))
        for i in range(thread_n)
    ]
    for t in threads:
        t.start()
def Test(iterations=15):
    """Smoke-test the connection pool against a local MySQL ``test`` database.

    Imports ``MySQLdb``, builds a ``DBPool`` around it, then ``iterations``
    times takes a connection, runs ``select * from test`` and prints the
    iteration index with the fetched rows. Any error (missing adapter,
    unreachable database, ...) is reported with its traceback followed by a
    hint, instead of propagating.

    Args:
        iterations: Number of get-connection / query / close cycles to run.

    Returns:
        None.
    """
    try:
        dbModuleName = 'MySQLdb'
        dbModule = __import__(dbModuleName)
        pool = DBPool(dbModule, 10, host='localhost', user='******', passwd='test', db='test')
        for i in range(iterations):
            db = pool.getConnection()
            try:
                cursor = db.cursor()
                cursor.execute("select * from test")
                # print() call: the former print statement is a SyntaxError
                # on Python 3, which the rest of this file targets.
                print(i, cursor.fetchall())
            finally:
                # Always hand the connection back, even if the query fails.
                db.close()
    except Exception:
        # Exception rather than a bare except, so Ctrl-C / SystemExit still
        # propagate.
        import traceback
        traceback.print_exc()
        print('You need the MySQLdb adapter and a test database for this example')
def download_book_in_thread(book_id=6513, thread_n=5):
    """Download a book's pending chapters using ``thread_n`` threads.

    Chapters of ``book_id`` with ``state=0`` are selected and split into
    ``thread_n`` consecutive slices of equal size (the last ones may be
    shorter or empty); each slice is passed with ``book_id`` to
    ``download_book_cell`` in its own thread. Threads are started but not
    joined.

    Args:
        book_id: Integer book identifier (formatted into the query with
            ``%d``).
        thread_n: Number of worker threads to create.

    Returns:
        None.
    """
    query = "select chapter_id from chapters where book_id=%d and state=0" % book_id
    rows = DBPool.exe_sql(query)  # rows of one-element tuples
    if not rows:
        return

    chunk_size = ceil(len(rows) / thread_n)
    workers = []
    for idx in range(thread_n):
        begin = idx * chunk_size
        chunk = rows[begin:begin + chunk_size]
        workers.append(Thread(name=str(idx), target=download_book_cell, args=(book_id, chunk)))

    # Create every worker first, then launch them all.
    for worker in workers:
        worker.start()