Exemple #1
0
def get_book_msg(book_id):
    """Fetch a book's index page and store its metadata and chapter list.

    The page is requested from ``url0 + get_real_book_id(book_id)``. On a
    200 response one row is inserted into the ``books`` table and one row
    per chapter link into the ``chapters`` table. Failures are reported
    with ``print``; nothing is returned.

    :param book_id: identifier of the book; stored as-is in both tables.
    """
    url = url0 + get_real_book_id(book_id)
    res = mget(url)
    if res.status_code != 200:
        print('book_id: ', book_id, '--------------------响应失败_', res.status_code)
        return

    html = etree.HTML(res.content)
    info = html.xpath('//div[@id="info"]')[0]
    book_name = info.xpath('.//h1/text()')[0].strip()

    temp = info.xpath('./p/text()')
    # Drop the stray ',' text node when it is there; list.remove() raises
    # ValueError for a missing element, so guard instead of assuming it.
    if ',' in temp:
        temp.remove(',')

    # Each entry is read as "label:value". Split only on the first colon so
    # a value that itself contains ':' (e.g. a time of day) is kept whole.
    author = temp[0].split(':', 1)[-1].strip()
    state = temp[1].split(':', 1)[-1].strip()
    update_time = temp[2].split(':', 1)[-1].strip()

    # Introduction text: everything before the '各位书友' marker, or the
    # '暂无' placeholder when that leaves an empty string.
    intro = html.xpath('string(//div[@id="intro"])').strip().split('各位书友')[0] or '暂无'
    DBPool.insert_item(
        table_name='books',
        cols_tuple=books_cols,
        values_tuple=(book_id, book_name, author, state, update_time, intro),
    )

    div_list = html.xpath('//div[@id="list"]')[0]
    chapters_link = div_list.xpath('.//dd//a/@href')    # chapter links
    chapters_title = div_list.xpath('.//dd//a/text()')  # chapter titles
    if len(chapters_link) != len(chapters_title):
        # Links and titles are paired by position, so a count mismatch
        # means the pairing cannot be trusted.
        print('book_id: ', book_id, 'book_name:', book_name, '--------------------解析错误')
        return

    # One row per chapter: [id extracted from the link, book_id, title text
    # after its first space, 0, ''].
    # NOTE(review): the trailing 0 and '' presumably map to the state and
    # content columns of chapters_cols -- confirm against its definition.
    rows = [
        [findall(pattern_for_chapter_link_id, link)[0], book_id, title.split(' ', 1)[-1], 0, '']
        for link, title in zip(chapters_link, chapters_title)
    ]
    DBPool.insert_item(table_name='chapters', cols_tuple=chapters_cols, values_tuple=rows)
Exemple #2
0
def download_chapter(book_id=6513, chapter_id=1443774):
    """Download one chapter page and store its text in the ``chapters`` table.

    The page ``<url0><real book id>/<chapter_id>.html`` is fetched; on a 200
    response the text of the ``div#content`` element is written to the row
    whose ``chapter_id`` matches and that row's ``state`` is set to 1.
    Outcomes are reported with ``print``; nothing is returned.

    :param book_id: book the chapter belongs to (used for the URL and logs).
    :param chapter_id: chapter identifier used in the URL and the SQL filter.
    """
    page_url = '%s/%s.html' % (url0 + get_real_book_id(book_id), chapter_id)
    response = mget(page_url)
    if response.status_code != 200:
        print('book_id: ', book_id, 'chapter_id', chapter_id, '--------------------响应失败_', response.status_code)
        return

    tree = etree.HTML(response.content)
    chapter_text = tree.xpath('string(//div[@id="content"])')
    # Double every single quote so the text can sit inside a quoted literal.
    escaped_text = chapter_text.strip().replace("'", "''")
    # NOTE(review): the statement is assembled by string interpolation from
    # scraped text; if DBPool.exe_sql supports bound parameters, prefer them.
    statement = "update chapters set state=1, content='%s' where chapter_id=%s " % (escaped_text, chapter_id)
    try:
        DBPool.exe_sql(statement)
        print('book_id: ', book_id, 'chapter_id', chapter_id, '已保存')
    except Exception as err:
        print('book_id: ', book_id, 'chapter_id', chapter_id, '--------------------content保存失败')
        print(format_exc(), err)
Exemple #3
0
def download_book(book_id=6513):
    """Download every chapter of a book whose ``state`` is still 0.

    Chapters are processed one after another; an exception raised for one
    chapter is printed and the loop continues with the next.

    :param book_id: integer id of the book (interpolated with ``%d``).
    """
    pending = DBPool.exe_sql(
        "select chapter_id from chapters where book_id=%d and state=0" % book_id
    )  # rows come back as a 2-D tuple
    for row in pending or ():
        try:
            download_chapter(book_id, row[0])
        except Exception as err:
            print(format_exc(), err)
Exemple #4
0
def download_chapters(chapters_n=100):
    """Download up to ``chapters_n`` chapters whose ``state`` is still 0.

    Chapters are taken from any book and processed sequentially; an
    exception raised for one chapter is printed and the loop continues.

    :param chapters_n: integer row limit for the select (``%d``).
    """
    pending = DBPool.exe_sql(
        "select chapter_id, book_id from chapters where state=0 limit %d" % chapters_n
    )  # rows come back as a 2-D tuple: (chapter_id, book_id)
    for row in pending or ():
        try:
            # download_chapter expects (book_id, chapter_id), the reverse of
            # the column order in the select.
            download_chapter(row[1], row[0])
        except Exception as err:
            print(format_exc(), err)
Exemple #5
0
def download_chapters_in_thread(chapters_n=100, thread_n=5):
    """Download up to ``chapters_n`` pending chapters using worker threads.

    Rows with ``state=0`` are selected, split into contiguous slices and
    each slice is handed to ``download_chapter_cell`` on its own thread.
    The threads are started but not joined, so this function returns while
    the downloads may still be running.

    :param chapters_n: integer row limit for the select (``%d``).
    :param thread_n: maximum number of worker threads to start.
    """
    sql = "select chapter_id, book_id from chapters where state=0 limit %d" % chapters_n
    res = DBPool.exe_sql(sql)  # 2-D tuple of (chapter_id, book_id)
    if not res:
        return

    # Size the slices from the rows actually returned, not from the limit
    # that was requested: with fewer rows than chapters_n the old
    # computation put all the work on the first thread(s) and started the
    # remaining threads with empty slices.
    step = ceil(len(res) / thread_n)
    threads = []
    for i in range(thread_n):
        batch = res[i * step: (i + 1) * step]
        if not batch:
            # Slices are contiguous, so every later one is empty as well.
            break
        threads.append(Thread(name=str(i), target=download_chapter_cell, args=(batch,)))
    for t in threads:
        t.start()
Exemple #6
0
def Test(iterations=15):
    """Smoke-test the connection pool against a local MySQL ``test`` database.

    Builds a ``DBPool`` around the ``MySQLdb`` module, then ``iterations``
    times takes a connection, runs ``select * from test`` and prints the
    iteration index followed by the fetched rows. Any error is reported with
    a traceback and a hint about the prerequisites; nothing is returned.

    :param iterations: number of connections to request and query.
    """
    import traceback

    try:
        dbModuleName = 'MySQLdb'
        dbModule = __import__(dbModuleName)
        pool = DBPool(dbModule,
                      10,
                      host='localhost',
                      user='******',
                      passwd='test',
                      db='test')
        for i in range(iterations):
            db = pool.getConnection()
            try:
                cursor = db.cursor()
                cursor.execute("select * from test")
                # A single pre-formatted argument prints the same text under
                # both the Python 2 print statement and the print function.
                print('%s %s' % (i, cursor.fetchall()))
            finally:
                # Close the connection even when the query fails so it is
                # not left open for the rest of the run.
                db.close()
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit; ordinary errors are still reported exactly as before.
        traceback.print_exc()
        print('You need the MySQLdb adapter and a test database for this example')
Exemple #7
0
def download_book_in_thread(book_id=6513, thread_n=5):
    """Download a book's pending chapters using ``thread_n`` worker threads.

    Chapter ids with ``state=0`` for the book are selected and split into
    ``thread_n`` contiguous slices; each slice is passed, together with
    ``book_id``, to ``download_book_cell`` on its own thread. The threads
    are started but not joined.

    :param book_id: integer id of the book (interpolated with ``%d``).
    :param thread_n: number of threads to create.
    """
    rows = DBPool.exe_sql(
        "select chapter_id from chapters where book_id=%d and state=0" % book_id
    )  # rows come back as a 2-D tuple
    if not rows:
        return

    chunk = ceil(len(rows) / thread_n)
    workers = [
        Thread(
            name=str(idx),
            target=download_book_cell,
            args=(book_id, rows[idx * chunk: (idx + 1) * chunk]),
        )
        for idx in range(thread_n)
    ]
    for worker in workers:
        worker.start()