Beispiel #1
0
def insert_info(info):
    """Write one scraped book record to the database.

    Three tables are written, all through ``REPLACE INTO``: `bookwriter`
    receives one row per comma-separated entry of ``info['author']``,
    `bookinfo` receives the book itself, and `bookrelated` receives one row
    per entry of ``info['relations']``.

    :param info: mapping providing the keys 'author', 'name', 'publisher',
        'description', 'image', 'isbn', 'pages', 'year', 'language',
        'bookformat', 'download', 'url', 'bookid' and 'relations'.
    """
    writer_sql = """
                REPLACE INTO `bookwriter` (`writer`, `book`)
                VALUES (%s, %s)"""
    book_sql = """
            REPLACE INTO `bookinfo`
            (`name`, `publisher`, `description`, `image`, `isbn`, `pages`,
            `year`, `language`, `fileformat`, `download`, `url`, `bookid`)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    related_sql = """
                REPLACE INTO `bookrelated` (`origin`, `related`)
                VALUES (%s, %s)"""
    # Listed in the column order of book_sql; note that the 'bookformat' key
    # feeds the `fileformat` column.
    book_keys = ('name', 'publisher', 'description', 'image', 'isbn',
                 'pages', 'year', 'language', 'bookformat', 'download',
                 'url', 'bookid')

    with closing(db_pool.connection()) as db:
        # Authors: one row per comma-separated name (names are not stripped).
        authors = info['author'].split(',')
        book_name = info['name']
        for writer in authors:
            db.execute(writer_sql, writer, book_name)

        # The book itself.
        db.execute(book_sql, *[info[key] for key in book_keys])

        # Related titles.
        for related in info['relations']:
            db.execute(related_sql, book_name, related)
Beispiel #2
0
def get_untrack_books(max_id):
    with closing(db_pool.connection()) as db:
        id_row = db.query(""" SELECT `bookid` FROM `detail`""")
        id_in = [int(x['bookid']) for x in id_row]
        id_target = list(set(range(max_id)) - set(id_in))
        print "length of target is ", len(id_target)
        return id_target
Beispiel #3
0
def select_book_isbn():
    """Fetch the ISBNs of books that have not been looked up on Douban yet.

    A book is selected while its `isbn10` column in `detail` is still NULL.

    :return: whatever ``db.query`` returns for the selected `isbn` column.
    """
    pending_sql = """SELECT `isbn` FROM `detail`
                WHERE `isbn10` IS NULL"""
    with closing(db_pool.connection()) as db:
        return db.query(pending_sql)
Beispiel #4
0
def insert(info, isbn):
    """Store the details returned by the API for one book.

    Updates the `detail` row identified by *isbn* with the ISBN variants,
    price, publication date, rating figures, Douban links and summary taken
    from *info*, then hands the tags to ``insert_tags``.

    :param info: mapping of the API entry; read keys are 'title', 'summary',
        'gd:rating', 'link', 'db:attribute' and 'db:tag'.
    :param isbn: ISBN used in the WHERE clause to locate the row.
    :return: ``'Title not Found'`` when the entry has no title (nothing is
        written in that case), otherwise ``None``.
    """
    title_node = info.get('title')
    if not title_node:
        return 'Title not Found'
    bookname = title_node.get('$t')

    summary_node = info.get('summary')
    summary = summary_node.get('$t') if summary_node else ''

    r_average, r_max, r_min, r_count = get_rating(info.get('gd:rating'))

    api, url = get_douban_url(info.get('link'))

    isbn10, isbn13, pages, publisher, price, pubdate = get_book(info.get('db:attribute'))

    # A date that does not consist of exactly three numbers is reduced to
    # January 1st of its year. A missing date, or one without a 4-digit year,
    # is stored as NULL instead of raising TypeError/IndexError.
    pubdate_text = pubdate or ''
    if len(re.findall(r'(\d+)', pubdate_text)) != 3:
        year_match = re.search(r'\d{4}', pubdate_text)
        pubdate = year_match.group(0) + "-1-1" if year_match else None

    with closing(db_pool.connection()) as db:
        db.execute("""
            UPDATE `detail` SET
            `isbn10`=%s, `isbn13`=%s, `price`=%s, `pubdate`=%s, `rateAVE`=%s,
            `rateMAX`=%s, `rateMIN`=%s, `rateCount`=%s, `doubanAPI`=%s,
            `doubanURL`=%s, `summary`=%s
            WHERE `isbn`=%s
            """, isbn10, isbn13, price, pubdate, r_average, r_max, r_min,
                r_count, api, url, summary, isbn)

    insert_tags(info.get('db:tag'), bookname, isbn)
Beispiel #5
0
def save_image_url_to_mysql(realimage, bookid):
    """Store the real cover-image URL of a book.

    The `bookinfo` row is located by looking for ``/<bookid>/`` anywhere
    inside its `url` column.

    :param realimage: value written to the `realimage` column.
    :param bookid: id expected to appear as a path segment of `url`.
    :return: always ``True``.
    """
    # Build the LIKE pattern here and bind it as one parameter. A %s
    # placeholder placed inside a quoted SQL literal only works for values
    # the driver renders without quotes (e.g. ints); a string bookid would be
    # quoted a second time and break the statement.
    # NOTE(review): assumes a DB-API driver with %s (pyformat) binding, as the
    # placeholders in this statement suggest - confirm.
    url_pattern = '%%/%s/%%' % (bookid,)

    with closing(db_pool.connection()) as db:
        db.execute(""" UPDATE `bookinfo` SET `realimage` = %s
                   WHERE `url` LIKE %s""", realimage, url_pattern)
        return True
Beispiel #6
0
def save_book_size(bookid, size):
    """Record the file size of a book in `detail`.

    :param bookid: `bookid` of the row to update.
    :param size: file size; it is converted with ``str()`` before binding.
    """
    update_sql = """
            UPDATE `detail` SET `filesize` = %s WHERE `bookid` = %s
            """
    with closing(db_pool.connection()) as db:
        size_text = str(size)
        db.execute(update_sql, size_text, bookid)
Beispiel #7
0
def books_without_realimage():
    """Pick up to ten books whose `realimage` is still NULL.

    :return: list with the `bookid` of each selected `bookinfo` row.
    """
    select_sql = """
            SELECT `bookid` FROM `bookinfo` WHERE `realimage` IS NULL
            LIMIT 10"""
    with closing(db_pool.connection()) as db:
        rows = db.query(select_sql)
        pending_ids = []
        for row in rows:
            pending_ids.append(row['bookid'])
        return pending_ids
Beispiel #8
0
def get_book_url(bookid):
    """Look up the download address and the page address of a book.

    :param bookid: `bookid` of the `bookinfo` row to read.
    :return: ``(download, url)`` tuple, or ``None`` when the lookup yields
        nothing.
    """
    select_sql = """
            SELECT `download`, `url` FROM `bookinfo` WHERE `bookid` = %s
            """
    with closing(db_pool.connection()) as db:
        row = db.get(select_sql, bookid)
        if not row:
            return None
        return row['download'], row['url']
Beispiel #9
0
def get_img_url():
    """获取未存储书的封面图片地址
    """
    with closing(db_pool.connection()) as db:
        img_raw = db.query("""
                SELECT `ebookImage`, `id`
                FROM `detail` WHERE `imageURL` IS NULL""")
        print '取到', len(img_raw), '条数据'
        return img_raw
Beispiel #10
0
def get_book_url(bookid):
    """获取书的真实存储地址和页面地址
    """
    print "bookid", bookid
    with closing(db_pool.connection()) as db:
        urls = db.get("""
            SELECT `downloadURL`, `ebookURL` FROM `detail` WHERE `bookid` = %s
            """, bookid)
        if urls:
            return urls['downloadURL'], urls['ebookURL']
Beispiel #11
0
def insert_info(info):
    """Store the raw scraped entries in `jandan`.

    :param info: iterable of entries; each entry is unpacked positionally
        into the `item`, `content` and `votes` columns.
    :return: the string ``'success'``.
    """
    replace_sql = """REPLACE INTO `jandan` (`item`, `content`, `votes`)
                VALUES (%s, %s, %s)"""
    with closing(db_pool.connection()) as db:
        for entry in info:
            db.execute(replace_sql, *entry)

    return 'success'
Beispiel #12
0
def get_img_url(bookid):
    """Return the cover-image address and file name of one book.

    The `bookinfo` row is located by looking for ``/<bookid>/`` anywhere
    inside its `url` column.

    :param bookid: id expected to appear as a path segment of `url`.
    :return: ``(config.ITEBOOKS_URL + image, name + '.jpg')``, or ``None``
        when no row matches or the row has no image.
    """
    # Build the LIKE pattern here and bind it as one parameter. A %s
    # placeholder placed inside a quoted SQL literal only works for values
    # the driver renders without quotes (e.g. ints); a string bookid would be
    # quoted a second time and break the statement.
    # NOTE(review): assumes a DB-API driver with %s (pyformat) binding, as the
    # placeholder in this statement suggests - confirm.
    url_pattern = '%%/%s/%%' % (bookid,)

    with closing(db_pool.connection()) as db:
        row = db.get("""
            SELECT `image`, `name` FROM `bookinfo` WHERE `url` LIKE %s
            """, url_pattern)

        # No matching book: report "no image" instead of failing on None.
        if not row:
            return None

        img = row.get('image')
        if img:
            return config.ITEBOOKS_URL + img, row.get('name') + '.jpg'
        return None
Beispiel #13
0
def insert_tags(tags, bookname, isbn):
    """Store the tags of a book in the `tag` table.

    Tags without a name, without a count, or with a name longer than 30
    characters are skipped.

    :param tags: iterable of mappings with the keys '@name' and '@count';
        nothing is done when it is empty or ``None``.
    :param bookname: value written to the `name` column of every row.
    :param isbn: value written to the `isbn` column of every row.
    :return: ``None``.
    """
    if not tags:
        return None

    with closing(db_pool.connection()) as db:
        for item in tags:
            # A tag without '@name' is treated like an empty name and skipped
            # below, rather than failing on None.strip().
            tag = (item.get('@name') or '').strip()
            count = item.get('@count')
            if (not tag) or (not count) or (len(tag) > 30):
                continue

            db.execute("""
                REPLACE INTO `tag` (`isbn`, `name`, `tag`, `count`)
                VALUES (%s, %s, %s, %s)""", isbn, bookname, tag, count)
Beispiel #14
0
def get_unparsed_content():
    """Fetch a small batch of `jandan` rows whose `p_count` is still NULL.

    :return: ``(rows, remaining)`` where ``rows`` holds `item` and `content`
        of at most two unparsed rows and ``remaining`` is the number of
        unparsed rows minus the batch size (so it can be negative).
    """
    batch_size = 2
    count_sql = """SELECT COUNT(*) AS `count` FROM `jandan`
            WHERE `p_count` IS NULL
            """
    batch_sql = """SELECT `item`, `content` FROM `jandan`
            WHERE `p_count` IS NULL
            LIMIT %s"""

    with closing(db_pool.connection()) as db:
        pending = db.get(count_sql)
        remaining = pending['count'] - batch_size

        rows = db.query(batch_sql, batch_size)
    return rows, remaining
Beispiel #15
0
def get_unparsed_vote():
    """Fetch a batch of `jandan` rows whose vote columns are still empty.

    A row qualifies while `support` or `unsupport` is NULL.

    :return: ``(rows, remaining)`` where ``rows`` holds `item` and `votes`
        of at most thirty such rows and ``remaining`` is the number of
        qualifying rows minus the batch size (so it can be negative).
    """
    batch_size = 30
    count_sql = """SELECT COUNT(*) AS `count` FROM `jandan`
            WHERE `support` IS NULL OR `unsupport` IS NULL
            """
    batch_sql = """SELECT `item`, `votes` FROM `jandan`
            WHERE `support` IS NULL OR `unsupport` IS NULL
            LIMIT %s"""

    with closing(db_pool.connection()) as db:
        pending = db.get(count_sql)
        remaining = pending['count'] - batch_size

        rows = db.query(batch_sql, batch_size)
    return rows, remaining
Beispiel #16
0
def insert(info, isbn):
    """Store the details returned by the API for one book.

    The tags are handed to ``insert_tags`` first; afterwards a single
    ``REPLACE INTO`` writes the ISBN variants, page count, price, publisher,
    publication date, rating figures, Douban links, summary and title to
    `douban_bookinfo`.

    :param info: mapping of the API entry; read keys are "title", "db:tag",
        "summary", "gd:rating", "link" and "db:attribute".
    :param isbn: value written to the `isbn` column.
    :return: ``"Title not Found"`` when the entry has no title (nothing is
        written in that case), otherwise ``None``.
    """
    title_node = info.get("title")
    if not title_node:
        return "Title not Found"
    bookname = title_node.get("$t")

    insert_tags(info.get("db:tag"), bookname)

    summary_node = info.get("summary")
    summary = summary_node.get("$t") if summary_node else ""

    r_average, r_max, r_min, r_count = get_rating(info.get("gd:rating"))

    api, url = get_douban_url(info.get("link"))

    isbn10, isbn13, pages, publisher, price, pubdate = get_book(info.get("db:attribute"))

    replace_sql = """
            REPLACE INTO `douban_bookinfo`
            (`isbn`, `isbn10`, `isbn13`, `pages`, `price`, `publisher`,
            `pubdate`, `rate_average`, `rate_max`, `rate_min`, `rate_count`,
            `douban_api`, `douban_url`, `summary`, `bookname`)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
    # Same order as the column list of replace_sql.
    row_values = (
        isbn, isbn10, isbn13, pages, price, publisher, pubdate,
        r_average, r_max, r_min, r_count, api, url, summary, bookname,
    )

    with closing(db_pool.connection()) as db:
        db.execute(replace_sql, *row_values)
Beispiel #17
0
def insert_votes(votes):
    """Store the support and oppose counts of a batch of `jandan` items.

    One ``UPDATE ... CASE `item` WHEN ... THEN ...`` statement sets both
    `support` and `unsupport` for every item of the batch. All values are
    passed as bound parameters instead of being formatted into the SQL text,
    so quotes inside a value cannot break or alter the statement.

    :param votes: iterable of mappings with the keys 'item', 'support' and
        'unsupport'.
    :return: the SQL text that was executed (with ``%s`` placeholders), or an
        empty string when *votes* is empty and nothing was executed.
    """
    votes = list(votes)
    if not votes:
        # An empty batch would render "IN ()", which is not valid SQL.
        return ''

    when_clauses = '\n'.join(['WHEN %s THEN %s'] * len(votes))
    in_placeholders = ', '.join(['%s'] * len(votes))
    sql = (
        'UPDATE `jandan` SET `support` = CASE `item`\n' + when_clauses +
        '\nEND,\n`unsupport` = CASE `item`\n' + when_clauses +
        '\nEND\nWHERE `item` IN (' + in_placeholders + ')'
    )

    # Parameters follow the placeholder order: the support CASE, then the
    # unsupport CASE, then the IN list.
    params = []
    for column in ('support', 'unsupport'):
        for vote in votes:
            params.extend((vote['item'], vote[column]))
    params.extend(vote['item'] for vote in votes)

    # NOTE(review): assumes db.execute(sql, *params) binds %s placeholders
    # through the driver - confirm against the connection class.
    with closing(db_pool.connection()) as db:
        db.execute(sql, *params)

    return sql
Beispiel #18
0
def update_content(contents):
    """Store the formatted content and paragraph count of `jandan` items.

    One ``UPDATE ... CASE `item` WHEN ... THEN ...`` statement sets both
    `p_count` and `content` for every item of the batch. All values are
    passed as bound parameters instead of being formatted into the SQL text,
    so content containing quotes cannot break or alter the statement.

    :param contents: iterable of mappings with the keys 'item', 'p_count'
        and 'content'.
    :return: the SQL text that was executed (with ``%s`` placeholders), or an
        empty string when *contents* is empty and nothing was executed.
    """
    contents = list(contents)
    if not contents:
        # An empty batch would render "IN ()", which is not valid SQL.
        return ''

    when_clauses = '\n'.join(['WHEN %s THEN %s'] * len(contents))
    in_placeholders = ', '.join(['%s'] * len(contents))
    sql = (
        'UPDATE `jandan` SET `p_count` = CASE `item`\n' + when_clauses +
        '\nEND,\n`content` = CASE `item`\n' + when_clauses +
        '\nEND\nWHERE `item` IN (' + in_placeholders + ')'
    )

    # Parameters follow the placeholder order: the p_count CASE, then the
    # content CASE, then the IN list.
    params = []
    for column in ('p_count', 'content'):
        for entry in contents:
            params.extend((entry['item'], entry[column]))
    params.extend(entry['item'] for entry in contents)

    # NOTE(review): assumes db.execute(sql, *params) binds %s placeholders
    # through the driver - confirm against the connection class.
    with closing(db_pool.connection()) as db:
        db.execute(sql, *params)

    return sql
Beispiel #19
0
def select_unfetch_book_isbn():
    """Return the ISBN of the next book to look up on Douban.

    A cursor stored under the key ``"douban::fetch:id"`` is used as a row
    offset into `bookinfo`. It is advanced by one on every call and wraps
    around to the first row once it reaches the number of rows.

    :return: the `isbn` of the selected row, or ``None`` when `bookinfo`
        has no rows.
    """
    fetch_id = kv.get("douban::fetch:id")
    if not fetch_id:
        fetch_id = 0

    with closing(db_pool.connection()) as db:
        book_count_row = db.get(" SELECT COUNT(*) AS `count` FROM `bookinfo`")
        book_count = book_count_row["count"]
        # LIMIT offsets are zero-based, so an offset equal to the row count
        # already points past the last row and must wrap around as well.
        if fetch_id >= book_count:
            fetch_id = 0

        isbn_raw = db.get(
            """
            SELECT `bookinfo`.`isbn` FROM `bookinfo`
            LIMIT %s, 1""",
            fetch_id,
        )

        kv.set("douban::fetch:id", fetch_id + 1)

        # Only reachable with an empty table (or rows deleted between the two
        # queries): report "nothing to fetch" instead of failing on None.
        if not isbn_raw:
            return None
        return isbn_raw["isbn"]
Beispiel #20
0
def save_image_url_to_mysql(url, bookid):
    """Store the real image URL of a book in `detail`.

    :param url: value written to the `imageURL` column.
    :param bookid: `id` of the `detail` row to update.
    """
    update_sql = """ UPDATE `detail` SET `imageURL` = %s
                   WHERE `id` = %s"""
    with closing(db_pool.connection()) as db:
        db.execute(update_sql, url, bookid)
Beispiel #21
0
def get_book_undownload():
    """List the books whose `filesize` in `detail` is still NULL.

    :return: list with the `bookid` of every such row.
    """
    select_sql = """
            SELECT `bookid` FROM `detail` WHERE `filesize` IS NULL"""
    with closing(db_pool.connection()) as db:
        rows = db.query(select_sql)
        pending_ids = []
        for row in rows:
            pending_ids.append(row["bookid"])
        return pending_ids