Esempio n. 1
0
def getLatestUpdateBooks(categorys, limit=30):
    '''
    Return the ids of the most recently updated books in the given categories.

    Books whose imgUrl is still the default cover image are excluded.

    :param categorys: categoryCode values, bound to the SQL ``in`` clause
    :param limit: maximum number of rows to return
    :return: rows as dicts holding an ``id`` key, ordered by updateTime
             descending; an empty tuple when the query fails
    '''
    conn, csor = getDushuConnCsor()
    dictCsor = conn.cursor(MySQLdb.cursors.DictCursor)

    try:
        dictCsor.execute(
            'select id  from ' + db_dushu + " where categoryCode in %s "
            "and imgUrl != 'http://tata-img.oss-cn-shanghai.aliyuncs.com/book-default.jpg' "
            " order by updateTime desc limit %s", (categorys, limit))
        conn.commit()
        # fetchall() on a DictCursor already yields dict rows.
        return dictCsor.fetchall()
    except Exception as e:
        # Best effort: log and report "no books" instead of fetching from a
        # cursor whose statement never ran.
        myLogging.warning(e)
        return ()
    finally:
        dictCsor.close()
        csor.close()
        conn.close()
Esempio n. 2
0
def getBookByTitle(title):
    '''
    Fetch the book rows with the given title whose rawUrl starts with the
    yingyangcan "getbaseinfo" endpoint.

    :param title: exact title to match
    :return: all matching rows as dicts (possibly empty), i.e.
             [bookObj{"id":"1",,}]
    '''
    conn, csor = getDushuConnCsor()
    dictCsor = conn.cursor(MySQLdb.cursors.DictCursor)

    try:
        # Both values are bound as parameters. The title may contain quotes,
        # which previously allowed SQL injection or a broken statement.
        # Passing the LIKE pattern as an argument keeps its literal '%' out
        # of the driver's %-substitution of the statement text.
        dictCsor.execute(
            "SELECT *  from cn_dushu_book where rawUrl like %s and title = %s;",
            ('http://api.yingyangcan.com.cn/interface/ajax/book/getbaseinfo.ajax?%',
             title))
        conn.commit()
        return dictCsor.fetchall()
    finally:
        dictCsor.close()
        csor.close()
        conn.close()
Esempio n. 3
0
def cleanSubtitle():
    '''
    Re-clean the subtitles of books inside a hard-coded id window.

    Books whose subtitle contains a run of 5 to 20 digits are selected, the
    subtitle is passed through ``subTitleClean`` and the result is written
    back when it differs from the UTF-8 encoded stored value.  A failing
    window is logged and skipped.
    '''
    conn, csor = getDushuConnCsor()
    dictCsor = conn.cursor(MySQLdb.cursors.DictCursor)
    bookId = 2584584
    carry = 50000
    try:
        while bookId < 2590000:
            try:
                # Note: both bounds are inclusive.
                dictCsor.execute(
                    'select id,subtitle  from ' + db_dushu +
                    " where id >= %s and id <= %s and subtitle REGEXP '[0-9]{5,20}'",
                    (bookId, bookId + carry))
                conn.commit()

                for book in dictCsor.fetchall():
                    newSubtitle = subTitleClean(book['subtitle'])
                    oldSubtitle = book['subtitle'].encode('utf-8')
                    if newSubtitle != oldSubtitle:
                        myLogging.info('bookId %s update from %s to %s',
                                       book['id'], oldSubtitle, newSubtitle)
                        updateOneFieldByOneField('subtitle', newSubtitle,
                                                 'id', book['id'])
            except Exception as e:
                myLogging.warning(e)
            bookId += carry
    finally:
        # Release everything even if logging or the loop itself blows up.
        dictCsor.close()
        csor.close()
        conn.close()
Esempio n. 4
0
def updateCapDigest():
    '''
    Recompute and store the digest of chapters in a hard-coded id range.

    For each id the chapter JSON ``<id>.json`` is read from ``bucket``.  The
    digest is the MD5 hex digest of ``bookUUID + title + '#' + idx`` (UTF-8
    encoded) and is written to ``cn_dushu_acticle.digest``.  Objects that
    cannot be read and rows that cannot be updated are printed and skipped.
    '''
    conn2, csor2 = getDushuConnCsor()

    try:
        for i in range(1056363, 1722907):
            try:
                capObj = json.loads(
                    bucket.get_object(str(i) + '.json').read())
            except Exception as e:
                print('%s %s' % (i, e))
                continue

            cid = capObj['id']
            print(cid)
            bookDigest = capObj['bookUUID']
            capTitle = capObj['title']
            idx = capObj['idx']

            forDigest = bookDigest + capTitle + u'#' + str(idx)
            digest2 = hashlib.md5(forDigest.encode('utf-8')).hexdigest()

            try:
                csor2.execute(
                    "update cn_dushu_acticle set digest = %s where id = %s",
                    (digest2, cid))
                conn2.commit()
            except Exception as e:
                print('%s %s' % (cid, e))
    finally:
        # A malformed object (missing key) aborts the run; the connection
        # must still be released.
        csor2.close()
        conn2.close()
Esempio n. 5
0
def indexBookSuggest(st=218289):
    '''
    Push title/author suggestion documents to the suggest service.

    Selects id, title and author of every book with ``id >= st`` and
    ``operateStatus = 0`` and PUTs one JSON document per book to
    ``DUSHU_SUGGEST_URL + <id>``.  A failing request is printed and skipped.

    :param st: smallest book id to index
    '''
    myLogging.info('st: %s', st)

    conn2, csor2 = getDushuConnCsor()
    try:
        csor2.execute(
            "select id,title,author from cn_dushu_book where id >= %s and operateStatus = 0 ",
            (st, ))
        conn2.commit()
        results = csor2.fetchall()
    finally:
        # All rows are in memory now; do not hold the connection open during
        # the HTTP requests below.
        csor2.close()
        conn2.close()

    baseUrl = DUSHU_SUGGEST_URL
    for bookId, title, author in results:
        bookObj = {
            'testsuggest': {
                'input': [title, author],
                'output': title + "(" + author + ')',
            },
        }
        try:
            r = requests.put(baseUrl + str(bookId), data=json.dumps(bookObj))
            print(r.text)
        except Exception as e:
            print('%s %s' % (bookObj, e))
Esempio n. 6
0
def _rollbackQuietly(conn):
    '''Roll back ``conn``; log instead of raising if the rollback fails.'''
    if not conn:
        return
    try:
        conn.rollback()
    except Exception as ee:
        myLogging.error(ee)


def insertCapWithCapObj(capObj, conn2=None, csor2=None, allowUpdate=False):
    '''
    Insert a chapter row and return its database id.

    After the insert the row is looked up again by digest.  If the stored
    bookId differs from ``capObj['bookId']`` it is corrected, so a wrong
    chapter cannot keep occupying the digest.  On success ``capObj['id']``
    is set as a side effect.

    :param capObj: dict with bookId, idx, digest, bookUUID, title and size
    :param conn2: optional open connection; used only together with csor2
    :param csor2: optional cursor belonging to conn2
    :param allowUpdate: when the insert fails, still look the row up by
                        digest instead of giving up
    :return: the chapter id, or None on failure
    '''
    # A connection opened here is also closed here; a caller-supplied one is
    # left open for the caller to reuse.
    ownsConn = not conn2 or not csor2
    if ownsConn:
        conn2, csor2 = getDushuConnCsor()

    try:
        try:
            csor2.execute("insert cn_dushu_acticle (bookId,idx,digest,bookUUID,title,size) values"
                          "(%s,%s,%s,%s,%s,%s)",
                          (capObj['bookId'], capObj['idx'], capObj['digest'],
                           capObj['bookUUID'], capObj['title'], capObj['size']))
            conn2.commit()
            myLogging.info('scap, ' + ":" + str(capObj['idx']))
        except Exception as e:
            myLogging.error(e)
            _rollbackQuietly(conn2)
            if not allowUpdate:
                return None

        try:
            csor2.execute(
                "select id,bookId from cn_dushu_acticle where digest = %s;",
                (capObj['digest'], ))
            conn2.commit()

            sqlObj = csor2.fetchone()
            capId = sqlObj[0]
            bookId = sqlObj[1]

            if bookId != capObj['bookId']:
                myLogging.info('update bookId' + str(capId))
                csor2.execute(
                    "update cn_dushu_acticle set bookId = %s where id = %s;",
                    (capObj['bookId'], capId))
                conn2.commit()

            capObj['id'] = capId
            return capId
        except Exception as e:
            myLogging.error(e)
            _rollbackQuietly(conn2)
            return None
    finally:
        if ownsConn:
            csor2.close()
            conn2.close()
    def loadDid(self):
        '''
        Add the digest of every book with operateStatus = 0 to ``self.ids``.
        '''
        conn, csor = getDushuConnCsor()

        csor.execute(
            "select digest from cn_dushu_book where operateStatus = 0;")
        conn.commit()
        for row in csor.fetchall():
            self.ids.add(row[0])

        csor.close()
        conn.close()
def getAll():
    '''
    Return the 100 newest rows (highest id first) of ``cn_test``.

    :return: fetched rows; the first column is updateTime formatted as
             YYYY-MM-DD, followed by the count/area columns in select order
    '''
    conn, csor = getDushuConnCsor()

    try:
        # No argument tuple is passed on purpose: without arguments the
        # driver does no %-substitution, so the '%Y-%m-%d' format string
        # reaches MySQL unchanged.
        csor.execute(
            "select DATE_FORMAT(updateTime, '%Y-%m-%d') as day, keshou_count, keshou_area, keshou_zhuzai_count"
            ", keshou_zhuzai_area, new_publish_count, new_publish_area, new_publish_zhuzai_count"
            ", new_publish_zhuzai_area, sign_count, sign_area, sign_zhuzai_count, sign_zhuzai_area from cn_test "
            "order by id desc limit 100")
        conn.commit()
        return csor.fetchall()
    finally:
        csor.close()
        conn.close()
Esempio n. 9
0
def fixUnuploadedCaps():
    '''
    Remove chapter rows whose content object is missing and fix up the
    chapter count of the affected books.

    Only books returned by ``getShuqiAllBookObjs`` that have more chapter rows
    than ``chapterNum`` are examined.  Their chapter ids are checked newest
    first with an HTTP HEAD request on ``ossBaseUrl + <id> + '.json'``; any
    status code above 200 gets the row deleted.  Checking stops once the
    remaining row count is no larger than ``chapterNum``.
    '''
    bookObjs = getShuqiAllBookObjs()
    conn, csor = getDushuConnCsor()
    for bookObj in bookObjs:

        # How many chapter rows does the database hold for this book?
        csor.execute(
            'select count(*) from ' + db_acticle + ' where bookId = %s ',
            (bookObj['id'], ))
        conn.commit()
        db_cap_count = csor.fetchone()[0]
        if db_cap_count <= bookObj['chapterNum']:
            # No surplus rows: nothing to repair for this book.
            continue

        csor.execute(
            'select id from ' + db_acticle +
            ' where bookId = %s order by id desc', (bookObj['id'], ))
        conn.commit()
        cids = csor.fetchall()

        deleteCount = 0
        for cidL in cids:
            cid = cidL[0]
            ossUrl = ossBaseUrl + str(cid) + '.json'
            r = requests.head(ossUrl)
            if r.status_code > 200:
                print 'bookId' + str(bookObj['id']) + ' cid: ' + str(
                    cid) + ' status_code: ' + str(r.status_code)

                # delete from the chapter table
                delCapById(cid)
                deleteCount = deleteCount + 1
            else:
                # Stop as soon as the surviving rows no longer exceed the
                # expected chapter count.
                nowCapCount = len(cids) - deleteCount
                if bookObj['chapterNum'] <= nowCapCount:
                    break

        if deleteCount > 0:  # something was deleted
            nowCapCount = len(cids) - deleteCount
            if bookObj['chapterNum'] == nowCapCount:
                continue  # exactly equal, two cases: 1) finished book, should be fine, ignored for now; 2) serialized book, left to the scheduled updater

            if bookObj['chapterNum'] < nowCapCount:  # still more chapters after deleting: log it, then update
                print 'still more chapters, check bookId: ', str(bookObj['id'])

            # fewer chapters than expected after deleting: update
            elif bookObj['chapterNum'] > nowCapCount:
                bookObj['chapterNum'] = nowCapCount

            # update
            updateByBookObj(bookObj)

    csor.close()
    conn.close()
def getCapObjsById(bookId):
    '''
    Return id, title and idx of a book's chapters whose id is below 63017738.

    :param bookId: book id the chapters belong to
    :return: rows as dicts with ``id``, ``title`` and ``idx`` keys
    '''
    conn, csor = getDushuConnCsor()
    rowCursor = conn.cursor(MySQLdb.cursors.DictCursor)

    query = ("SELECT id,title,idx from " + db_acticle +
             " where bookId = %s and id < 63017738;")
    rowCursor.execute(query, (bookId, ))
    conn.commit()
    chapters = rowCursor.fetchallDict()

    csor.close()
    conn.close()
    return chapters
Esempio n. 11
0
def deleteNLastChaps(dbBookId, limit):
    '''
    Delete the newest N chapters (highest ids) of a book.

    :param dbBookId: book id whose chapters are deleted
    :param limit: number of chapters to delete
    :return: None
    '''
    conn, csor = getDushuConnCsor()
    statement = ('delete from ' + db_acticle +
                 " where bookId = %s order by id desc limit %s;")
    params = (dbBookId, limit)
    csor.execute(statement, params)
    conn.commit()

    csor.close()
    conn.close()
Esempio n. 12
0
def updateCapFromTo(f, t):
    '''
    Re-fetch the content of broken chapters with ``f <= id < t``.

    Chapters are read in windows of 100 ids.  A chapter is refreshed when its
    stored content contains the known boilerplate text or equals the string
    'None'.  New content comes from ``getContentByUrl`` for shuqireader URLs
    and from ``getAndParse`` otherwise, and is stored with
    ``updateContentById``.  Failures are printed and the item is skipped.

    :param f: first chapter id (inclusive)
    :param t: end of the id range (exclusive)
    '''
    conn2, csor2 = getDushuConnCsor()

    print('from %s  to  %s' % (str(f), str(t)))

    offset = 100

    try:
        begin = f
        while begin < t:
            # Clamp the last window so a range that is not a multiple of
            # `offset` (or shorter than it) is still processed.
            end = min(begin + offset, t)
            try:
                # MySQLdb substitutes already-quoted literals into the
                # statement, so the placeholder must be %s; %d raises a
                # TypeError on every call.
                csor2.execute(
                    "select id, rawUrl,bookId,content from cn_dushu_acticle where id >= %s and id < %s",
                    (begin, end))
                conn2.commit()
                results = csor2.fetchall()
            except Exception as e:
                print('mysql ex:  %s' % (e, ))
                # Nothing trustworthy to read from the cursor for this window.
                results = ()

            begin = end

            for cap in results:
                cid = cap[0]
                capUrl = cap[1]
                bookId = cap[2]
                unclearContent = cap[3]
                if not (u'        言情小说_打造最新原创' in unclearContent
                        or unclearContent == 'None'):
                    continue
                if not capUrl:
                    # Without a source URL there is nothing to re-fetch.
                    print('no url, bookId :  %s' % (bookId, ))
                    continue
                try:
                    if 'shuqireader' in capUrl:
                        content = getContentByUrl(capUrl)
                    else:
                        content, host = getAndParse(capUrl)
                        if not content:
                            continue
                    updateContentById(cid, content)
                except Exception as e:
                    print('cid  %s error:  %s' % (cid, e))
    finally:
        csor2.close()
        conn2.close()
Esempio n. 13
0
def shuqiAddInit():
    '''
    Initialise the module-level ``gBookDict`` and ``donedegest`` filters.

    ``gBookDict`` comes from ``loadExistsSQId``.  ``donedegest`` is loaded
    from the dump file ``bloomDumpCapsName`` when possible.  Otherwise it is
    rebuilt from every chapter digest in ``cn_dushu_acticle`` (read in id
    windows of 500000) and dumped to that file.

    :return: the chapter-digest filter, also stored in ``donedegest``
    '''
    global gBookDict
    global donedegest
    gBookDict = loadExistsSQId()

    donedegest = loadBloomFromFile(bloomDumpCapsName)
    if donedegest:
        print('load bloom from file succ, no need load from db')
        # Return the filter on this path too, as the rebuild path does.
        return donedegest

    print('load from db')
    donedegest = getBloom(2000 * 10000)

    # Opened only on the rebuild path, so the early return above cannot
    # leak a connection.
    conn2, csor2 = getDushuConnCsor()
    try:
        csor2.execute(
            "select id from cn_dushu_acticle order by id desc limit 1")
        conn2.commit()
        lastRow = csor2.fetchone()
        # An empty table has no last row; there is then nothing to load.
        length = lastRow[0] if lastRow else 0
        step = 0
        carry = 500000
        while step < length:
            # Window is (step, step + carry]. The inclusive upper bound
            # keeps ids that are exact multiples of `carry` from being
            # skipped between two windows.
            csor2.execute(
                "select digest from cn_dushu_acticle where id > %s and id <= %s",
                (step, step + carry))
            conn2.commit()
            step = step + carry

            for cap in csor2.fetchall():
                donedegest.add(cap[0])
    finally:
        csor2.close()
        conn2.close()

    dumpBloomToFile(donedegest, bloomDumpCapsName)
    return donedegest
Esempio n. 14
0
def getIdsByType(confType):
    '''
    Return the ``ids`` value stored for a type in the type/book table.

    :param confType: value of the ``type`` column to look up
    :return: the stored ``ids`` value, or None when no row matches or the
             query fails (the failure is logged)
    '''
    conn, csor = getDushuConnCsor()

    try:
        csor.execute("select ids from " + db_typeBook + " where type = %s",
                     (confType, ))
        conn.commit()
        row = csor.fetchone()
    except Exception as e:
        myLogging.warning('get bookType exception: ' + str(e))
        row = None
    finally:
        csor.close()
        conn.close()

    # fetchone() yields None when nothing matched; do not index into it.
    return row[0] if row else None
Esempio n. 15
0
def updateOneFieldByOneField(upFieldName, upFieldValue, byFieldName,
                             byFieldValue):
    '''
    Set one column of the book table for the rows selected by another
    column, and stamp ``updateTime`` with the current Unix time.

    Failures are logged, not raised.

    :param upFieldName: column to set; spliced into the SQL text, so it must
                        be a trusted identifier
    :param upFieldValue: new value (bound as a parameter)
    :param byFieldName: column used in the where clause; also spliced into
                        the SQL text
    :param byFieldValue: value the where column must equal (bound)
    '''
    conn, csor = getDushuConnCsor()
    try:
        csor.execute(
            "update " + db_dushu + " set " + upFieldName +
            "  = %s, updateTime = %s where " + byFieldName + " = %s",
            (upFieldValue, int(time.time()), byFieldValue))
        conn.commit()
    except Exception as e:
        # Name the column actually being updated; the old message always
        # claimed a bookType update.
        myLogging.warning('update %s exception: %s', upFieldName, e)
    finally:
        csor.close()
        conn.close()
Esempio n. 16
0
def shuqiAddInitTmp():
    '''
    Add the rawUrl of every row in cn_dushu_book to the global ``donedegest``.
    '''
    global donedegest
    conn2, csor2 = getDushuConnCsor()

    csor2.execute("select rawUrl from cn_dushu_book")
    conn2.commit()
    for row in csor2.fetchall():
        donedegest.add(row[0])

    csor2.close()
    conn2.close()
Esempio n. 17
0
def updateBookTypeByRawUrl(type, rawUrl):
    '''
    Set ``bookType`` on the book rows with the given rawUrl.

    A failure is logged as a warning and otherwise ignored.

    :param type: new bookType value
    :param rawUrl: rawUrl identifying the rows to update
    '''
    conn, csor = getDushuConnCsor()
    try:
        statement = ("update " + db_dushu +
                     " set bookType = %s where rawUrl = %s")
        csor.execute(statement, (type, rawUrl))
        conn.commit()
    except Exception as err:
        myLogging.warning('update bookType exception: ' + str(err))

    csor.close()
    conn.close()
Esempio n. 18
0
def getLatestChapByBookId(bookId):
    '''
    Return the chapter row with the highest id for a book.

    :param bookId: book id the chapter belongs to
    :return: the row as a dict, or None when the book has no chapters or the
             query fails (the failure is logged)
    '''
    conn, csor = getDushuConnCsor()

    dictCsor = conn.cursor(MySQLdb.cursors.DictCursor)
    try:
        dictCsor.execute(
            "select * from " + db_acticle +
            " where bookId = %s order by id desc limit 1;", (bookId, ))
        conn.commit()
        # fetchone() on a DictCursor already yields a dict row.
        return dictCsor.fetchone()
    except Exception as e:
        myLogging.warning('getLatestChapByBookId exception: ' + str(e))
        return None
    finally:
        dictCsor.close()
        csor.close()
        conn.close()
Esempio n. 19
0
def getChapTitlesByBookId(bookId):
    '''
    Collect the distinct chapter titles stored for a book.

    :param bookId: book id the chapters belong to
    :return: set of title values
    '''
    conn, csor = getDushuConnCsor()

    query = 'select title from ' + db_acticle + " where bookId = %s"
    csor.execute(query, (bookId, ))
    conn.commit()

    titles = set(row[0] for row in csor.fetchall())
    csor.close()
    conn.close()

    return titles
Esempio n. 20
0
    def loadDid(self):
        '''
        Fill ``self.ids`` with every book source from cn_dushu_book, plus
        'shuqi' + sid for every sid in shuqi_deleted_ids.
        '''
        conn, csor = getDushuConnCsor()

        csor.execute("select source from cn_dushu_book;")
        conn.commit()
        for row in csor.fetchall():
            self.ids.add(row[0])

        csor.execute("select sid from shuqi_deleted_ids;")
        conn.commit()
        for row in csor.fetchall():
            self.ids.add('shuqi' + str(row[0]))

        csor.close()
        conn.close()
Esempio n. 21
0
def getFieldByBookId(field, bookId):
    '''
    Collect the distinct values of one chapter column for a book.

    :param field: column expression to select; spliced into the SQL text
    :param bookId: book id the chapters belong to
    :return: set of the selected values
    '''
    conn, csor = getDushuConnCsor()

    query = 'select ' + field + ' from ' + db_acticle + " where bookId = %s"
    csor.execute(query, (bookId, ))
    conn.commit()

    values = set(row[0] for row in csor.fetchall())
    csor.close()
    conn.close()

    return values
Esempio n. 22
0
def insertBookWithConn(bookObj, allowUpdate=True, conn2=None, csor2=None):
    '''
    Insert a book row built from ``bookObj``.

    Before inserting, ``bookObj`` is updated in place: ``digest`` is set from
    ``getBookDigest``, ``subtitle`` is passed through ``subTitleClean`` and a
    missing ``source`` defaults to ''.  The row gets a random userId in
    1..50, the current Unix time as updateTime and status 11.

    If the insert fails (for example because the row already exists) the
    transaction is rolled back, and a book whose bookType is u'完结' has
    its bookType refreshed via ``updateBookTypeByRawUrl``.

    :param bookObj: dict describing the book
    :param allowUpdate: only controls an early ``return None`` after a
                        failed insert
    :param conn2: optional open connection; used only together with csor2
    :param csor2: optional cursor belonging to conn2
    :return: None
    '''
    # A connection opened here is also closed here; a caller-supplied one is
    # left open for the caller.
    ownsConn = not conn2 or not csor2
    if ownsConn:
        conn2, csor2 = getDushuConnCsor()

    try:
        userId = random.randint(1, 50)

        updateTime = int(time.time())

        digest = getBookDigest(bookObj)
        bookObj['digest'] = digest

        # Same clean-up as applied everywhere else subtitles are stored.
        bookObj['subtitle'] = subTitleClean(bookObj['subtitle'])

        if 'source' not in bookObj:
            bookObj['source'] = ''

        try:
            csor2.execute('insert  ' + db_dushu +
              '(categoryCode,typeCode,category,type,userId,title,subtitle,imgUrl,author,updateTime' \
              ",rawUrl,source,digest,status,viewNum, chapterNum, bookType, size) values" \
              "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, %s)" \
              , (bookObj['categoryCode'],bookObj['typeCode'], bookObj['category'], bookObj['type'], userId,bookObj['title']
                 ,bookObj['subtitle'],bookObj['imgUrl'],bookObj['author'],updateTime, bookObj['rawUrl']
                 ,bookObj['source'],digest, 11,bookObj['viewNum'],bookObj['chapterNum'],bookObj['bookType'],bookObj['size']))
            conn2.commit()
            myLogging.info('succ book, ' +
                           unicode(bookObj['title']).encode('utf-8'))
        except Exception:
            myLogging.warning('update rollback; maybe exists, err:  %s',
                              traceback.format_exc())
            if conn2:
                try:
                    conn2.rollback()
                except Exception:
                    myLogging.error('rollback error : ' + bookObj['rawUrl'])

            if u'完结' == bookObj['bookType']:
                updateBookTypeByRawUrl(bookObj['bookType'], bookObj['rawUrl'])
            if not allowUpdate:
                return None
    finally:
        if ownsConn:
            csor2.close()
            conn2.close()
Esempio n. 23
0
def getBookCount():
    '''
    Return the total number of rows in the book table.

    :return: the row count, or None when the query fails (the failure is
             logged)
    '''
    conn, csor = getDushuConnCsor()

    try:
        csor.execute("select count(*) from " + db_dushu)
        conn.commit()
        return csor.fetchone()[0]
    except Exception as e:
        # The old message wrongly claimed a bookType update had failed.
        myLogging.warning('getBookCount exception: ' + str(e))
        return None
    finally:
        csor.close()
        conn.close()
Esempio n. 24
0
def delCapById(cid):
    '''
    Delete one chapter row by its primary key.

    A failed delete is logged and rolled back; a failed rollback is logged
    too.  Nothing is raised for either.

    :param cid: id of the chapter row to delete
    '''
    conn2, csor2 = getDushuConnCsor()

    try:
        deleteSql = "delete from " + db_acticle + " where id = %s"
        csor2.execute(deleteSql, (cid, ))
        conn2.commit()
    except Exception as err:
        # roll back when the delete fails
        myLogging.error('mysql ex: ' + str(err))
        if conn2:
            try:
                conn2.rollback()
            except Exception as rollbackErr:
                myLogging.error('rollback error : ' + str(cid))

    csor2.close()
    conn2.close()
Esempio n. 25
0
def loadExistsSQId():
    '''
    Build a bloom filter of the ``source`` values of the books whose source
    starts with 'shuqi' and whose id is below 127400, dump it to
    'local/BooksBloomDump' and return it.

    :return: the populated filter
    '''
    conn2, csor2 = getDushuConnCsor()

    bloom = getBloom(200000)
    try:
        # No argument tuple is passed, so the '%' of the LIKE pattern is sent
        # to MySQL unchanged.
        csor2.execute(
            "select source from cn_dushu_book where source like 'shuqi%' and id < 127400;"
        )
        conn2.commit()
        caps = csor2.fetchall()
    finally:
        csor2.close()
        conn2.close()

    for s in caps:
        # Each row is a one-column tuple. Store the source string itself, as
        # the other loaders here do, so lookups by source string can match.
        bloom.add(s[0])
    dumpBloomToFile(bloom, 'local/BooksBloomDump')

    return bloom
def getZssqAllBookObjs():
    '''
    Return the ids of all books whose rawUrl starts with the zhuishushenqi
    book API prefix.

    :return: rows as dicts holding an ``id`` key, i.e. [bookObj{"id":"1"}]
    '''
    conn, csor = getDushuConnCsor()
    rowCursor = conn.cursor(MySQLdb.cursors.DictCursor)

    query = ("SELECT id from cn_dushu_book where   "
             "  rawUrl like 'http://api.zhuishushenqi.com/book/%';")
    rowCursor.execute(query)
    conn.commit()
    rows = rowCursor.fetchallDict()

    csor.close()
    conn.close()

    return rows
def getShuqiIdRawUrlAsBookObjs():
    '''
    Return id and rawUrl of all books with operateStatus = 0 whose rawUrl
    starts with the shuqireader bc_cover.php prefix.

    :return: rows as dicts with ``id`` and ``rawUrl`` keys
    '''
    conn, csor = getDushuConnCsor()
    rowCursor = conn.cursor(MySQLdb.cursors.DictCursor)

    query = ("SELECT id,rawUrl from cn_dushu_book where operateStatus = 0  "
             " and rawUrl like 'http://api.shuqireader.com/reader/bc_cover.php%';")
    rowCursor.execute(query)
    conn.commit()
    rows = rowCursor.fetchallDict()

    csor.close()
    conn.close()

    return rows
Esempio n. 28
0
def updateIdsByType(confType, ids):
    '''
    Store ``ids`` for a type in the type/book table.

    A failure is logged as a warning and otherwise ignored.

    :param confType: value of the ``type`` column selecting the row
    :param ids: new value for the ``ids`` column
    '''
    conn, csor = getDushuConnCsor()

    try:
        statement = "update " + db_typeBook + ' set ids = %s  where type = %s'
        csor.execute(statement, (ids, confType))
        conn.commit()
    except Exception as err:
        myLogging.warning('update bookType exception: ' + str(err))

    csor.close()
    conn.close()
    # return ids


# if __name__ == '__main__':
#     delBookById(227921)
Esempio n. 29
0
def getBookObjById(dbid):
    '''
    Fetch a book row by its primary key.

    :param dbid: primary-key id of the book
    :return: the row as a dict, or None when no such book exists or the
             query fails (the failure is logged)
    '''
    conn, csor = getDushuConnCsor()
    dictCsor = conn.cursor(MySQLdb.cursors.DictCursor)
    try:
        dictCsor.execute("select * from " + db_dushu + " where id = %s",
                         (dbid, ))
        conn.commit()
        # fetchone() on a DictCursor already yields a dict row.
        return dictCsor.fetchone()
    except Exception as e:
        # The old message wrongly claimed a bookType update had failed.
        myLogging.warning('getBookObjById exception: ' + str(e))
        return None
    finally:
        dictCsor.close()
        csor.close()
        conn.close()
Esempio n. 30
0
def deleteChapsLargerThanIdx(bookId, idx):
    '''
    Delete every chapter of a book whose idx is greater than ``idx``.

    A failure is logged as a warning and otherwise ignored.

    :param bookId: book id whose chapters are deleted
    :param idx: chapters with an idx above this value are removed
    :return: None
    '''
    conn, csor = getDushuConnCsor()
    try:
        statement = ('delete from ' + db_acticle +
                     " where bookId = %s and idx > %s")
        csor.execute(statement, (bookId, idx))
        conn.commit()
    except Exception as err:
        myLogging.warning(err)

    csor.close()
    conn.close()