Example #1
0
def main():
    """Export the rows of the module-level ``sql`` query to a text file.

    The ``sql`` template is filled with the hard-coded keyword and executed
    on the 'ibbd2' connection. Every result row becomes one output line made
    of column 0, a tab, and column 1. Column 1 is passed through
    ``parser.unescape``, encoded as GB2312 (characters that cannot be encoded
    are dropped) and has its newlines replaced by spaces; a falsy column 1 is
    written as an empty string.
    """

    # config

    exportpath = 'F:/'
    keyword = '女士手表'
    exportfile = os.path.join(
        exportpath,
        'verified_%s_attr.txt' % keyword.decode('utf-8').encode('GBK'))

    # action

    dbConn = getDBConn('ibbd2')
    try:
        cursor = dbConn.cursor()
        try:
            print('execute sql query')
            cursor.execute(sql % keyword)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query raises.
        dbConn.close()

    lines = []
    for row in rows:
        if row[1]:
            text = parser.unescape(row[1]).encode('GB2312', 'ignore')
            text = text.replace('\n', ' ')
        else:
            text = ''
        lines.append(str(row[0]) + '\t' + text)

    # Mode 'w' truncates a previous export, so no separate remove is needed.
    with open(exportfile, 'w') as f:
        f.write('\n'.join(lines))
def saveRedisSchedulePlan():
    """Copy the members of every Redis set named in SCHEDULES into MySQL.

    For each schedule name, one row ``(schedule, CURDATE(), member)`` is
    inserted into ``topspider.schedule_plan_log`` per member of the Redis set
    with that name. All inserts are committed together after the last
    schedule; the cursor and connection are closed even when an insert fails.
    """
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')
    try:
        dbCursor = db.cursor()
        try:
            for schedule in SCHEDULES:
                ids = rdSvr.smembers(schedule)
                dbCursor.executemany(
                    'INSERT INTO topspider.schedule_plan_log VALUES(%s,CURDATE(),%s)',
                    [[schedule, iid] for iid in ids])
            db.commit()
        finally:
            dbCursor.close()
    finally:
        db.close()
        del rdSvr
Example #3
0
def queryRateDict():
    """Load ``ibbd2.lib_rate_library`` grouped by theme.

    Returns:
        A dict mapping each ``theme`` value to a list of
        ``[keyword, appraisal, score]`` lists in result-set order, where
        ``keyword`` and ``appraisal`` are encoded as UTF-8.
    """
    db = getDBConn('ibbd2')
    rateDict = {}
    try:
        db.query('SELECT theme,keyword,appraisal,score FROM ibbd2.lib_rate_library')
        r = db.store_result()
        for i in range(r.num_rows()):
            # fetch_row() returns a sequence of rows; only its first row is used.
            row = r.fetch_row()[0]
            entry = [row[1].encode('utf-8'), row[2].encode('utf-8'), row[3]]
            # setdefault replaces the deprecated has_key() branch pair.
            rateDict.setdefault(row[0], []).append(entry)
    finally:
        # Close the connection even when the query or decoding fails.
        db.close()
    return rateDict
Example #4
0
def saveRedisSchedulePlan():
    """Persist the members of each Redis set listed in SCHEDULES.

    Every member of the Redis set named after a schedule is inserted into
    ``topspider.schedule_plan_log`` as ``(schedule, CURDATE(), member)``.
    The inserts are committed once, after all schedules were processed, and
    the cursor and connection are released even if an insert raises.
    """
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')
    try:
        dbCursor = db.cursor()
        try:
            for schedule in SCHEDULES:
                ids = rdSvr.smembers(schedule)
                dbCursor.executemany(
                    'INSERT INTO topspider.schedule_plan_log VALUES(%s,CURDATE(),%s)',
                    [[schedule, iid] for iid in ids])
            db.commit()
        finally:
            dbCursor.close()
    finally:
        db.close()
        del rdSvr
Example #5
0
def getTableDDL(dbName):
    """Collect the CREATE statement of every entry listed by SHOW TABLES.

    Args:
        dbName: name passed to getDBConn() and used to qualify each table in
            the ``SHOW CREATE TABLE`` statement.

    Returns:
        A dict mapping the first column of each ``SHOW CREATE TABLE`` row to
        its second column encoded as UTF-8.
    """
    j = {}
    db = getDBConn(dbName)
    try:
        db.query("SHOW TABLES")
        r = db.store_result()
        tables = [r.fetch_row()[0][0] for i in range(r.num_rows())]
        for table in tables:
            # Backtick-quote both identifiers (doubling embedded backticks) so
            # reserved words and special characters in names do not break the
            # statement.
            cmdStr = "SHOW CREATE TABLE `%s`.`%s`" % (
                dbName.replace("`", "``"), table.replace("`", "``"))
            db.query(cmdStr)
            r = db.store_result()
            rs = r.fetch_row()[0]
            j[rs[0]] = rs[1].encode("utf-8")
    finally:
        # Close the connection even when one of the statements fails.
        db.close()
    return j
Example #6
0
def getTableDDL(dbName):
    """Return the DDL of every entry that SHOW TABLES reports.

    Args:
        dbName: name handed to getDBConn() and used as the schema qualifier
            in each ``SHOW CREATE TABLE`` statement.

    Returns:
        A dict whose keys are the first column of each ``SHOW CREATE TABLE``
        row and whose values are the second column encoded as UTF-8.
    """
    j = {}
    db = getDBConn(dbName)
    try:
        db.query('SHOW TABLES')
        r = db.store_result()
        tables = [r.fetch_row()[0][0] for i in range(r.num_rows())]
        for table in tables:
            # Quote identifiers with backticks (embedded backticks doubled) so
            # names that are reserved words or contain special characters work.
            cmdStr = 'SHOW CREATE TABLE `%s`.`%s`' % (
                dbName.replace('`', '``'), table.replace('`', '``'))
            db.query(cmdStr)
            r = db.store_result()
            rs = r.fetch_row()[0]
            j[rs[0]] = rs[1].encode('utf-8')
    finally:
        # Always release the connection, including on query errors.
        db.close()
    return j
Example #7
0
def evaluateRatesByItemId(item_id):
    """Evaluate the rates of one item and store the evaluation results.

    Rates are read from ``topspider.top_item_rate`` in batches of 300 and
    passed to evaluateRates() together with the rate dictionary and ADVS.
    For every non-empty evaluation one row is inserted into
    ``ibbd2.quota_rate_evaluate``. A failing insert is printed and skipped.

    Args:
        item_id: value matched against the ``auc_num_id`` column.
    """
    rateDict = queryRateDict()
    db = getDBConn('ibbd2')
    db2 = None
    try:
        # A second connection is used for the writes, as in the original.
        db2 = getDBConn('ibbd2')
        c = db.cursor()
        c2 = db2.cursor()
        # Bind item_id as a parameter instead of formatting it into the SQL.
        # It is stringified first so the comparison stays a string
        # comparison, exactly like the former quoted literal.
        c.execute("""SELECT rate_id, content, `date`
        FROM topspider.top_item_rate
        WHERE auc_num_id=%s
        GROUP BY rate_id""", ('%s' % (item_id,),))
        insertSql = ("INSERT INTO ibbd2.quota_rate_evaluate "
                     "VALUES(NULL,NULL,%s,%s,%s,%s,%s,%s,%s,NOW())")
        while 1:
            rows = c.fetchmany(300)
            rows = dict((t[0], [t[1].encode('utf-8'), t[2]]) for t in rows)
            if len(rows) == 0:
                break
            metadata = evaluateRates([[k, v[0]] for (k, v) in rows.items()], rateDict, ADVS)
            for k in metadata.keys():
                for (key, val) in metadata[k].items():
                    if len(val) < 1:
                        continue
                    # The driver escapes every parameter, so the rate content
                    # is passed raw rather than through escapeSQLCommand().
                    # NOTE(review): assumes escapeSQLCommand() only performed
                    # SQL escaping - confirm.
                    params = (
                        item_id,
                        k,
                        rows[k][1],
                        rows[k][0],
                        key.encode('utf-8'),
                        val.keys()[0],
                        val.values()[0],
                    )
                    try:
                        c2.execute(insertSql, params)
                    except Exception as e:
                        print(e)
            # Commit per batch so the inserts are not discarded when the
            # connection is closed (harmless if autocommit is enabled).
            db2.commit()
    finally:
        if db2 is not None:
            db2.close()
        db.close()
Example #8
0
def evaluateRatesByItemId(item_id):
    """Run the rate evaluation for one item and persist the outcome.

    The item's rates are fetched from ``topspider.top_item_rate`` 300 rows at
    a time, handed to evaluateRates() with the rate dictionary and ADVS, and
    each non-empty evaluation is written to ``ibbd2.quota_rate_evaluate``.
    An insert that raises is printed and skipped.

    Args:
        item_id: value matched against the ``auc_num_id`` column.
    """
    rateDict = queryRateDict()
    db = getDBConn('ibbd2')
    db2 = None
    try:
        # Reads and writes use separate connections, as in the original.
        db2 = getDBConn('ibbd2')
        c = db.cursor()
        c2 = db2.cursor()
        # item_id is bound as a parameter rather than formatted into the
        # statement; it is stringified so the comparison remains the string
        # comparison the former quoted literal produced.
        c.execute("""SELECT rate_id, content, `date`
        FROM topspider.top_item_rate
        WHERE auc_num_id=%s
        GROUP BY rate_id""", ('%s' % (item_id,),))
        insertSql = ("INSERT INTO ibbd2.quota_rate_evaluate "
                     "VALUES(NULL,NULL,%s,%s,%s,%s,%s,%s,%s,NOW())")
        while 1:
            rows = c.fetchmany(300)
            rows = dict((t[0], [t[1].encode('utf-8'), t[2]]) for t in rows)
            if len(rows) == 0:
                break
            metadata = evaluateRates([[k, v[0]] for (k, v) in rows.items()],
                                     rateDict, ADVS)
            for k in metadata.keys():
                for (key, val) in metadata[k].items():
                    if len(val) < 1:
                        continue
                    # Parameters are escaped by the driver, so the content is
                    # no longer routed through escapeSQLCommand().
                    # NOTE(review): assumes escapeSQLCommand() only performed
                    # SQL escaping - confirm.
                    params = (
                        item_id,
                        k,
                        rows[k][1],
                        rows[k][0],
                        key.encode('utf-8'),
                        val.keys()[0],
                        val.values()[0],
                    )
                    try:
                        c2.execute(insertSql, params)
                    except Exception as e:
                        print(e)
            # Commit each batch; without a commit the inserts may be rolled
            # back on close (a no-op if autocommit is enabled).
            db2.commit()
    finally:
        if db2 is not None:
            db2.close()
        db.close()
Example #9
0
def getTableDDL(dbName):
    """Return the CREATE TABLE statement of every base table in a schema.

    Table names are read from ``information_schema.TABLES`` for the given
    schema; entries whose TABLE_TYPE is not 'BASE TABLE' are skipped.

    Args:
        dbName: name passed to getDBConn() and used as the schema name.

    Returns:
        A dict mapping the first column of each ``SHOW CREATE TABLE`` row to
        its second column encoded as UTF-8.
    """
    j = {}
    db = getDBConn(dbName)
    try:
        db.query("SELECT TABLE_NAME, TABLE_TYPE FROM information_schema.TABLES WHERE TABLE_SCHEMA='%s'" % dbName)
        r = db.store_result()
        tables = [r.fetch_row()[0] for i in range(r.num_rows())]
        tables = [table[0] for table in tables if table[1] == 'BASE TABLE']
        for table in tables:
            # Backtick-quote the identifiers (embedded backticks doubled) so
            # reserved words or special characters in names are accepted.
            cmdStr = 'SHOW CREATE TABLE `%s`.`%s`' % (
                dbName.replace('`', '``'), table.replace('`', '``'))
            db.query(cmdStr)
            r = db.store_result()
            rs = r.fetch_row()[0]
            j[rs[0]] = rs[1].encode('utf-8')
    finally:
        # Close the connection even when a statement fails.
        db.close()
    return j
Example #10
0
def getDbRoutine(dbName):
    """Return the CREATE PROCEDURE statement of every procedure in a schema.

    Each statement is encoded as UTF-8, runs of 2 to 10 newlines are
    collapsed into one, and the literal ``DEFINER=`ibbd`@`%` `` clause is
    removed.

    Args:
        dbName: name passed to getDBConn() and used as the schema name.

    Returns:
        A dict mapping the first column of each ``SHOW CREATE PROCEDURE`` row
        to the cleaned-up third column.
    """
    j = {}
    db = getDBConn(dbName)
    try:
        # information_schema.ROUTINES also lists stored functions, for which
        # SHOW CREATE PROCEDURE fails, so restrict the list to procedures.
        db.query("SELECT ROUTINE_NAME FROM information_schema.ROUTINES WHERE ROUTINE_SCHEMA='%s' AND ROUTINE_TYPE='PROCEDURE'" % dbName)
        r = db.store_result()
        procs = [r.fetch_row()[0][0] for i in range(r.num_rows())]
        for proc in procs:
            # Backtick-quote the identifiers (embedded backticks doubled).
            cmdStr = 'SHOW CREATE PROCEDURE `%s`.`%s`' % (
                dbName.replace('`', '``'), proc.replace('`', '``'))
            db.query(cmdStr)
            r = db.store_result()
            rs = r.fetch_row()[0]
            ddl = rs[2].encode('utf-8')
            ddl = re.sub(r'\n{2,10}', '\n', ddl)
            j[rs[0]] = ddl.replace('DEFINER=`ibbd`@`%` ', '')
    finally:
        # Close the connection even when a statement fails.
        db.close()
    return j
Example #11
0
def updateTopZhitongcheResult():
    """Loop forever, refreshing the stored Zhitongche results per keyword.

    Each pass reads the distinct keywords from ``ibbd2.user_keywords_ztc``,
    fetches and saves the result of every keyword, and replaces that
    keyword's rows in ``ibbd2.ststc_ztc_shops``. A failure for one keyword is
    logged and does not stop the pass. During hours listed in
    ``exclude_hours`` the loop only waits 10 minutes and checks again.
    """
    while 1:

        # exclude hours

        if time.localtime().tm_hour in exclude_hours:
            time.sleep(60 * 10)
            continue

        # fetch the keyword list

        db = getDBConn('ibbd2')
        try:
            cmdStr = 'SELECT DISTINCT(keyword) FROM ibbd2.user_keywords_ztc'
            db.query(cmdStr)
            r = db.store_result()
            keywords = [
                r.fetch_row()[0][0].encode('utf-8') for i in range(r.num_rows())
            ]
        finally:
            # Close the connection even when the query fails.
            db.close()

        # fetch the Zhitongche results

        for keyword in keywords:
            log.info('%s Start', keyword)

            # save the results

            try:
                metadata = getZhitongcheResult(keyword, 'utf-8', 5)
                saveZhitongcheResult(metadata)

                # update the shop list

                executeSQLCommand(
                    'ibbd2',
                    'DELETE FROM ibbd2.ststc_ztc_shops WHERE keyword=%s',
                    [keyword])
                sellerDict = dict((record['sellerId'], record['wangwang'])
                                  for record in metadata)
                executeSQLCommand(
                    'ibbd2',
                    'INSERT INTO ibbd2.ststc_ztc_shops VALUES(%s,%s,%s)',
                    [[keyword, k, v] for (k, v) in sellerDict.items()])
                log.info('%s Success', keyword)
            except Exception as e:
                log.error('%s Error %s', keyword, str(e))
        log.info('Sleep %d', 60 * 25)
        # Actually perform the pause announced above; without it the next
        # pass started immediately.
        time.sleep(60 * 25)
Example #12
0
def queryRateDict():
    """Read ``ibbd2.lib_rate_library`` and group its rows by theme.

    Returns:
        A dict mapping each ``theme`` value to a list of
        ``[keyword, appraisal, score]`` lists in result-set order;
        ``keyword`` and ``appraisal`` are encoded as UTF-8.
    """
    db = getDBConn('ibbd2')
    rateDict = {}
    try:
        db.query(
            'SELECT theme,keyword,appraisal,score FROM ibbd2.lib_rate_library')
        r = db.store_result()
        for i in range(r.num_rows()):
            # fetch_row() returns a sequence of rows; only its first row is used.
            row = r.fetch_row()[0]
            entry = [row[1].encode('utf-8'), row[2].encode('utf-8'), row[3]]
            # setdefault replaces the deprecated has_key() branch pair.
            rateDict.setdefault(row[0], []).append(entry)
    finally:
        # Release the connection even when the query or encoding fails.
        db.close()
    return rateDict
Example #13
0
def run():
    db = getDBConn('ibbd2')
    db.query("""SELECT T2.item_id,T2.user_num_id
      FROM ibbd2.user_config T1
      LEFT JOIN ibbd2.top_itemsearchresult T2
      ON T1.config_value=T2.keyword
      AND DATE(T2.population_tsmp)=CURDATE()
      WHERE T1.config_key='keyword'
      AND T1.status='1'
      GROUP BY T2.item_id""")
    r = db.store_result()
    rs = [r.fetch_row()[0][0] for i in range(r.num_rows())]
    db.close()
    for iid in rs:
        print time.asctime(), iid
        evaluateRatesByItemId(iid)
    pass
Example #14
0
def run():
    db = getDBConn('ibbd2')
    db.query("""SELECT T2.item_id,T2.user_num_id
      FROM ibbd2.user_config T1
      LEFT JOIN ibbd2.top_itemsearchresult T2
      ON T1.config_value=T2.keyword
      AND DATE(T2.population_tsmp)=CURDATE()
      WHERE T1.config_key='keyword'
      AND T1.status='1'
      GROUP BY T2.item_id""")
    r = db.store_result()
    rs = [r.fetch_row()[0][0] for i in range(r.num_rows())]
    db.close()
    for iid in rs:
        print time.asctime(), iid
        evaluateRatesByItemId(iid)
    pass
Example #15
0
def updateTopZhitongcheResult():
    """Endlessly refresh the saved Zhitongche results of all keywords.

    On every pass the distinct keywords of ``ibbd2.user_keywords_ztc`` are
    loaded, each keyword's result is fetched and saved, and the keyword's
    rows in ``ibbd2.ststc_ztc_shops`` are replaced. Errors for a single
    keyword are logged and the pass continues. While the current hour is in
    ``exclude_hours`` the loop waits 10 minutes and re-checks.
    """
    while 1:

        # exclude hours

        if time.localtime().tm_hour in exclude_hours:
            time.sleep(60 * 10)
            continue

        # fetch the keyword list

        db = getDBConn('ibbd2')
        try:
            cmdStr = 'SELECT DISTINCT(keyword) FROM ibbd2.user_keywords_ztc'
            db.query(cmdStr)
            r = db.store_result()
            keywords = [r.fetch_row()[0][0].encode('utf-8') for i in
                        range(r.num_rows())]
        finally:
            # Release the connection even when the query fails.
            db.close()

        # fetch the Zhitongche results

        for keyword in keywords:
            log.info('%s Start', keyword)

            # save the results

            try:
                metadata = getZhitongcheResult(keyword, 'utf-8', 5)
                saveZhitongcheResult(metadata)

                # update the shop list

                executeSQLCommand(
                    'ibbd2',
                    'DELETE FROM ibbd2.ststc_ztc_shops WHERE keyword=%s',
                    [keyword])
                sellerDict = dict((record['sellerId'], record['wangwang'])
                                  for record in metadata)
                executeSQLCommand(
                    'ibbd2',
                    'INSERT INTO ibbd2.ststc_ztc_shops VALUES(%s,%s,%s)',
                    [[keyword, k, v] for (k, v) in sellerDict.items()])
                log.info('%s Success', keyword)
            except Exception as e:
                log.error('%s Error %s', keyword, str(e))
        log.info('Sleep %d', 60 * 25)
        # Perform the pause that the log line announces; previously the next
        # pass began immediately.
        time.sleep(60 * 25)
Example #16
0
def saveRedisScheduleError(runDate):
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')
    dbCursor = db.cursor()

    def parseLog(val):
        j = {}
        try:
            j = json.loads(val)
            return [j['msg'], j['SlaveID']]
        except:
            try:
                j = json.loads(re.sub(r'(\d+)L', r'\1', val.replace("'", '"')))
            except:
                j = {'msg': None, 'SlaveID': None}
        return [j['msg'], j['SlaveID']]

    def resort(arr):

        # resort [schedule,k,schedule_error,msg,slaveid] to [schedule,slaveid,k,schedule_error,msg]

        arr.insert(2, arr[-1])
        return arr[:-1]

    for (schedule, schedule_error) in SCHEDULE_ERROR_MAP.items():
        print schedule, schedule_error, rdSvr.hlen(schedule_error)
        if rdSvr.hlen(schedule_error) > 0:
            dbCursor.executemany(
                'INSERT INTO topspider.schedule_error_log VALUES(%s,%s,%s,%s,%s,%s,NOW())',
                [
                    resort([schedule, runDate, k, schedule_error] +
                           parseLog(v))
                    for (k, v) in rdSvr.hgetall(schedule_error).items()
                ])

        # rdSvr.delete(schedule_error)

    db.commit()
    dbCursor.close()
    db.close()
    del rdSvr
Example #17
0
def parseRedisPageIndustryOverview():
    """Run all page-building steps with one MySQL and one Redis connection.

    The date window ends two days before today and starts 15 days before
    that end. Once every step has run, the day after the window end is stored
    under the Redis key 'ibbd-ststc-date' formatted as YYYY/MM/DD, and the
    MySQL connection is closed.
    """
    mysqlConn = getDBConn('ibbd2')
    cache = getRedisConn()
    windowEnd = date.today() - timedelta(2)
    windowStart = windowEnd - timedelta(15)

    # steps that only need the two connections
    schedule_info(mysqlConn, cache)
    page_index(mysqlConn, cache)

    # steps that additionally receive the date window, in their fixed order
    datedSteps = (
        page_industry_overview_saletrend,
        page_industry_overview_pricetrend,
        page_industry_hotregion,
        page_industry_overview,
        page_industry_catsales,
    )
    for step in datedSteps:
        step(mysqlConn, cache, windowStart, windowEnd)

    publishDay = windowEnd + timedelta(1)
    cache.set('ibbd-ststc-date', publishDay.strftime('%Y/%m/%d'))

    # teardown
    mysqlConn.close()
    del cache
    del mysqlConn
Example #18
0
def main():
    """Write the result of the module-level ``sql`` query to a text file.

    ``sql`` is formatted with the hard-coded keyword and executed on the
    'ibbd2' connection. Each result row is written as column 0, a tab, and
    column 1. Column 1 goes through ``parser.unescape``, is encoded as GB2312
    (unencodable characters are dropped) and has newlines replaced by spaces;
    a falsy column 1 is written as an empty string.
    """

    # config

    exportpath = 'F:/'
    keyword = '女士手表'
    exportfile = os.path.join(exportpath, 'verified_%s_attr.txt' % keyword.decode('utf-8').encode('GBK'))

    # action

    dbConn = getDBConn('ibbd2')
    try:
        cursor = dbConn.cursor()
        try:
            print('execute sql query')
            cursor.execute(sql % keyword)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query raises.
        dbConn.close()

    lines = []
    for row in rows:
        if row[1]:
            text = parser.unescape(row[1]).encode('GB2312', 'ignore')
            text = text.replace('\n', ' ')
        else:
            text = ''
        lines.append(str(row[0]) + '\t' + text)

    # Mode 'w' truncates a previous export, so no separate remove is needed.
    with open(exportfile, 'w') as f:
        f.write('\n'.join(lines))
Example #19
0
def parseRedisPageIndustryOverview():
    """Execute every page-building step and publish the resulting date.

    Uses one MySQL and one Redis connection. The date window ends two days
    before today and begins 15 days before that end. After the steps have
    run, the day following the window end is written to the Redis key
    'ibbd-ststc-date' as YYYY/MM/DD and the MySQL connection is closed.
    """
    sqlConn = getDBConn('ibbd2')
    kvStore = getRedisConn()
    lastDay = date.today() - timedelta(2)
    firstDay = lastDay - timedelta(15)

    # steps taking only the connections
    schedule_info(sqlConn, kvStore)
    page_index(sqlConn, kvStore)

    # steps that also take the date window; the order is preserved
    windowArgs = (sqlConn, kvStore, firstDay, lastDay)
    windowSteps = [
        page_industry_overview_saletrend,
        page_industry_overview_pricetrend,
        page_industry_hotregion,
        page_industry_overview,
        page_industry_catsales,
    ]
    for runStep in windowSteps:
        runStep(*windowArgs)

    nextDay = lastDay + timedelta(1)
    kvStore.set('ibbd-ststc-date', nextDay.strftime('%Y/%m/%d'))

    # teardown
    sqlConn.close()
    del kvStore
    del sqlConn
def saveRedisScheduleError(runDate):
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')
    dbCursor = db.cursor()

    def parseLog(val):
        j = {}
        try:
            j = json.loads(val)
            return [j['msg'], j['SlaveID']]
        except:
            try:
                j = json.loads(re.sub(r'(\d+)L', r'\1', val.replace("'", '"')))
            except:
                j = {'msg': None, 'SlaveID': None}
        return [j['msg'], j['SlaveID']]

    def resort(arr):

        # resort [schedule,k,schedule_error,msg,slaveid] to [schedule,slaveid,k,schedule_error,msg]

        arr.insert(2, arr[-1])
        return arr[:-1]

    for (schedule, schedule_error) in SCHEDULE_ERROR_MAP.items():
        print schedule, schedule_error, rdSvr.hlen(schedule_error)
        if rdSvr.hlen(schedule_error) > 0:
            dbCursor.executemany('INSERT INTO topspider.schedule_error_log VALUES(%s,%s,%s,%s,%s,%s,NOW())',
                                 [resort([schedule, runDate, k, schedule_error] + parseLog(v)) for (k, v) in
                                 rdSvr.hgetall(schedule_error).items()])

        # rdSvr.delete(schedule_error)

    db.commit()
    dbCursor.close()
    db.close()
    del rdSvr