def main():
    """Export the rows returned by the module-level ``sql`` query to a file.

    The ``sql`` template is filled with a hard-coded keyword and run against
    the ``ibbd2`` connection.  Every result row becomes one tab-separated
    line (first column, then the cleaned second column) in
    ``verified_<keyword>_attr.txt`` under ``exportpath``.  The cursor and the
    connection are released even when the query fails.
    """
    # config
    exportpath = 'F:/'
    keyword = '女士手表'
    exportfile = os.path.join(
        exportpath,
        'verified_%s_attr.txt' % keyword.decode('utf-8').encode('GBK'))

    # action
    dbConn = getDBConn('ibbd2')
    try:
        cursor = dbConn.cursor()
        try:
            print('execute sql query')
            cursor.execute(sql % keyword)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        dbConn.close()

    lines = []
    for row in rows:
        if row[1]:
            # Unescape the text, drop characters GB2312 cannot represent and
            # keep each record on a single line.
            # NOTE(review): `parser` is presumably an HTMLParser instance -
            # confirm against the module header.
            text = parser.unescape(row[1]).encode('GB2312', 'ignore')
            text = text.replace('\n', ' ')
        else:
            text = ''
        lines.append(str(row[0]) + '\t' + text)

    # Mode 'w' truncates an existing export, which replaces the former
    # "remove the file, then open it for append" sequence.
    with open(exportfile, 'w') as f:
        f.write('\n'.join(lines))
def saveRedisSchedulePlan():
    """Copy the members of every Redis schedule set into the plan log table.

    For each name in ``SCHEDULES`` the set members are read from Redis and
    inserted into ``topspider.schedule_plan_log`` as
    ``(schedule, CURDATE(), member)`` rows.  All inserts are committed once
    at the end; the cursor and connection are closed even if a step fails.
    """
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')
    try:
        dbCursor = db.cursor()
        try:
            for schedule in SCHEDULES:
                ids = rdSvr.smembers(schedule)
                if not ids:
                    # Nothing planned for this schedule; skip the round-trip.
                    continue
                dbCursor.executemany(
                    'INSERT INTO topspider.schedule_plan_log VALUES(%s,CURDATE(),%s)',
                    [[schedule, iid] for iid in ids])
            db.commit()
        finally:
            dbCursor.close()
    finally:
        db.close()
def queryRateDict():
    """Load ``ibbd2.lib_rate_library`` grouped by theme.

    Returns:
        dict mapping each ``theme`` value to a list of
        ``[keyword, appraisal, score]`` entries in result order, with
        ``keyword`` and ``appraisal`` encoded as UTF-8.
    """
    db = getDBConn('ibbd2')
    try:
        db.query('SELECT theme,keyword,appraisal,score FROM ibbd2.lib_rate_library')
        r = db.store_result()
        rateDict = {}
        for i in range(r.num_rows()):
            theme, keyword, appraisal, score = r.fetch_row()[0]
            # setdefault creates the per-theme list on first sight, replacing
            # the deprecated has_key() check and its duplicated branch.
            rateDict.setdefault(theme, []).append(
                [keyword.encode('utf-8'), appraisal.encode('utf-8'), score])
    finally:
        # Release the connection even when the query or decoding fails.
        db.close()
    return rateDict
def saveRedisSchedulePlan():
    """Persist the current members of each Redis schedule set.

    Iterates over ``SCHEDULES``, reads the members of the Redis set with that
    name and inserts one ``(schedule, CURDATE(), member)`` row per member into
    ``topspider.schedule_plan_log``.  The inserts are committed together after
    the last schedule; cursor and connection are always closed.
    """
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')
    try:
        dbCursor = db.cursor()
        try:
            for schedule in SCHEDULES:
                ids = rdSvr.smembers(schedule)
                if not ids:
                    # Empty set: there is nothing to insert for this schedule.
                    continue
                dbCursor.executemany(
                    'INSERT INTO topspider.schedule_plan_log VALUES(%s,CURDATE(),%s)',
                    [[schedule, iid] for iid in ids])
            db.commit()
        finally:
            dbCursor.close()
    finally:
        db.close()
def getTableDDL(dbName):
    """Collect the CREATE statement of every entry listed by ``SHOW TABLES``.

    Args:
        dbName: schema name; passed to ``getDBConn`` and used to qualify the
            table names.

    Returns:
        dict mapping the first column of each ``SHOW CREATE TABLE`` result
        (the object name) to its second column (the DDL) encoded as UTF-8.
    """
    db = getDBConn(dbName)
    try:
        db.query("SHOW TABLES")
        r = db.store_result()
        tables = [r.fetch_row()[0][0] for i in range(r.num_rows())]

        j = {}
        for table in tables:
            # Backtick-quote both identifiers so names that are reserved
            # words or contain special characters do not break the statement.
            db.query("SHOW CREATE TABLE `%s`.`%s`" % (dbName, table))
            rs = db.store_result().fetch_row()[0]
            j[rs[0]] = rs[1].encode("utf-8")
    finally:
        # Release the connection even when one of the statements fails.
        db.close()
    return j
def getTableDDL(dbName):
    """Return the DDL of everything ``SHOW TABLES`` reports for a schema.

    Args:
        dbName: schema name; used both to open the connection and to qualify
            each table in ``SHOW CREATE TABLE``.

    Returns:
        dict keyed by the name column of ``SHOW CREATE TABLE`` whose values
        are the corresponding CREATE statements encoded as UTF-8.
    """
    db = getDBConn(dbName)
    try:
        db.query('SHOW TABLES')
        r = db.store_result()
        tables = [r.fetch_row()[0][0] for i in range(r.num_rows())]

        j = {}
        for table in tables:
            # Identifiers are backtick-quoted so reserved words and unusual
            # characters in table names are accepted by the server.
            db.query('SHOW CREATE TABLE `%s`.`%s`' % (dbName, table))
            rs = db.store_result().fetch_row()[0]
            j[rs[0]] = rs[1].encode('utf-8')
    finally:
        # Always hand the connection back, including on query errors.
        db.close()
    return j
def evaluateRatesByItemId(item_id):
    """Evaluate the stored rates of one item and save the results.

    Rates for ``item_id`` are read from ``topspider.top_item_rate`` in batches
    of 300, passed to ``evaluateRates`` together with the rate dictionary and
    ``ADVS``, and every non-empty evaluation entry is inserted into
    ``ibbd2.quota_rate_evaluate``.  A failing insert is printed and skipped.
    Each batch is committed once processed and both connections are closed
    when the function exits, also on error.

    Args:
        item_id: value matched against ``auc_num_id``.
    """
    rateDict = queryRateDict()
    db = getDBConn('ibbd2')    # read side: streams the item's rates
    db2 = getDBConn('ibbd2')   # write side: receives the evaluation rows
    try:
        try:
            c = db.cursor()
            c2 = db2.cursor()
            # NOTE(review): this statement and the INSERT below are assembled
            # with string formatting; only the rate content goes through
            # escapeSQLCommand.  Consider driver-side parameters once the
            # exact behavior of escapeSQLCommand is confirmed.
            c.execute("""SELECT rate_id, content, `date` FROM topspider.top_item_rate WHERE auc_num_id='%s' GROUP BY rate_id""" % item_id)
            while 1:
                batch = c.fetchmany(300)
                if not batch:
                    break
                # rate_id -> [content (UTF-8), date]
                rows = dict((t[0], [t[1].encode('utf-8'), t[2]]) for t in batch)
                metadata = evaluateRates(
                    [[k, v[0]] for (k, v) in rows.items()], rateDict, ADVS)
                for k in metadata.keys():
                    for (key, val) in metadata[k].items():
                        if len(val) < 1:
                            continue
                        # Only the first key/value pair of the entry is stored.
                        firstKey, firstValue = next(iter(val.items()))
                        cmdStr = (
                            "INSERT INTO ibbd2.quota_rate_evaluate "
                            "VALUES(NULL,NULL,'{0}','{1}','{2}','{3}','{4}','{5}',{6},NOW())"
                        ).format(
                            item_id,
                            k,
                            rows[k][1],
                            escapeSQLCommand(rows[k][0]),
                            key.encode('utf-8'),
                            firstKey,
                            firstValue,
                        )
                        try:
                            c2.execute(cmdStr)
                        except Exception as e:
                            # Best effort: report the failing row and go on.
                            print(e)
                # Without a commit the inserts are lost on connections that
                # do not run in autocommit mode.
                db2.commit()
        finally:
            db2.close()
    finally:
        db.close()
def evaluateRatesByItemId(item_id):
    """Run the rate evaluation for a single item and store its output.

    Reads the item's rates from ``topspider.top_item_rate`` 300 rows at a
    time, hands them to ``evaluateRates`` (with the rate dictionary and
    ``ADVS``) and inserts every non-empty result entry into
    ``ibbd2.quota_rate_evaluate``.  Insert errors are printed and skipped;
    each processed batch is committed and both connections are always closed.

    Args:
        item_id: value matched against ``auc_num_id``.
    """
    rateDict = queryRateDict()
    db = getDBConn('ibbd2')    # connection used for reading rates
    db2 = getDBConn('ibbd2')   # connection used for writing evaluations
    try:
        try:
            c = db.cursor()
            c2 = db2.cursor()
            # NOTE(review): both SQL statements here are built by string
            # formatting and only the content column is passed through
            # escapeSQLCommand - a candidate for parameterized execution.
            c.execute("""SELECT rate_id, content, `date` FROM topspider.top_item_rate WHERE auc_num_id='%s' GROUP BY rate_id""" % item_id)
            while 1:
                batch = c.fetchmany(300)
                if not batch:
                    break
                # rate_id -> [content (UTF-8), date]
                rows = dict((t[0], [t[1].encode('utf-8'), t[2]]) for t in batch)
                metadata = evaluateRates(
                    [[k, v[0]] for (k, v) in rows.items()], rateDict, ADVS)
                for k in metadata.keys():
                    for (key, val) in metadata[k].items():
                        if len(val) < 1:
                            continue
                        # The entry's first key/value pair is what is stored.
                        firstKey, firstValue = next(iter(val.items()))
                        cmdStr = (
                            "INSERT INTO ibbd2.quota_rate_evaluate "
                            "VALUES(NULL,NULL,'{0}','{1}','{2}','{3}','{4}','{5}',{6},NOW())"
                        ).format(
                            item_id,
                            k,
                            rows[k][1],
                            escapeSQLCommand(rows[k][0]),
                            key.encode('utf-8'),
                            firstKey,
                            firstValue,
                        )
                        try:
                            c2.execute(cmdStr)
                        except Exception as e:
                            # Keep going after a bad row; just report it.
                            print(e)
                # Persist the batch; otherwise the rows vanish on
                # connections that are not in autocommit mode.
                db2.commit()
        finally:
            db2.close()
    finally:
        db.close()
def getTableDDL(dbName):
    """Return the CREATE TABLE statement of every base table in a schema.

    Table names come from ``information_schema.TABLES``; rows whose
    ``TABLE_TYPE`` is not ``'BASE TABLE'`` are ignored.

    Args:
        dbName: schema name; used to open the connection, to filter
            ``TABLE_SCHEMA`` and to qualify each table name.

    Returns:
        dict mapping the name column of ``SHOW CREATE TABLE`` to the DDL
        column encoded as UTF-8.
    """
    db = getDBConn(dbName)
    try:
        db.query("SELECT TABLE_NAME, TABLE_TYPE FROM information_schema.TABLES WHERE TABLE_SCHEMA='%s'" % dbName)
        r = db.store_result()
        found = [r.fetch_row()[0] for i in range(r.num_rows())]
        tables = [name for (name, kind) in found if kind == 'BASE TABLE']

        j = {}
        for table in tables:
            # Backtick-quote the identifiers so reserved words or special
            # characters in a table name do not break the statement.
            db.query('SHOW CREATE TABLE `%s`.`%s`' % (dbName, table))
            rs = db.store_result().fetch_row()[0]
            j[rs[0]] = rs[1].encode('utf-8')
    finally:
        # Close the connection on success and on failure alike.
        db.close()
    return j
def getDbRoutine(dbName):
    """Return the cleaned CREATE statement of every stored routine in a schema.

    ``information_schema.ROUTINES`` lists stored procedures and stored
    functions alike, so the matching ``SHOW CREATE PROCEDURE`` or
    ``SHOW CREATE FUNCTION`` is issued per routine; asking for a function
    with ``SHOW CREATE PROCEDURE`` makes the server reject the statement.

    Args:
        dbName: schema name; used to open the connection, to filter
            ``ROUTINE_SCHEMA`` and to qualify each routine name.

    Returns:
        dict mapping the routine name (first result column) to its CREATE
        statement (third result column) encoded as UTF-8, with runs of 2-10
        newlines collapsed to one and the ``DEFINER=`ibbd`@`%``` clause
        removed.  A procedure and a function sharing a name share one key.
    """
    db = getDBConn(dbName)
    try:
        db.query("SELECT ROUTINE_NAME, ROUTINE_TYPE FROM information_schema.ROUTINES WHERE ROUTINE_SCHEMA='%s'" % dbName)
        r = db.store_result()
        routines = [r.fetch_row()[0] for i in range(r.num_rows())]

        j = {}
        for (name, kind) in routines:
            # kind is the ROUTINE_TYPE column: 'PROCEDURE' or 'FUNCTION'.
            db.query('SHOW CREATE %s `%s`.`%s`' % (kind, dbName, name))
            rs = db.store_result().fetch_row()[0]
            ddl = rs[2].encode('utf-8')
            ddl = re.sub(r'\n{2,10}', '\n', ddl)
            j[rs[0]] = ddl.replace('DEFINER=`ibbd`@`%` ', '')
    finally:
        # Release the connection even when a statement fails.
        db.close()
    return j
def updateTopZhitongcheResult():
    """Refresh the Zhitongche results of all user keywords, forever.

    Each cycle loads the distinct keywords from ``ibbd2.user_keywords_ztc``,
    fetches and saves the Zhitongche result of every keyword and rebuilds the
    keyword's rows in ``ibbd2.ststc_ztc_shops``.  A failure on one keyword is
    logged and does not stop the others.  During ``exclude_hours`` the loop
    idles in 10-minute steps; after a full cycle it sleeps 25 minutes.

    This function never returns.
    """
    cyclePause = 60 * 25
    while 1:
        # exclude hours
        if time.localtime().tm_hour in exclude_hours:
            time.sleep(60 * 10)
            continue

        # Fetch the keyword list.
        db = getDBConn('ibbd2')
        try:
            db.query('SELECT DISTINCT(keyword) FROM ibbd2.user_keywords_ztc')
            r = db.store_result()
            keywords = [
                r.fetch_row()[0][0].encode('utf-8')
                for i in range(r.num_rows())
            ]
        finally:
            # Do not leak one connection per cycle when the query fails.
            db.close()

        # Fetch the Zhitongche results.
        for keyword in keywords:
            log.info('%s Start', keyword)
            try:
                # Save the results.
                metadata = getZhitongcheResult(keyword, 'utf-8', 5)
                saveZhitongcheResult(metadata)

                # Update the shop list: drop the keyword's old rows, then
                # insert one row per distinct seller.
                executeSQLCommand(
                    'ibbd2',
                    'DELETE FROM ibbd2.ststc_ztc_shops WHERE keyword=%s',
                    [keyword])
                sellerDict = dict((record['sellerId'], record['wangwang'])
                                  for record in metadata)
                executeSQLCommand(
                    'ibbd2',
                    'INSERT INTO ibbd2.ststc_ztc_shops VALUES(%s,%s,%s)',
                    [[keyword, k, v] for (k, v) in sellerDict.items()])
                log.info('%s Success', keyword)
            except Exception as e:
                log.error('%s Error %s', keyword, str(e))

        log.info('Sleep %d', cyclePause)
        # The pause was announced but never taken, which made the loop spin
        # through the keyword list without any delay.
        time.sleep(cyclePause)
def queryRateDict():
    """Read the rate library and group its rows by theme.

    Returns:
        dict whose keys are the ``theme`` values of
        ``ibbd2.lib_rate_library`` and whose values are lists of
        ``[keyword, appraisal, score]`` in result order; ``keyword`` and
        ``appraisal`` are encoded as UTF-8.
    """
    db = getDBConn('ibbd2')
    try:
        db.query(
            'SELECT theme,keyword,appraisal,score FROM ibbd2.lib_rate_library')
        r = db.store_result()
        rateDict = {}
        for i in range(r.num_rows()):
            theme, keyword, appraisal, score = r.fetch_row()[0]
            # One code path for new and known themes; replaces the deprecated
            # has_key() test and the duplicated list construction.
            rateDict.setdefault(theme, []).append(
                [keyword.encode('utf-8'), appraisal.encode('utf-8'), score])
    finally:
        # The connection is closed on errors as well.
        db.close()
    return rateDict
def run():
    """Evaluate the rates of every item found today for active keywords.

    Selects the item ids of today's ``ibbd2.top_itemsearchresult`` rows that
    match an active ``keyword`` entry of ``ibbd2.user_config`` and calls
    ``evaluateRatesByItemId`` for each one, printing a timestamp and the id
    beforehand.
    """
    db = getDBConn('ibbd2')
    try:
        db.query("""SELECT T2.item_id,T2.user_num_id
            FROM ibbd2.user_config T1
            LEFT JOIN ibbd2.top_itemsearchresult T2
            ON T1.config_value=T2.keyword AND DATE(T2.population_tsmp)=CURDATE()
            WHERE T1.config_key='keyword' AND T1.status='1'
            GROUP BY T2.item_id""")
        r = db.store_result()
        rs = [r.fetch_row()[0][0] for i in range(r.num_rows())]
    finally:
        # Release the connection before the long-running evaluation starts,
        # and also when the query fails.
        db.close()

    for iid in rs:
        if iid is None:
            # The LEFT JOIN yields a NULL item_id for keywords that have no
            # search result today; there is nothing to evaluate for those.
            continue
        print('%s %s' % (time.asctime(), iid))
        evaluateRatesByItemId(iid)
def updateTopZhitongcheResult():
    """Endlessly refresh Zhitongche results for the configured keywords.

    One cycle reads the distinct keywords of ``ibbd2.user_keywords_ztc``,
    then for each keyword fetches and saves its Zhitongche result and
    replaces the keyword's rows in ``ibbd2.ststc_ztc_shops``.  Errors are
    logged per keyword and the cycle continues.  While the current hour is
    in ``exclude_hours`` the loop waits 10 minutes at a time; after each
    cycle it waits 25 minutes.

    This function never returns.
    """
    cyclePause = 60 * 25
    while 1:
        # exclude hours
        if time.localtime().tm_hour in exclude_hours:
            time.sleep(60 * 10)
            continue

        # Fetch the keyword list.
        db = getDBConn('ibbd2')
        try:
            cmdStr = 'SELECT DISTINCT(keyword) FROM ibbd2.user_keywords_ztc'
            db.query(cmdStr)
            r = db.store_result()
            keywords = [r.fetch_row()[0][0].encode('utf-8')
                        for i in range(r.num_rows())]
        finally:
            # Close the connection even if the keyword query raises.
            db.close()

        # Fetch the Zhitongche results.
        for keyword in keywords:
            log.info('%s Start', keyword)
            try:
                # Save the results.
                metadata = getZhitongcheResult(keyword, 'utf-8', 5)
                saveZhitongcheResult(metadata)

                # Update the shop list: remove the keyword's previous rows
                # and insert one row per distinct seller id.
                executeSQLCommand(
                    'ibbd2',
                    'DELETE FROM ibbd2.ststc_ztc_shops WHERE keyword=%s',
                    [keyword])
                sellerDict = dict((record['sellerId'], record['wangwang'])
                                  for record in metadata)
                executeSQLCommand(
                    'ibbd2',
                    'INSERT INTO ibbd2.ststc_ztc_shops VALUES(%s,%s,%s)',
                    [[keyword, k, v] for (k, v) in sellerDict.items()])
                log.info('%s Success', keyword)
            except Exception as e:
                log.error('%s Error %s', keyword, str(e))

        log.info('Sleep %d', cyclePause)
        # Actually take the announced pause; previously the message was
        # logged and the next cycle started immediately.
        time.sleep(cyclePause)
def saveRedisScheduleError(runDate):
    """Copy the Redis schedule error hashes into the error log table.

    For every ``(schedule, schedule_error)`` pair of ``SCHEDULE_ERROR_MAP``
    the Redis hash named ``schedule_error`` is read and each field is
    inserted into ``topspider.schedule_error_log`` as
    ``(schedule, runDate, slave id, field, schedule_error, msg, NOW())``.
    Inserts are committed once at the end; cursor and connection are always
    closed.

    Args:
        runDate: value stored in the second column of every inserted row.
    """
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')

    def parseLog(val):
        """Return ``[msg, SlaveID]`` from a log value, ``None`` when absent."""
        try:
            j = json.loads(val)
        except (TypeError, ValueError):
            # Fallback for values that are not JSON: swap single quotes for
            # double quotes and strip the trailing L of long integers.
            try:
                j = json.loads(
                    re.sub(r'(\d+)L', r'\1', val.replace("'", '"')))
            except (AttributeError, TypeError, ValueError):
                j = None
        if not isinstance(j, dict):
            return [None, None]
        # .get() keeps a log lacking one of the keys from raising KeyError.
        return [j.get('msg'), j.get('SlaveID')]

    def resort(arr):
        # [schedule,runDate,k,schedule_error,msg,slaveid]
        # -> [schedule,runDate,slaveid,k,schedule_error,msg]
        return arr[:2] + arr[-1:] + arr[2:-1]

    try:
        dbCursor = db.cursor()
        try:
            for (schedule, schedule_error) in SCHEDULE_ERROR_MAP.items():
                # One round-trip supplies both the count and the content.
                entries = rdSvr.hgetall(schedule_error)
                print('%s %s %s' % (schedule, schedule_error, len(entries)))
                if not entries:
                    continue
                dbCursor.executemany(
                    'INSERT INTO topspider.schedule_error_log VALUES(%s,%s,%s,%s,%s,%s,NOW())',
                    [
                        resort([schedule, runDate, k, schedule_error] + parseLog(v))
                        for (k, v) in entries.items()
                    ])
                # The Redis hash is not deleted after the import; the
                # original delete call was commented out.
            db.commit()
        finally:
            dbCursor.close()
    finally:
        db.close()
def parseRedisPageIndustryOverview():
    """Rebuild the industry overview pages and publish their data date.

    Runs the schedule-info and index builders, then the sale-trend,
    price-trend, hot-region, overview and category-sales builders for the
    window ``[fromDate, toDate]``, where ``toDate`` is two days before today
    and ``fromDate`` is 15 days before ``toDate``.  Afterwards the Redis key
    ``ibbd-ststc-date`` is set to the day after ``toDate`` formatted as
    ``YYYY/MM/DD``.  The database connection is closed even if a builder
    raises.
    """
    dbConn = getDBConn('ibbd2')
    try:
        redisConn = getRedisConn()
        toDate = date.today() - timedelta(2)
        fromDate = toDate - timedelta(15)

        # start
        schedule_info(dbConn, redisConn)
        page_index(dbConn, redisConn)
        page_industry_overview_saletrend(dbConn, redisConn, fromDate, toDate)
        page_industry_overview_pricetrend(dbConn, redisConn, fromDate, toDate)
        page_industry_hotregion(dbConn, redisConn, fromDate, toDate)
        page_industry_overview(dbConn, redisConn, fromDate, toDate)
        page_industry_catsales(dbConn, redisConn, fromDate, toDate)
        # Published only after every builder has finished successfully.
        redisConn.set('ibbd-ststc-date',
                      (toDate + timedelta(1)).strftime('%Y/%m/%d'))
        # end
    finally:
        dbConn.close()
def main():
    """Dump the result of the module-level ``sql`` query into a text file.

    ``sql`` is formatted with a hard-coded keyword and executed on the
    ``ibbd2`` connection.  Each row is written as the first column, a tab and
    the cleaned second column to ``verified_<keyword>_attr.txt`` below
    ``exportpath``.  Cursor and connection are closed even if the query
    raises.
    """
    # config
    exportpath = 'F:/'
    keyword = '女士手表'
    exportfile = os.path.join(
        exportpath,
        'verified_%s_attr.txt' % keyword.decode('utf-8').encode('GBK'))

    # action
    dbConn = getDBConn('ibbd2')
    try:
        cursor = dbConn.cursor()
        try:
            print('execute sql query')
            cursor.execute(sql % keyword)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        dbConn.close()

    lines = []
    for row in rows:
        if row[1]:
            # Unescape, drop what GB2312 cannot encode, and flatten newlines
            # so that one record stays on one line.
            # NOTE(review): `parser` is presumably an HTMLParser instance -
            # confirm against the module header.
            text = parser.unescape(row[1]).encode('GB2312', 'ignore')
            text = text.replace('\n', ' ')
        else:
            text = ''
        lines.append(str(row[0]) + '\t' + text)

    # Opening with 'w' truncates a previous export, so the explicit
    # exists/remove step followed by an append is no longer needed.
    with open(exportfile, 'w') as f:
        f.write('\n'.join(lines))
def saveRedisScheduleError(runDate):
    """Store the content of the Redis schedule error hashes in MySQL.

    Walks ``SCHEDULE_ERROR_MAP``; for each ``(schedule, schedule_error)`` pair
    the Redis hash ``schedule_error`` is fetched and every field becomes one
    row ``(schedule, runDate, slave id, field, schedule_error, msg, NOW())``
    in ``topspider.schedule_error_log``.  A single commit follows the last
    hash; the cursor and connection are closed on success and on failure.

    Args:
        runDate: value written to the second column of each row.
    """
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')

    def parseLog(val):
        """Extract ``[msg, SlaveID]`` from a log value; ``None`` if missing."""
        try:
            j = json.loads(val)
        except (TypeError, ValueError):
            # Not JSON: retry after turning single quotes into double quotes
            # and removing the L suffix of long integers.
            try:
                j = json.loads(
                    re.sub(r'(\d+)L', r'\1', val.replace("'", '"')))
            except (AttributeError, TypeError, ValueError):
                j = None
        if not isinstance(j, dict):
            return [None, None]
        # A parsed log without one of the keys yields None instead of an
        # uncaught KeyError.
        return [j.get('msg'), j.get('SlaveID')]

    def resort(arr):
        # [schedule,runDate,k,schedule_error,msg,slaveid]
        # -> [schedule,runDate,slaveid,k,schedule_error,msg]
        return arr[:2] + arr[-1:] + arr[2:-1]

    try:
        dbCursor = db.cursor()
        try:
            for (schedule, schedule_error) in SCHEDULE_ERROR_MAP.items():
                # Read the hash once; its size is what gets reported.
                entries = rdSvr.hgetall(schedule_error)
                print('%s %s %s' % (schedule, schedule_error, len(entries)))
                if not entries:
                    continue
                dbCursor.executemany(
                    'INSERT INTO topspider.schedule_error_log VALUES(%s,%s,%s,%s,%s,%s,NOW())',
                    [resort([schedule, runDate, k, schedule_error] + parseLog(v))
                     for (k, v) in entries.items()])
                # The hash stays in Redis afterwards; the original delete
                # call was commented out.
            db.commit()
        finally:
            dbCursor.close()
    finally:
        db.close()