def rollbackErrorSchedule(error_filter=None):
    """Re-queue the ids stored in the error hashes so they are run again.

    For every ``(schedule, error_hashes)`` pair in ``MAPPING`` whose schedule
    is not in ``exclude_key``, the field names of each error hash are added
    to the schedule set and the error hash is then deleted.

    Args:
        error_filter: Currently unused; every id is re-queued without
            filtering. Kept so existing callers keep working.
            TODO: filtering on the stored error value is not implemented.
    """
    server = getRedisConn2()
    for schedule, error_hashes in MAPPING.items():
        if schedule in exclude_key:
            continue
        for error_hash in error_hashes:
            if not server.exists(error_hash):
                # NOTE(review): a missing hash stops processing of the
                # remaining hashes for this schedule; confirm that
                # `continue` was not intended here.
                break
            ids = server.hkeys(error_hash)
            if ids:
                # Re-queue before deleting so the ids are not lost if the
                # SADD fails; SADD with no members is rejected by Redis.
                server.sadd(schedule, *ids)
            server.delete(error_hash)
            print('%s %s %s' % (error_hash, len(ids), schedule))
    del server
def rollbackShopAllitemSchedule():
    """Move in-progress and failed shop ids back to the pending set.

    The field names of ``Schedule_shopAllitemInfo_process`` and
    ``Schedule_shopAllitemInfoError`` are added to the
    ``Schedule_shopAllitemInfo`` set, then each source hash is deleted.
    """
    server = getRedisConn2()
    for schedule in ('Schedule_shopAllitemInfo_process',
                     'Schedule_shopAllitemInfoError'):
        uids = server.hkeys(schedule)
        if uids:
            # SADD with no members is an error, so skip empty hashes; add
            # before deleting so a failed SADD cannot drop the ids.
            server.sadd('Schedule_shopAllitemInfo', *uids)
        server.delete(schedule)
    del server
def rollbackKeywordSearchResultSchedule():
    """Move failed and in-progress keyword ids back to the pending set.

    The field names of ``Schedule_keywordSearchResultError`` and
    ``Schedule_keywordSearchResult_process`` are added to the
    ``Schedule_keywordSearchResult`` set, then each source hash is deleted.
    """
    server = getRedisConn2()
    for schedule in ('Schedule_keywordSearchResultError',
                     'Schedule_keywordSearchResult_process'):
        kids = server.hkeys(schedule)
        if kids:
            # SADD with no members is an error, so skip empty hashes; add
            # before deleting so a failed SADD cannot drop the ids.
            server.sadd('Schedule_keywordSearchResult', *kids)
        server.delete(schedule)
    del server
def main():
    """Write this machine's ``SlaveSleep<node id>`` key and wait for Enter.

    The key's value is the first command-line argument parsed as an integer
    (60 when no argument is given); the key expires after 60 seconds.
    """
    if len(sys.argv) > 1:
        interval = atoi(sys.argv[1])
    else:
        interval = 60
    sleep_key = 'SlaveSleep%s' % uuid.getnode()

    server = getRedisConn2()
    server.set(sleep_key, interval)
    server.expire(sleep_key, 60)
    del server

    print('paused')
    # Block until the operator presses Enter.
    raw_input('')
def rollbackErrorItemTradeSchedule():
    """Re-queue item-trade errors whose message contains 'None' or 'lost'.

    Matching item ids from the ``Schedule_itemTradeError`` hash are added to
    the ``Schedule_itemTrade`` set. The whole error hash is then deleted, so
    entries that did not match are dropped rather than retried.
    """
    server = getRedisConn2()
    vals = server.hgetall('Schedule_itemTradeError')
    iids = [iid for (iid, val) in vals.items()
            if 'None' in val or 'lost' in val]
    if iids:
        # SADD with no members is an error; without this guard a run with
        # no matching entries raised and never reached the delete below.
        server.sadd('Schedule_itemTrade', *iids)
    server.delete('Schedule_itemTradeError')
    del server
def clearAllErrorSchedule():
    """Clear all error schedules.

    Prints the name and field count of every hash listed in the values of
    ``MAPPING`` and then deletes it.
    """
    server = getRedisConn2()
    for error_hashes in MAPPING.values():
        for error_hash in error_hashes:
            field_count = server.hlen(error_hash)
            print('%s %s' % (error_hash, field_count))
            server.delete(error_hash)
    del server
def runSchedule():
    """Fetch and save the shop overview for every id in the error hash.

    Each field name of ``Schedule_shopAllitemInfoError`` is used as the
    ``user_number_id`` of a shop URL. If fetching or saving raises, the id is
    printed and processing continues with the next one.
    """
    redis_conn = getRedisConn2()
    url_template = 'http://store.taobao.com/shop/view_shop.htm?user_number_id=%s'
    for uid in redis_conn.hkeys('Schedule_shopAllitemInfoError'):
        url = url_template % uid
        try:
            saveTopAllitemInfo(getShopItemsOverview(url))
        except Exception:
            # Best effort: report the failing id and keep going.
            print(uid)
def rollbackShopAllitemSchedule():
    """Return in-progress and failed shop ids to the pending schedule set.

    Reads the field names of ``Schedule_shopAllitemInfo_process`` and
    ``Schedule_shopAllitemInfoError``, adds them to the
    ``Schedule_shopAllitemInfo`` set, and deletes both hashes.
    """
    server = getRedisConn2()
    source_hashes = [
        'Schedule_shopAllitemInfo_process',
        'Schedule_shopAllitemInfoError',
    ]
    for schedule in source_hashes:
        uids = server.hkeys(schedule)
        # Redis rejects SADD without members, so only call it when there is
        # something to add, and do so before the hash is removed.
        if uids:
            server.sadd('Schedule_shopAllitemInfo', *uids)
        server.delete(schedule)
    del server
def rollbackNullSiteIdItemSchedule():
    """Queue every item with an empty ``site_id`` for a full-info refresh.

    Item ids are read from ``ibbd2.top_cat_items`` where ``site_id`` is the
    empty string and added to the ``Schedule_itemFullInfo`` set. The number
    of ids found is printed.
    """
    print('init...')
    server = getRedisConn2()
    db_conn = ibbdlib.get_db_conn()
    try:
        iids = [
            row.item_id
            for row in db_conn.iter(
                "SELECT item_id FROM ibbd2.top_cat_items WHERE site_id=''")
        ]
    finally:
        # Close the database connection even if the query fails.
        db_conn.close()
    print('%s %s' % ('Schedule_itemFullInfo', len(iids)))
    if iids:
        # SADD with no members is an error, so skip it when nothing matched.
        server.sadd('Schedule_itemFullInfo', *iids)
    del server
def rollbackKeywordSearchResultSchedule():
    """Return failed and in-progress keyword ids to the pending set.

    Reads the field names of ``Schedule_keywordSearchResultError`` and
    ``Schedule_keywordSearchResult_process``, adds them to the
    ``Schedule_keywordSearchResult`` set, and deletes both hashes.
    """
    server = getRedisConn2()
    source_hashes = [
        'Schedule_keywordSearchResultError',
        'Schedule_keywordSearchResult_process',
    ]
    for schedule in source_hashes:
        kids = server.hkeys(schedule)
        # Redis rejects SADD without members, so only call it when there is
        # something to add, and do so before the hash is removed.
        if kids:
            server.sadd('Schedule_keywordSearchResult', *kids)
        server.delete(schedule)
    del server
def main():
    """Run ``test_schedule_cat`` against Redis and a local database pool.

    A ``ConnectionPool`` of up to 20 connections to the ``ibbd2`` database on
    localhost is created, passed with a Redis connection to
    ``test_schedule_cat``, and disconnected afterwards.
    """
    connpool = ConnectionPool(
        max_connections=20,
        host='localhost',
        user='******',
        passwd='',
        db='ibbd2',
    )
    redis_conn = getRedisConn2()
    test_schedule_cat(redis_conn, connpool)
    del redis_conn
    connpool.disconnect()
def rollbackNullSiteIdItemSchedule():
    """Re-queue items whose ``site_id`` is empty for a full-info crawl.

    Selects ``item_id`` from ``ibbd2.top_cat_items`` where ``site_id`` is the
    empty string, prints how many were found, and adds them to the
    ``Schedule_itemFullInfo`` set.
    """
    print('init...')
    server = getRedisConn2()
    query = "SELECT item_id FROM ibbd2.top_cat_items WHERE site_id=''"
    db_conn = ibbdlib.get_db_conn()
    try:
        iids = [row.item_id for row in db_conn.iter(query)]
    finally:
        # Release the database connection even when the query raises.
        db_conn.close()
    print('%s %s' % ('Schedule_itemFullInfo', len(iids)))
    # Redis rejects SADD without members; nothing to queue when iids is empty.
    if iids:
        server.sadd('Schedule_itemFullInfo', *iids)
    del server
def saveRedisSchedulePlan():
    """Snapshot the members of every schedule set into the plan log table.

    For each name in ``SCHEDULES`` the members of the Redis set are inserted
    into ``topspider.schedule_plan_log`` as ``(schedule, CURDATE(), id)``
    rows. All inserts are committed together after the last schedule.
    """
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')
    try:
        dbCursor = db.cursor()
        try:
            for schedule in SCHEDULES:
                ids = rdSvr.smembers(schedule)
                dbCursor.executemany(
                    'INSERT INTO topspider.schedule_plan_log VALUES(%s,CURDATE(),%s)',
                    [[schedule, iid] for iid in ids])
            db.commit()
        finally:
            dbCursor.close()
    finally:
        # Always release the database connection, even if an insert fails.
        db.close()
    del rdSvr
def saveRedisSchedulePlan():
    """Record the current contents of each schedule set in the database.

    Every member of each Redis set named in ``SCHEDULES`` becomes one
    ``(schedule, CURDATE(), id)`` row in ``topspider.schedule_plan_log``.
    The rows are committed once, after all schedules have been inserted.
    """
    insert_sql = 'INSERT INTO topspider.schedule_plan_log VALUES(%s,CURDATE(),%s)'
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')
    try:
        dbCursor = db.cursor()
        try:
            for schedule in SCHEDULES:
                rows = [[schedule, iid] for iid in rdSvr.smembers(schedule)]
                dbCursor.executemany(insert_sql, rows)
            db.commit()
        finally:
            dbCursor.close()
    finally:
        # Close the connection on every path so a failed insert cannot leak it.
        db.close()
    del rdSvr
def saveRedisScheduleError(runDate):
    """Copy the entries of every Redis error hash into the error log table.

    For each ``(schedule, schedule_error)`` pair in ``SCHEDULE_ERROR_MAP``
    the fields of the ``schedule_error`` hash are inserted into
    ``topspider.schedule_error_log``. The Redis hashes are left in place.
    All inserts are committed together after the last hash.

    Args:
        runDate: Value stored in the second column of every inserted row.
    """
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')

    def parseLog(val):
        """Return ``[msg, SlaveID]`` from a stored error value.

        Missing keys and unparseable values yield ``None`` entries.
        """
        try:
            parsed = json.loads(val)
        except (TypeError, ValueError):
            # Not valid JSON: retry after swapping single quotes for double
            # quotes and stripping an `L` suffix after digits (presumably
            # Python 2 dict reprs with long integers -- TODO confirm).
            try:
                parsed = json.loads(
                    re.sub(r'(\d+)L', r'\1', val.replace("'", '"')))
            except (AttributeError, TypeError, ValueError):
                parsed = None
        if not isinstance(parsed, dict):
            return [None, None]
        return [parsed.get('msg'), parsed.get('SlaveID')]

    def resort(arr):
        # Reorder [schedule, runDate, k, schedule_error, msg, slaveid]
        # to      [schedule, runDate, slaveid, k, schedule_error, msg].
        return arr[:2] + arr[-1:] + arr[2:-1]

    try:
        dbCursor = db.cursor()
        try:
            for (schedule, schedule_error) in SCHEDULE_ERROR_MAP.items():
                # Fetch once so the printed count and the inserted rows
                # come from the same snapshot of the hash.
                entries = rdSvr.hgetall(schedule_error)
                print('%s %s %s' % (schedule, schedule_error, len(entries)))
                if entries:
                    dbCursor.executemany(
                        'INSERT INTO topspider.schedule_error_log VALUES(%s,%s,%s,%s,%s,%s,NOW())',
                        [
                            resort([schedule, runDate, k, schedule_error] + parseLog(v))
                            for (k, v) in entries.items()
                        ])
            db.commit()
        finally:
            dbCursor.close()
    finally:
        # Always release the database connection, even if an insert fails.
        db.close()
    del rdSvr
def saveRedisScheduleError(runDate):
    """Persist the contents of each Redis error hash to the database.

    Each field of every error hash named in ``SCHEDULE_ERROR_MAP`` becomes a
    row in ``topspider.schedule_error_log``. The hashes themselves are not
    deleted. Rows are committed once, after every hash has been processed.

    Args:
        runDate: Value stored in the second column of every inserted row.
    """
    insert_sql = 'INSERT INTO topspider.schedule_error_log VALUES(%s,%s,%s,%s,%s,%s,NOW())'
    rdSvr = getRedisConn2()
    db = getDBConn('ibbd2')

    def parseLog(val):
        """Extract ``[msg, SlaveID]`` from a stored error value.

        Returns ``None`` for a key that is absent and ``[None, None]`` when
        the value cannot be parsed into a dict.
        """
        try:
            parsed = json.loads(val)
        except (TypeError, ValueError):
            # Fallback for values that are not valid JSON: replace single
            # quotes with double quotes and drop an `L` suffix after digits
            # (looks like Python 2 dict reprs -- TODO confirm).
            try:
                repaired = re.sub(r'(\d+)L', r'\1', val.replace("'", '"'))
                parsed = json.loads(repaired)
            except (AttributeError, TypeError, ValueError):
                parsed = None
        if isinstance(parsed, dict):
            return [parsed.get('msg'), parsed.get('SlaveID')]
        return [None, None]

    def resort(arr):
        # Move the trailing slaveid to index 2:
        # [schedule, runDate, k, schedule_error, msg, slaveid]
        #   -> [schedule, runDate, slaveid, k, schedule_error, msg]
        return arr[:2] + arr[-1:] + arr[2:-1]

    try:
        dbCursor = db.cursor()
        try:
            for (schedule, schedule_error) in SCHEDULE_ERROR_MAP.items():
                # One HGETALL replaces two HLEN calls plus an HGETALL, so the
                # count printed always matches the rows written.
                entries = rdSvr.hgetall(schedule_error)
                print('%s %s %s' % (schedule, schedule_error, len(entries)))
                if not entries:
                    continue
                rows = [
                    resort([schedule, runDate, k, schedule_error] + parseLog(v))
                    for (k, v) in entries.items()
                ]
                dbCursor.executemany(insert_sql, rows)
            db.commit()
        finally:
            dbCursor.close()
    finally:
        # Close the connection on every path so a failed insert cannot leak it.
        db.close()
    del rdSvr