class ExceedDateKeyCleanTask(object):
    """Cleanup task that deletes Redis data which is more than N days old.

    Every public entry point builds its target key as ``keyPrefix + <day>``,
    where ``<day>`` is the current date minus ``validDays`` days rendered by
    ``DateUtil.format4Y2m2d`` (``yyyyMMdd`` according to the original
    parameter notes; the prefix itself is expected to end with ``_``).
    """

    # COUNT hint handed to SSCAN/HSCAN for every page that is fetched.
    _SCAN_COUNT = 1000

    def __init__(self):
        self.__config = GetConfig()
        self.__log = Logger('exceedDateKeyCleanTask')
        self.__redisClient = RedisClient(self.__config.redis_host(),
                                         self.__config.redis_port(),
                                         self.__config.redis_password())
        self.__taskName = 'clean exceed date redis key'
        self.__className = self.__class__

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def __expiredDay(self, validDays):
        """Return the formatted day that lies ``validDays`` days in the past."""
        return DateUtil.format4Y2m2d(DateUtil.addDaysOnCurrent(-validDays))

    def __logStart(self, taskName):
        """Write the common "task started" log line."""
        self.__log.info("start doing task:{task}, className:{className}".format(
            task=taskName, className=self.__className))

    def __logFinish(self, taskName, key, db, startTime):
        """Write the common "task finished" log line including elapsed seconds."""
        costTime = time.time() - startTime
        self.__log.info(
            "task:{task} finish, className:{className}, key:{key}, db:{db}, cost:{cost}".format(
                task=taskName, className=self.__className, key=key, db=db, cost=costTime))

    def __drainByScan(self, db, key, scan, remove):
        """Page through ``key`` with ``scan`` and hand each non-empty page to ``remove``.

        :param db: database the key lives in
        :param key: key of the set/hash being scanned
        :param scan: callable ``scan(db, key, cursor, match, count)`` returning
            ``(next_cursor, batch)``
        :param remove: callable ``remove(db, key, batch)``; it is never invoked
            with an empty batch
        :return: None
        """
        cursor = 0
        while True:
            scanResult = scan(db, key, cursor, None, self._SCAN_COUNT)
            # Nothing came back at all: stop.
            if not scanResult:
                break
            cursor, batch = scanResult[0], scanResult[1]
            # A page may legitimately be empty while the cursor is still
            # non-zero; issuing a removal command without arguments would
            # fail, so empty pages are skipped.
            if batch:
                remove(db, key, batch)
            # A cursor of 0 marks the end of the iteration.
            # NOTE(review): assumes the client returns the cursor as an
            # integer - confirm against RedisClient.
            if cursor == 0:
                break

    def __runScanTask(self, label, keyPrefix, db, validDays, scan, remove):
        """Shared skeleton of the scan based tasks: log, scan/remove, log."""
        startTime = time.time()
        taskName = self.__taskName + label
        self.__logStart(taskName)
        key = keyPrefix + self.__expiredDay(validDays)
        self.__drainByScan(db, key, scan, remove)
        self.__logFinish(taskName, key, db, startTime)

    def __doStart(self, keyPrefix, db, date):
        """Unlink the single key ``keyPrefix + date`` and log the outcome."""
        startTime = time.time()
        taskName = self.__taskName + " by doStart"
        self.__logStart(taskName)

        key = keyPrefix + date
        self.__redisClient.unlink(db, key)
        self.__log.info("unlink key:{key}".format(key=key))

        self.__logFinish(taskName, key, db, startTime)

    # ------------------------------------------------------------------
    # public entry points
    # ------------------------------------------------------------------
    def start(self, keyPrefix, db, validDays):
        """
        Delete the key with the given prefix for the expired day.

        :param keyPrefix: prefix of the key to delete; the key has the form
            keyPrefix_yyyyMMdd, i.e. the prefix ends with ``_``
        :param db: database the key lives in
        :param validDays: number of days a key stays valid
        :return: None
        """
        self.__doStart(keyPrefix, db, self.__expiredDay(validDays))

    def startHour(self, keyPrefix, db, validDays):
        """
        Delete hourly keys, i.e. keys of the form keyPrefix_yyyyMMddHH,
        for all 24 hours of the expired day.

        :param keyPrefix: prefix of the keys to delete (ends with ``_``)
        :param db: database the keys live in
        :param validDays: number of days a key stays valid
        :return: None
        """
        day = self.__expiredDay(validDays)
        for hour in range(24):
            # Hours are zero padded to two digits: 00 .. 23.
            self.__doStart(keyPrefix, db, day + '{0:02d}'.format(hour))

    def startBatch(self, keyPrefix, db, validDays):
        """
        Delete every key that is stored as a member of the set
        ``keyPrefix + day`` (the set holds the names of the keys to delete),
        removing each processed member from the set as well.

        :param keyPrefix: prefix of the set key (ends with ``_``)
        :param db: database the keys live in
        :param validDays: number of days a key stays valid
        :return: None
        """
        client = self.__redisClient

        def unlinkAndForget(targetDb, setKey, members):
            # First drop the referenced keys, then their entries in the set.
            client.unlink(targetDb, *members)
            client.srem(targetDb, setKey, *members)

        self.__runScanTask(" by startBatch", keyPrefix, db, validDays,
                           client.sscan, unlinkAndForget)

    def startHscan(self, keyPrefix, db, validDays):
        """
        Empty a large hash key page by page using HSCAN + HDEL.

        :param keyPrefix: prefix of the hash key (ends with ``_``)
        :param db: database the key lives in
        :param validDays: number of days a key stays valid
        :return: None
        """
        client = self.__redisClient

        def dropFields(targetDb, hashKey, page):
            # ``page`` maps field -> value; only the field names are needed.
            client.hdel(targetDb, hashKey, *list(page.keys()))

        self.__runScanTask(" by startHscan", keyPrefix, db, validDays,
                           client.hscan, dropFields)

    def startSscan(self, keyPrefix, db, validDays):
        """
        Empty a large set key page by page using SSCAN + SREM.

        :param keyPrefix: prefix of the set key (ends with ``_``)
        :param db: database the key lives in
        :param validDays: number of days a key stays valid
        :return: None
        """
        client = self.__redisClient

        def dropMembers(targetDb, setKey, members):
            client.srem(targetDb, setKey, *members)

        self.__runScanTask(" by startSscan", keyPrefix, db, validDays,
                           client.sscan, dropMembers)
class KeyCleanTaskTest(object):
    """Manual test driver for KeyCleanTask.

    Seeds Redis with day- and hour-granular keys, runs the clean task and
    prints the key contents before and after so the effect can be inspected.
    """

    # Day granular plain keys (meanings taken from the original comments):
    #   launch_date_2_creatives_   launch date -> creatives mapping (BitSet)
    #   fingerprint_2_creativeId_  fingerprint -> creativeId mapping
    _DAY_KEY_PREFIXES = (
        'launch_date_2_creatives_',
        'fingerprint_2_creativeId_',
    )

    # Hour granular keys (meanings taken from the original comments):
    #   account_unfreeze_time_               account level freeze (budget
    #                                        exhausted) for that hour, agentId set
    #   campaign_unfreeze_time_              campaign level freeze (budget
    #                                        exhausted) for that hour, campaign id
    #                                        set (blacklist)
    #   account_unfreeze_time_2_creatives_   account freeze time -> creatives (BitSet)
    #   campaign_unfreeze_time_2_creatives_  campaign freeze time -> creatives (BitSet)
    _HOUR_KEY_PREFIXES = (
        'account_unfreeze_time_',
        'campaign_unfreeze_time_',
        'account_unfreeze_time_2_creatives_',
        'campaign_unfreeze_time_2_creatives_',
    )

    def __init__(self):
        # NOTE(review): the class is spelled ``KeyClearTask`` here while the
        # docstring talks about KeyCleanTask - confirm which name is defined.
        self.__keyCleanTask = KeyClearTask()
        self.__config = GetConfig()
        self.__redisClient = RedisClient(self.__config.redis_host(),
                                         self.__config.redis_port(),
                                         self.__config.redis_password())

    def __printState(self, db, plainKeys, setKeys):
        """Print the values of the plain keys and the first page of every set."""
        print(self.__redisClient.mget(db, *plainKeys))
        for setKey in setKeys:
            print(self.__redisClient.sscan(db, setKey, 0, None, 3))

    def testDoTask(self):
        """Seed keys for the last 3 days / 6 hours, run ``doTask`` and dump the result."""
        dayList = DateUtil.splitIntoDays(DateUtil.addDaysOnCurrent(-3),
                                         DateUtil.getCurrentDate())
        hourList = DateUtil.splitIntoHours(DateUtil.addHoursOnCurrent(-6),
                                           DateUtil.getCurrentDate())
        db = 0
        client = self.__redisClient

        print("----test doTask func begin")

        unlinkKeyList = []
        unlinkSetKeyList = []

        # Keys that are cleaned at day granularity.
        for day in dayList:
            # Launch date (Set): set of campaign ids (whitelist).
            launchDateKey = 'launch_date_' + day
            unlinkSetKeyList.append(launchDateKey)
            client.sadd(db, launchDateKey, 'mem1', 'mem2')
            for prefix in self._DAY_KEY_PREFIXES:
                unlinkKeyList.append(prefix + day)
                client.set(db, prefix + day, prefix + 'value1')

        # Keys that are cleaned at hour granularity; the stored value is
        # simply the prefix itself.
        for hour in hourList:
            for prefix in self._HOUR_KEY_PREFIXES:
                unlinkKeyList.append(prefix + hour)
                client.set(db, prefix + hour, prefix)

        print("init data:")
        self.__printState(db, unlinkKeyList, unlinkSetKeyList)

        self.__keyCleanTask.doTask()

        print("after clean:")
        self.__printState(db, unlinkKeyList, unlinkSetKeyList)
class ExceedDateKeyCleanTaskTest(object):
    """Manual test driver for ExceedDateKeyCleanTask.

    Each test seeds a key for the expired day, prints it, runs one clean-up
    entry point and prints the key again so the effect can be inspected.
    """

    def __init__(self):
        self.__exceedDateKeyCleanTask = ExceedDateKeyCleanTask()
        self.__config = GetConfig()
        self.__redisClient = RedisClient(self.__config.redis_host(),
                                         self.__config.redis_port(),
                                         self.__config.redis_password())

    def __expiredDay(self, validDays):
        """Return the formatted day that lies ``validDays`` days in the past."""
        return DateUtil.format4Y2m2d(DateUtil.addDaysOnCurrent(-validDays))

    def __seedHash(self, db, key):
        """Create a two-field hash under ``key``."""
        self.__redisClient.hset(db, key, 'map1', 'value1')
        self.__redisClient.hset(db, key, 'map2', 'value2')

    def testStart(self):
        """Seed one hash key and remove it through ``start``."""
        keyPrefix = 'account_cost_'
        validDays = 2
        db = 1
        key = keyPrefix + self.__expiredDay(validDays)

        print("----test start func begin")
        self.__seedHash(db, key)

        print("init data:")
        print(self.__redisClient.hscan(db, key, 0, None, 3))

        self.__exceedDateKeyCleanTask.start(keyPrefix, db, validDays)

        print("after clean:")
        print(self.__redisClient.hscan(db, key, 0, None, 3))

    def testStartHour(self):
        """Seed one hash key per hour (00..23) and remove them through ``startHour``."""
        keyPrefix = 'data_cost_'
        validDays = 2
        db = 3
        day = self.__expiredDay(validDays)

        print("----test startHour func begin")
        # Hours are zero padded to two digits.
        keyList = [keyPrefix + day + '{0:02d}'.format(hour) for hour in range(24)]
        for key in keyList:
            self.__seedHash(db, key)

        print("init data:")
        for key in keyList:
            print(self.__redisClient.hgetall(db, key))

        self.__exceedDateKeyCleanTask.startHour(keyPrefix, db, validDays)

        print("after clean:")
        for key in keyList:
            print(self.__redisClient.hgetall(db, key))

    def testStartBatch(self):
        """Seed a two-member set and process it through ``startBatch``."""
        keyPrefix = 'mix_detail_posid_'
        validDays = 30
        db = 1
        key = keyPrefix + self.__expiredDay(validDays)

        print("----test startBatch func begin")
        self.__redisClient.sadd(db, key, 'mem1', 'mem2')

        print("init data:")
        print(self.__redisClient.sscan(db, key, 0, None, 3))

        self.__exceedDateKeyCleanTask.startBatch(keyPrefix, db, validDays)

        print("after clean:")
        print(self.__redisClient.sscan(db, key, 0, None, 3))

    def testStartHscan(self):
        """Seed one hash key and empty it through ``startHscan``."""
        keyPrefix = 'view_st_ts_'
        validDays = 1
        db = 1
        key = keyPrefix + self.__expiredDay(validDays)

        print("----test startHscan func begin")
        self.__seedHash(db, key)

        print("init data:")
        print(self.__redisClient.hscan(db, key, 0, None, 3))

        self.__exceedDateKeyCleanTask.startHscan(keyPrefix, db, validDays)

        print("after clean:")
        print(self.__redisClient.hscan(db, key, 0, None, 3))

    def testStartSscan(self):
        """Seed a two-member set and empty it through ``startSscan``."""
        keyPrefix = 'click_st_'
        validDays = 1
        db = 1
        key = keyPrefix + self.__expiredDay(validDays)

        print("----test startSscan func begin")
        self.__redisClient.sadd(db, key, 'mem1', 'mem2')

        print("init data:")
        print(self.__redisClient.sscan(db, key, 0, None, 3))

        self.__exceedDateKeyCleanTask.startSscan(keyPrefix, db, validDays)

        print("after clean:")
        print(self.__redisClient.sscan(db, key, 0, None, 3))