def file_write_redis(): ''' 将源文件中的内容写入redis中 :return: ''' gaode_file_path = file_exists() # # fanyule_two_game_file_path = r'/ftp_samba/112/file_4spider/dmn_fanyule2_game/' # startup_nodes = [{'host': 'redis1', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) # 高德 gaode_file = open(gaode_file_path, 'r') gaode_keyword_length = r.llen( 'spider:python:gaode:keyword') # redis中gaode的数据量 print 'redis中gaode_keyword列表长度:', gaode_keyword_length if gaode_keyword_length != 0: r.delete('spider:python:gaode:keyword') print 'redis中gaode_keyword列表长度不为0, 删除后的列表长度:', r.llen( 'spider:python:gaode:keyword') for line in gaode_file: new_line = line.strip() if new_line: r.rpush('spider:python:gaode:keyword', new_line) gaode_keyword_length = r.llen('spider:python:gaode:keyword') print '重新写入后redis中gaode_keyword列表长度:', gaode_keyword_length
def main(): lock = threading.Lock() print time.strftime('[%Y-%m-%d %H:%M:%S]:'), 'start' startup_nodes = [{'host': 'redis3', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) print 'redis中pubg_death昵称列表长度:', r.llen('spider:python:pubg_death:keyword') get_redis_proxy() # 将redis中的代理ip放入到PROXY_IP_Q队列中 proxy_count = PROXY_IP_Q.qsize() print time.strftime('[%Y-%m-%d %H:%M:%S]'), '代理ip队列中的ip数量:', proxy_count print time.strftime('[%Y-%m-%d %H:%M:%S]'), '开启的线程数:', proxy_count threads = [] for i in xrange(50): t = threading.Thread(target=pubg_userid, args=(lock, r)) t.start() threads.append(t) for t in threads: t.join() print '源数据剩余量:', r.llen('spider:python:pubg_death:keyword') print '抓取结束'
def main(): startup_nodes = [{'host': 'redis3', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) gaode_length = r.llen('spider:python:gaode:keyword:dest') # redis的数据量 print time.strftime( '[%Y-%m-%d %H:%M:%S]'), 'redis中gaode的数据量:', gaode_length lock = threading.Lock() first_hour = time.strftime('%H') date = time.strftime('%Y%m%d') dest_path = '/ftp_samba/112/spider/python/gd_location/' # 线上数据存储文件 if not os.path.exists(dest_path): os.makedirs(dest_path) dest_file_name = os.path.join(dest_path, 'gd_location_' + date) fileout = open(dest_file_name, 'a') threads = [] for i in xrange(1): t = threading.Thread(target=gaode, args=(lock, r, first_hour, fileout)) t.start() threads.append(t) for t in threads: t.join() try: fileout.flush() fileout.close() except IOError as e: time.sleep(2) fileout.close()
def main(): startup_nodes = [{'host': 'redis3', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) pubg_friends_length = r.llen( 'spider:python:pubg_friends:keyword:dest') # redis的数据量 print time.strftime( '[%Y-%m-%d %H:%M:%S]'), 'redis中pubg_friends的数据量:', pubg_friends_length lock = threading.Lock() first_hour = time.strftime('%H') # 小时 date = time.strftime('%Y%m%d') # 数据文件日期 dest_path = '/ftp_samba/112/spider/python/pubg' # linux上的文件目录 if not os.path.exists(dest_path): os.makedirs(dest_path) dest_file_name = os.path.join(dest_path, 'pubg_friends_' + date) fileout = open(dest_file_name, 'a') threads = [] for i in xrange(1): t = threading.Thread(target=pubg_friends, args=(lock, r, first_hour, fileout)) t.start() threads.append(t) for t in threads: t.join() try: fileout.flush() fileout.close() except IOError as e: time.sleep(2) fileout.close() print time.strftime('[%Y-%m-%d %H:%M:%S]:'), 'over'
def redis_cluster():
    """Demonstrate string, list and hash operations on a Redis cluster.

    Connects to the six hard-coded startup nodes, then writes and reads back
    one string key, one list key and one hash key, printing each result.
    If the connection cannot be established the error is printed and the
    function returns without running the demo.

    :return: None
    """
    import ast  # local import: only needed to parse the value read back

    redis_nodes = [
        {'host': host, 'port': port}
        for host in ('10.12.28.222', '10.12.28.224', '10.12.28.227')
        for port in (6380, 6381)
    ]
    try:
        r = StrictRedisCluster(startup_nodes=redis_nodes)
    except Exception as e:
        print("connect error %s" % e)
        # Without a client nothing below can run; previously execution fell
        # through and failed with a NameError on ``r``.
        return

    # string operations
    r.set('thoth:thoth-ai:robot:1', 'kk')
    print("name is", r.get('thoth:thoth-ai:robot:1'))

    # list operations
    r.lpush('thoth:thoth-ai:robot:2', [[1, 2, 3], [2, 3, 4]])
    print('list len:', r.llen("thoth:thoth-ai:robot:2"))  # list size
    print("list ", r.lindex('thoth:thoth-ai:robot:2', 0))

    # hash operations
    r.hset('thoth:thoth-ai:robot:3', 'private_vector', [[1, 2, 3], [2, 3, 4]])
    r.hset('thoth:thoth-ai:robot:3', 'public_vector', [['4', 3, 2], [0, 1, 1]])
    pv = r.hget('thoth:thoth-ai:robot:3', 'public_vector')
    print('hash.robot3.public_vector:', pv)
    aaa = pv.decode('utf-8')
    print(type(aaa), aaa)
    # Turn the string "[[...], [...]]" back into a list. This used to be
    # ``eval``; the value comes from Redis, so ``ast.literal_eval`` is used
    # instead: it accepts only Python literals and cannot execute code.
    b = ast.literal_eval(aaa)
    print(type(b), b)
def exec_redis(redis_nodes, cmd_list):
    '''
    Run a list of read-only Redis commands against a cluster.

    redis_nodes = [{'host':'10.101.104.132','port':1321},
                   {'host':'10.101.104.132','port':1322},
                   {'host':'10.101.104.132','port':1323}
                   {'host':'10.101.104.132','port':1323,'password':None,'db':0}
                   ]
    cmd_list = [
        # only read commands are supported for now, and only these ones
        "get key1",
        "llen lis1",
        "smembers set1",
        "scard set1",
        "hgetall hashtable",
        "lindex list1 2",
        "hget hashtable key1",
        "lrange list1 1 3",
    ]

    Returns a dict mapping each stripped command string to a one-element list
    holding ``json_decode(result)``. An unsupported command yields the
    message 'this cmd is not support yeild' as its result. A command with
    too few arguments raises IndexError; extra arguments are ignored.
    '''
    redisconn = StrictRedisCluster(startup_nodes=redis_nodes, decode_responses=True)

    # command name -> (client method, number of arguments it consumes)
    handlers = {
        'get': (redisconn.get, 1),
        'hgetall': (redisconn.hgetall, 1),
        'llen': (redisconn.llen, 1),
        'smembers': (redisconn.smembers, 1),
        'scard': (redisconn.scard, 1),
        'hget': (redisconn.hget, 2),
        'lrange': (redisconn.lrange, 3),
        # Previously dispatched to ``lrange`` with two arguments, which
        # raised TypeError instead of returning the element.
        'lindex': (redisconn.lindex, 2),
    }

    res = {}
    for cmd in cmd_list:
        cmd = cmd.strip()
        arr = re.split(" +", cmd)
        handler = handlers.get(arr[0].lower())
        if handler is None:
            tmp = 'this cmd is not support yeild'
        else:
            method, argc = handler
            # Explicit indexing keeps the IndexError for missing arguments.
            args = [arr[pos] for pos in range(1, argc + 1)]
            tmp = method(*args)
        res[cmd] = [json_decode(tmp)]
    return res
def get_redis_proxy(): ''' 从redis相应的key中获取代理ip :return: ''' startup_nodes = [{'host': 'redis1', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) weibo_user_proxy_length = r.llen('spider:weibo_user:proxy') # weibo_user print time.strftime('[%Y-%m-%d %H:%M:%S]'), 'redis中weibo_user的代理ip长度:', weibo_user_proxy_length if weibo_user_proxy_length == 0: print time.strftime('[%Y-%m-%d %H:%M:%S]'), 'redis中的代理ip数量为0,等待60s' time.sleep(60) return get_redis_proxy() for i in xrange(weibo_user_proxy_length): ip = r.lpop('spider:weibo_user:proxy') proxies = { 'http': "http://*****:*****@{ip}".format(ip=ip), 'https': "http://*****:*****@{ip}".format(ip=ip) } PROXY_IP_Q.put(proxies)
def main(): lock = Lock() print time.strftime('[%Y-%m-%d %H:%M:%S]'), 'start' startup_nodes = [{'host': 'redis1', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) print 'redis中gaode_keyword来源列表长度:', r.llen('spider:python:gaode:keyword') key_test() # 将有效的key放入KEYS_QUEUE key = KEYS_QUEUE.get() print '获取的第一个key:', key threads = [] for i in xrange(50): t = threading.Thread(target=gaode, args=(lock, key, r)) t.start() threads.append(t) for t in threads: t.join() print time.strftime('[%Y-%m-%d %H:%M:%S]'), '抓取结束'
def get_redis_proxy(): ''' 从redis相应的key中获取代理ip(读取快代理的代理ip) :return: ''' startup_nodes = [{'host': 'redis2', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) baidu_zhishu_proxy_length = r.llen( 'spider:baidu_zhishu:proxy:kuai') # baidu_zhishu print time.strftime( '[%Y-%m-%d %H:%M:%S]' ), 'redis中baidu_zhishu的代理ip长度:', baidu_zhishu_proxy_length if baidu_zhishu_proxy_length == 0: print time.strftime('[%Y-%m-%d %H:%M:%S]'), 'redis中的代理ip数量为0,等待60s' time.sleep(60) return get_redis_proxy() for i in xrange(20): ip = r.lpop('spider:baidu_zhishu:proxy:kuai') proxies = { 'http': "http://{ip}".format(ip=ip), # 'https': "http://{ip}".format(ip=ip) } PROXY_IP_Q.put(proxies)
def file_write_redis(): ''' 将源文件中的内容写入redis中 :return: ''' # wrd_keyword_file_path = file_exists() # 数据源文件路径 pubg_nickname_file_path = r'/ftp_samba/112/file_4spider/pubg_nickname/pubg_nickname' # 数据源文件路径 startup_nodes = [{'host': 'redis2', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) pubg_nickname_file = open(pubg_nickname_file_path, 'r') pubg_friends_nickname_length = r.llen('spider:python:pubg_friends:keyword') print 'redis中pubg_friends_nickname列表长度:', pubg_friends_nickname_length if pubg_friends_nickname_length < 200000: for line in pubg_nickname_file: r.rpush('spider:python:pubg_friends:keyword', line.strip()) pubg_friends_nickname_length = r.llen( 'spider:python:pubg_friends:keyword') print '重新写入后redis中pubg_friends_nickname列表长度:', pubg_friends_nickname_length pubg_nickname_file = open(pubg_nickname_file_path, 'r') pubg_match_nickname_length = r.llen('spider:python:pubg_match:keyword') print 'redis中pubg_match_nickname列表长度:', pubg_match_nickname_length if pubg_match_nickname_length < 200000: for line in pubg_nickname_file: r.rpush('spider:python:pubg_match:keyword', line.strip()) pubg_match_nickname_length = r.llen('spider:python:pubg_match:keyword') print '重新写入后redis中pubg_match_nickname列表长度:', pubg_match_nickname_length pubg_nickname_file = open(pubg_nickname_file_path, 'r') pubg_death_nickname_length = r.llen('spider:python:pubg_death:keyword') print 'redis中pubg_death_nickname列表长度:', pubg_death_nickname_length if pubg_death_nickname_length < 200000: for line in pubg_nickname_file: r.rpush('spider:python:pubg_death:keyword', line.strip()) pubg_death_nickname_length = r.llen('spider:python:pubg_death:keyword') print '重新写入后redis中pubg_death_nickname列表长度:', pubg_death_nickname_length
class RedisDB():
    """Thin wrapper around a Redis client (single node or cluster).

    With more than one ``ip:port`` entry a ``StrictRedisCluster`` is used,
    otherwise a plain ``redis.Redis``. One pipeline object (``self._pipe``)
    is created at construction time and shared by all batch operations.
    """

    def __init__(self, ip_ports = IP_PORTS, db = DB, user_pass = USER_PASS):
        """Connect to Redis.

        @param ip_ports: list of "ip:port" strings; more than one entry
                         selects cluster mode
        @param db: database index (single-node mode only)
        @param user_pass: password (single-node mode only)
        """
        if not hasattr(self, '_redis'):
            self._is_redis_cluster = False

            if len(ip_ports) > 1:
                startup_nodes = []
                for ip_port in ip_ports:
                    ip, port = ip_port.split(':')
                    startup_nodes.append({"host": ip, "port": port})

                self._redis = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
                self._pipe = self._redis.pipeline(transaction=False)
                self._is_redis_cluster = True
            else:
                ip, port = ip_ports[0].split(':')
                self._redis = redis.Redis(host = ip, port = port, db = db, password = user_pass, decode_responses=True)
                # A pipeline sends several commands in one request; with
                # transaction=True the batch is executed atomically.
                self._pipe = self._redis.pipeline(transaction=True)

            # Connection errors simply propagate to the caller (the former
            # try/except only re-raised them).
            log.info('连接到redis数据库 %s'%(tools.dumps_json(ip_ports)))

    def sadd(self, table, values):
        '''
        @summary: store data in an unordered set (deduplicated)
        ---------
        @param table: set key
        @param values: a list of values or a single value
        ---------
        @result: for a single value, the result of SADD (0 if it already
                 existed, 1 if it was added); for a list, None
        '''
        if isinstance(values, list):
            if not self._is_redis_cluster:
                self._pipe.multi()
            for value in values:
                self._pipe.sadd(table, value)
            self._pipe.execute()
        else:
            return self._redis.sadd(table, values)

    def sget(self, table, count = 0, is_pop = True):
        '''
        @summary: fetch members of a set
        ---------
        @param table: set key
        @param count: number of members wanted; when popping it is capped
                      at the current set size
        @param is_pop: pop the members (SPOP) when True, otherwise sample
                       them with SRANDMEMBER
        ---------
        @result: list of members when popping; the SRANDMEMBER result
                 otherwise
        '''
        if not is_pop:
            return self._redis.srandmember(table, count)

        datas = []
        count = min(count, self.sget_count(table))
        if count > 1:
            if not self._is_redis_cluster:
                self._pipe.multi()
            for _ in range(count):
                self._pipe.spop(table)
            datas = self._pipe.execute()
        elif count:
            datas.append(self._redis.spop(table))
        return datas

    def sget_count(self, table):
        '''Return the number of members in the set (SCARD).'''
        return self._redis.scard(table)

    def sdelete(self, table):
        '''
        @summary: delete a large set key
                  The set is walked with SSCAN (count hint 10000) and the
                  scanned members are removed with SREM through the
                  pipeline, one batch per scan. Deleting a very large key
                  with a single DELETE would block Redis.
        ---------
        @param table: set key
        ---------
        @result: None
        '''
        # A cursor of 0 starts a new SCAN iteration; the iteration is over
        # when the server hands back a cursor equal to 0. The initial value
        # is the string '0' so that the first comparison with int 0 is true.
        cursor = '0'
        while cursor != 0:
            cursor, data = self._redis.sscan(table, cursor = cursor, count = 10000)
            for item in data:
                self._pipe.srem(table, item)
            self._pipe.execute()

    def zadd(self, table, values, prioritys = 0):
        '''
        @summary: store data in a sorted set (deduplicated; an existing
                  value gets its score updated)
        ---------
        @param table: sorted-set key
        @param values: a list of values or a single value
        @param prioritys: score(s); a list matching ``values`` or a single
                          number applied to all. Lower scores sort first.
                          Defaults to 0.
        ---------
        @result: for a single value, the result of ZADD; for a list, None
        '''
        if isinstance(values, list):
            if not isinstance(prioritys, list):
                prioritys = [prioritys] * len(values)
            else:
                assert len(values) == len(prioritys), 'values值要与prioritys值一一对应'

            if not self._is_redis_cluster:
                self._pipe.multi()
            for value, priority in zip(values, prioritys):
                # The two clients take (score, value) in opposite order.
                if self._is_redis_cluster:
                    self._pipe.zadd(table, priority, value)
                else:
                    self._pipe.zadd(table, value, priority)
            self._pipe.execute()
        else:
            if self._is_redis_cluster:
                return self._redis.zadd(table, prioritys, values)
            else:
                return self._redis.zadd(table, values, prioritys)

    def zget(self, table, count = 0, is_pop = True):
        '''
        @summary: fetch members from the head of a sorted set
        ---------
        @param table: sorted-set key
        @param count: number of members; ranks 0..count-1 are read
                      (rank 0 only when count is 0)
        @param is_pop: also remove the fetched rank range when True
                       (the default)
        ---------
        @result: list of members
        '''
        start_pos = 0  # inclusive
        end_pos = 0 if count == 0 else count - 1  # inclusive

        if not self._is_redis_cluster:
            self._pipe.multi()  # marks the start of the transaction
        self._pipe.zrange(table, start_pos, end_pos)  # read
        if is_pop:
            self._pipe.zremrangebyrank(table, start_pos, end_pos)  # remove
        # execute() returns one entry per queued command. Only the ZRANGE
        # reply is wanted; unpacking into two names failed with ValueError
        # when is_pop was False and only one command was queued.
        return self._pipe.execute()[0]

    def zget_count(self, table, priority_min = None, priority_max = None):
        '''
        @summary: count the members of a sorted set
        ---------
        @param table: sorted-set key
        @param priority_min: lower score bound (inclusive)
        @param priority_max: upper score bound (inclusive)
        ---------
        @result: ZCOUNT within the bounds when both are given, otherwise
                 ZCARD
        '''
        if priority_min is not None and priority_max is not None:
            return self._redis.zcount(table, priority_min, priority_max)
        return self._redis.zcard(table)

    def lpush(self, table, values):
        '''
        @summary: append one value or a list of values to a Redis list
        ---------
        @param table: list key
        @param values: a list of values or a single value
        ---------
        @result: for a single value, the result of the push; for a list,
                 None
        '''
        # NOTE(review): despite the method name this issues RPUSH, so that
        # ``lpop`` returns values in insertion order — presumably
        # intentional; confirm before changing.
        if isinstance(values, list):
            if not self._is_redis_cluster:
                self._pipe.multi()
            for value in values:
                self._pipe.rpush(table, value)
            self._pipe.execute()
        else:
            return self._redis.rpush(table, values)

    def lpop(self, table, count = 1):
        '''
        @summary: pop values from the head of a Redis list
        ---------
        @param table: list key
        @param count: number of values wanted; capped at the list length
        ---------
        @result: list of popped values
        '''
        datas = []
        count = min(count, self.lget_count(table))
        if count > 1:
            if not self._is_redis_cluster:
                self._pipe.multi()
            for _ in range(count):
                self._pipe.lpop(table)
            datas = self._pipe.execute()
        elif count:
            datas.append(self._redis.lpop(table))
        return datas

    def lget_count(self, table):
        '''Return the length of the list (LLEN).'''
        return self._redis.llen(table)

    def setbit(self, table, offset, value):
        '''Set the bit at ``offset`` of key ``table`` to ``value`` (SETBIT).'''
        self._redis.setbit(table, offset, value)

    def getbit(self, table, offset):
        '''Return the bit at ``offset`` of key ``table`` (GETBIT).'''
        return self._redis.getbit(table, offset)

    def clear(self, table):
        '''Delete the key; failures are logged, not raised.'''
        try:
            self._redis.delete(table)
        except Exception as e:
            log.error(e)
class SharQ(object):
    """The SharQ object is the core of this queue.
    SharQ does the following.

        1. Accepts a configuration file.
        2. Initializes the queue.
        3. Exposes functions to interact with the queue.
    """

    # Base names of the Lua scripts in scripts/lua. ``<name>.lua`` is read
    # into ``self._lua_<name>_script`` and registered as ``self._lua_<name>``.
    _LUA_SCRIPT_NAMES = ('enqueue', 'dequeue', 'finish', 'interval',
                         'requeue', 'metrics')

    def __init__(self, config_path):
        """Construct a SharQ object by doing the following.
            1. Read the configuration path.
            2. Load the config.
            3. Initialized SharQ.
        """
        self.config_path = config_path
        self._load_config()
        self._initialize()

    def _initialize(self):
        """Read the SharQ configuration and set appropriate
        variables. Open a redis connection pool and load all
        the Lua scripts.
        """
        self._key_prefix = self._config.get('redis', 'key_prefix')
        self._job_expire_interval = int(
            self._config.get('sharq', 'job_expire_interval'))
        self._default_job_requeue_limit = int(
            self._config.get('sharq', 'default_job_requeue_limit'))

        # initialize redis
        redis_connection_type = self._config.get('redis', 'conn_type')
        db = self._config.get('redis', 'db')
        if redis_connection_type == 'unix_sock':
            self._r = redis.StrictRedis(
                db=db,
                unix_socket_path=self._config.get('redis',
                                                  'unix_socket_path'))
        elif redis_connection_type == 'tcp_sock':
            if self._config.getboolean('redis', 'clustered', fallback=False):
                startup_nodes = [{
                    "host": self._config.get('redis', 'host'),
                    "port": self._config.get('redis', 'port')
                }]
                self._r = StrictRedisCluster(startup_nodes=startup_nodes,
                                             decode_responses=True,
                                             skip_full_coverage_check=True)
            else:
                self._r = redis.StrictRedis(
                    db=db,
                    host=self._config.get('redis', 'host'),
                    port=self._config.get('redis', 'port'))

        self._load_lua_scripts()

    def _load_config(self):
        """Read the configuration file and load it into memory."""
        self._config = ConfigParser.SafeConfigParser()
        self._config.read(self.config_path)

    def reload_config(self, config_path=None):
        """Reload the configuration from the new config file if provided
        else reload the current config file.
        """
        if config_path:
            self.config_path = config_path
        self._load_config()

    def _load_lua_scripts(self):
        """Loads all lua scripts required by SharQ."""
        lua_script_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'scripts/lua')

        for name in self._LUA_SCRIPT_NAMES:
            script_file_path = os.path.join(lua_script_path, '%s.lua' % name)
            with open(script_file_path, 'r') as script_file:
                source = script_file.read()
            # Keep both the raw source and the registered callable, under
            # the same attribute names as before.
            setattr(self, '_lua_%s_script' % name, source)
            setattr(self, '_lua_%s' % name, self._r.register_script(source))

    def reload_lua_scripts(self):
        """Lets user reload the lua scripts in run time."""
        self._load_lua_scripts()

    def enqueue(self, payload, interval, job_id,
                queue_id, queue_type='default', requeue_limit=None):
        """Enqueues the job into the specified queue_id
        of a particular queue_type.

        Raises BadArgumentException when any argument fails validation or
        the payload cannot be serialized. Returns ``{'status': 'queued'}``.
        """
        # validate all the input
        if not is_valid_interval(interval):
            raise BadArgumentException('`interval` has an invalid value.')

        if not is_valid_identifier(job_id):
            raise BadArgumentException('`job_id` has an invalid value.')

        if not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        if requeue_limit is None:
            requeue_limit = self._default_job_requeue_limit

        if not is_valid_requeue_limit(requeue_limit):
            raise BadArgumentException('`requeue_limit` has an invalid value.')

        try:
            serialized_payload = serialize_payload(payload)
        except TypeError as e:
            # str(e) instead of e.message: the ``message`` attribute is
            # deprecated and empty for multi-argument exceptions.
            raise BadArgumentException(str(e))

        timestamp = str(generate_epoch())

        keys = [self._key_prefix, queue_type]
        args = [
            timestamp,
            queue_id,
            job_id,
            # wrapped in quotes here; dequeue() strips them again
            '"%s"' % serialized_payload,
            interval,
            requeue_limit
        ]

        self._lua_enqueue(keys=keys, args=args)

        return {'status': 'queued'}

    def dequeue(self, queue_type='default'):
        """Dequeues a job from any of the ready queues
        based on the queue_type. If no job is ready,
        returns a failure status.
        """
        if not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        timestamp = str(generate_epoch())

        keys = [self._key_prefix, queue_type]
        args = [timestamp, self._job_expire_interval]

        dequeue_response = self._lua_dequeue(keys=keys, args=args)

        if len(dequeue_response) < 4:
            return {'status': 'failure'}

        queue_id, job_id, payload, requeues_remaining = dequeue_response
        # drop the surrounding quotes added by enqueue()
        payload = deserialize_payload(payload[1:-1])

        return {
            'status': 'success',
            'queue_id': queue_id,
            'job_id': job_id,
            'payload': payload,
            'requeues_remaining': int(requeues_remaining)
        }

    def finish(self, job_id, queue_id, queue_type='default'):
        """Marks any dequeued job as *completed successfully*.
        Any job which gets a finish will be treated as complete
        and will be removed from the SharQ.
        """
        if not is_valid_identifier(job_id):
            raise BadArgumentException('`job_id` has an invalid value.')

        if not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        keys = [self._key_prefix, queue_type]
        args = [queue_id, job_id]

        response = {'status': 'success'}

        finish_response = self._lua_finish(keys=keys, args=args)
        if finish_response == 0:
            # the finish failed.
            response.update({'status': 'failure'})

        return response

    def interval(self, interval, queue_id, queue_type='default'):
        """Updates the interval for a specific queue_id
        of a particular queue type.
        """
        # validate all the input
        if not is_valid_interval(interval):
            raise BadArgumentException('`interval` has an invalid value.')

        if not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        # generate the interval key
        interval_hmap_key = '%s:interval' % self._key_prefix
        interval_queue_key = '%s:%s' % (queue_type, queue_id)
        keys = [interval_hmap_key, interval_queue_key]
        args = [interval]

        interval_response = self._lua_interval(keys=keys, args=args)
        if interval_response == 0:
            # the queue with the id and type does not exist.
            return {'status': 'failure'}
        return {'status': 'success'}

    def requeue(self):
        """Re-queues any expired job (one which does not get an expire
        before the job_expiry_interval) back into their respective queue.
        This function has to be run at specified intervals to ensure the
        expired jobs are re-queued back.
        """
        timestamp = str(generate_epoch())
        # get all queue_types and requeue one by one.
        # not recommended to do this entire process
        # in lua as it might take long and block other
        # enqueues and dequeues.
        active_queue_type_list = self._r.smembers(
            '%s:active:queue_type' % self._key_prefix)
        for queue_type in active_queue_type_list:
            # requeue all expired jobs in all queue types.
            keys = [self._key_prefix, queue_type]
            args = [timestamp]
            job_discard_list = self._lua_requeue(keys=keys, args=args)
            # discard the jobs if any
            for job in job_discard_list:
                queue_id, job_id = job.split(':')
                # explicitly finishing a job
                # is nothing but discard.
                self.finish(
                    job_id=job_id,
                    queue_id=queue_id,
                    queue_type=queue_type
                )

    @staticmethod
    def _counts_from_details(enqueue_details, dequeue_details):
        """Turn the flat [key, count, key, count, ...] lists returned by the
        metrics script into two ``{key: int(count)}`` dicts."""
        enqueue_counts = {}
        dequeue_counts = {}
        # the length of enqueue & dequeue details are always same.
        for i in xrange(0, len(enqueue_details), 2):
            enqueue_counts[str(enqueue_details[i])] = int(
                enqueue_details[i + 1] or 0)
            dequeue_counts[str(dequeue_details[i])] = int(
                dequeue_details[i + 1] or 0)
        return enqueue_counts, dequeue_counts

    def metrics(self, queue_type=None, queue_id=None):
        """Provides a way to get statistics about various parameters like,
        * global enqueue / dequeue rates per min.
        * per queue enqueue / dequeue rates per min.
        * queue length of each queue.
        * list of queue ids for each queue type.

        Raises BadArgumentException for invalid identifiers or when
        `queue_id` is given without `queue_type`.
        """
        if queue_id is not None and not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if queue_type is not None and not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        response = {'status': 'failure'}
        if not queue_type and not queue_id:
            # return global stats.
            # list of active queue types (ready + active)
            active_queue_types = self._r.smembers(
                '%s:active:queue_type' % self._key_prefix)
            ready_queue_types = self._r.smembers(
                '%s:ready:queue_type' % self._key_prefix)
            all_queue_types = active_queue_types | ready_queue_types
            # global rates for past 10 minutes
            timestamp = str(generate_epoch())
            keys = [self._key_prefix]
            args = [timestamp]

            enqueue_details, dequeue_details = self._lua_metrics(
                keys=keys, args=args)
            enqueue_counts, dequeue_counts = self._counts_from_details(
                enqueue_details, dequeue_details)

            response.update({
                'status': 'success',
                'queue_types': list(all_queue_types),
                'enqueue_counts': enqueue_counts,
                'dequeue_counts': dequeue_counts
            })
            return response
        elif queue_type and not queue_id:
            # return list of queue_ids.
            # get data from two sorted sets in a transaction
            pipe = self._r.pipeline()
            pipe.zrange('%s:%s' % (self._key_prefix, queue_type), 0, -1)
            pipe.zrange('%s:%s:active' % (self._key_prefix, queue_type), 0, -1)
            ready_queues, active_queues = pipe.execute()
            # extract the queue_ids from the queue_id:job_id string
            active_queues = [i.split(':')[0] for i in active_queues]
            all_queue_set = set(ready_queues) | set(active_queues)
            response.update({
                'status': 'success',
                'queue_ids': list(all_queue_set)
            })
            return response
        elif queue_type and queue_id:
            # return specific details.
            # (The two SMEMBERS look-ups of the queue-type sets that used to
            # be here were never read in this branch and have been removed.)
            # queue specific rates for past 10 minutes
            timestamp = str(generate_epoch())
            keys = [
                '%s:%s:%s' % (self._key_prefix, queue_type, queue_id)
            ]
            args = [timestamp]

            enqueue_details, dequeue_details = self._lua_metrics(
                keys=keys, args=args)
            enqueue_counts, dequeue_counts = self._counts_from_details(
                enqueue_details, dequeue_details)

            # get the queue length for the job queue
            queue_length = self._r.llen('%s:%s:%s' % (
                self._key_prefix, queue_type, queue_id))

            response.update({
                'status': 'success',
                'queue_length': int(queue_length),
                'enqueue_counts': enqueue_counts,
                'dequeue_counts': dequeue_counts
            })
            return response
        elif not queue_type and queue_id:
            raise BadArgumentException(
                '`queue_id` should be accompanied by `queue_type`.')

        return response

    def clear_queue(self, queue_type=None, queue_id=None, purge_all=False):
        """clear the all entries in queue with particular queue_id
        and queue_type. It takes an optional argument,
        purge_all : if True, then it will remove the related resources
        from the redis.
        """
        if queue_id is None or not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if queue_type is None or not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        response = {
            'status': 'Failure',
            'message': 'No queued calls found'
        }
        # remove from the primary sorted set
        primary_set = '{}:{}'.format(self._key_prefix, queue_type)
        queued_status = self._r.zrem(primary_set, queue_id)
        if queued_status:
            response.update({'status': 'Success',
                             'message': 'Successfully removed all queued calls'})
        # do a full cleanup of resources
        # although this is not necessary as we don't remove resources
        # while dequeue operation
        job_queue_list = '{}:{}:{}'.format(self._key_prefix, queue_type,
                                           queue_id)
        if queued_status and purge_all:
            job_list = self._r.lrange(job_queue_list, 0, -1)
            pipe = self._r.pipeline()
            # clear the payload data for job_uuid
            payload_set = '{}:payload'.format(self._key_prefix)
            for job_uuid in job_list:
                if job_uuid is None:
                    continue
                job_payload_key = '{}:{}:{}'.format(queue_type, queue_id,
                                                    job_uuid)
                pipe.hdel(payload_set, job_payload_key)
            # clear jobrequest interval
            interval_set = '{}:interval'.format(self._key_prefix)
            job_interval_key = '{}:{}'.format(queue_type, queue_id)
            pipe.hdel(interval_set, job_interval_key)
            # clear job_queue_list
            pipe.delete(job_queue_list)
            pipe.execute()
            response.update({
                'status': 'Success',
                'message': 'Successfully removed all queued calls and purged related resources'
            })
        else:
            # always delete the job queue list
            self._r.delete(job_queue_list)
        return response
class RedisDB:
    """Convenience wrapper around a redis client.

    Depending on the constructor arguments the wrapped client is a single
    node ``redis.StrictRedis``, a sentinel-managed master, or a
    ``StrictRedisCluster``.  Batch operations go through a pipeline; the
    explicit ``MULTI`` is only issued when the client is not a cluster.
    """

    def __init__(self,
                 ip_ports=None,
                 db=None,
                 user_pass=None,
                 url=None,
                 decode_responses=True,
                 service_name=None,
                 max_connections=32,
                 **kwargs):
        """Create the underlying redis connection.

        Args:
            ip_ports: ``ip:port`` addresses, either a list or a single
                comma-separated string, e.g. ``ip1:port1,ip2:port2`` or
                ``["ip1:port1", "ip2:port2"]``.
            db: database index.
            user_pass: redis password.
            url: connection URL; when given it takes precedence over
                ``ip_ports``.
            decode_responses: passed through to the redis client.
            service_name: sentinel service name; when set together with
                several addresses, sentinel mode is used instead of cluster
                mode.
            max_connections: passed through to the redis client.
            **kwargs: extra keyword arguments forwarded to the client.
        """
        # The settings may be modified at runtime, so the defaults are
        # resolved here instead of being baked into the signature.
        if ip_ports is None:
            ip_ports = setting.REDISDB_IP_PORTS
        if db is None:
            db = setting.REDISDB_DB
        if user_pass is None:
            user_pass = setting.REDISDB_USER_PASS
        if service_name is None:
            service_name = setting.REDISDB_SERVICE_NAME

        self._is_redis_cluster = False

        if url:
            self._redis = redis.StrictRedis.from_url(
                url, decode_responses=decode_responses)
            log.debug("连接到redis数据库 %s" % (url))
        else:
            if not isinstance(ip_ports, list):
                ip_ports = ip_ports.split(",")

            if len(ip_ports) > 1:
                startup_nodes = []
                for ip_port in ip_ports:
                    ip, port = ip_port.split(":")
                    startup_nodes.append({"host": ip, "port": port})

                if service_name:
                    log.debug("使用redis哨兵模式")
                    hosts = [(node["host"], node["port"])
                             for node in startup_nodes]
                    sentinel = Sentinel(hosts, socket_timeout=3, **kwargs)
                    self._redis = sentinel.master_for(
                        service_name,
                        password=user_pass,
                        db=db,
                        redis_class=redis.StrictRedis,
                        decode_responses=decode_responses,
                        max_connections=max_connections,
                        **kwargs)
                else:
                    log.debug("使用redis集群模式")
                    self._redis = StrictRedisCluster(
                        startup_nodes=startup_nodes,
                        decode_responses=decode_responses,
                        password=user_pass,
                        max_connections=max_connections,
                        **kwargs)
                    self._is_redis_cluster = True
            else:
                ip, port = ip_ports[0].split(":")
                self._redis = redis.StrictRedis(
                    host=ip,
                    port=port,
                    db=db,
                    password=user_pass,
                    decode_responses=decode_responses,
                    max_connections=max_connections,
                    **kwargs)
            log.debug("连接到redis数据库 %s db%s" % (ip_ports, db))

        self._ip_ports = ip_ports
        self._db = db
        self._user_pass = user_pass
        self._url = url

    def __repr__(self):
        if self._url:
            return "<Redisdb url:{}>".format(self._url)
        return "<Redisdb ip_ports: {} db:{} user_pass:{}>".format(
            self._ip_ports, self._db, self._user_pass)

    @classmethod
    def from_url(cls, url):
        """Alternate constructor that connects through a redis URL."""
        return cls(url=url)

    def _pipeline(self):
        """Return a pipeline for batching several commands in one request.

        ``MULTI`` is only opened for non-cluster clients, so every batch
        method behaves the same way in cluster mode.
        """
        pipe = self._redis.pipeline(transaction=True)
        if not self._is_redis_cluster:
            pipe.multi()
        return pipe

    # ------------------------------------------------------------- set
    def sadd(self, table, values):
        """Add one value or a list of values to an unordered set.

        Args:
            table: set key.
            values: a single value or a list of values.

        Returns:
            For a single value, the reply of SADD (0 if it already existed,
            otherwise 1).  For a list, None.
        """
        if not isinstance(values, list):
            return self._redis.sadd(table, values)

        pipe = self._pipeline()
        for value in values:
            pipe.sadd(table, value)
        pipe.execute()

    def sget(self, table, count=1, is_pop=True):
        """Fetch members of a set.

        Args:
            table: set key.
            count: number of members wanted.
            is_pop: when true the members are removed (SPOP), otherwise
                they are only sampled (SRANDMEMBER).

        Returns:
            A list such as ``['1']`` or ``[]``.
        """
        if not is_pop:
            return self._redis.srandmember(table, count)

        datas = []
        # Never ask for more members than the set currently holds.
        count = min(count, self.sget_count(table))
        if count > 1:
            pipe = self._pipeline()
            for _ in range(count):
                pipe.spop(table)
            datas = pipe.execute()
        elif count:
            datas.append(self._redis.spop(table))
        return datas

    def srem(self, table, values):
        """Remove one member or a list of members from a set."""
        if isinstance(values, list):
            pipe = self._pipeline()
            for value in values:
                pipe.srem(table, value)
            pipe.execute()
        else:
            self._redis.srem(table, values)

    def sget_count(self, table):
        """Return the number of members in the set."""
        return self._redis.scard(table)

    def sdelete(self, table):
        """Delete a large set incrementally.

        The set is walked with SSCAN (500 members requested per step) and
        each scanned batch is removed with SREM, instead of issuing a single
        DEL, which per the original note can block redis on big keys.
        """
        # A cursor of 0 starts a new iteration; the server returning 0
        # signals that the iteration is complete.
        cursor = 0
        while True:
            cursor, data = self._redis.sscan(table, cursor=cursor, count=500)
            if data:
                self._redis.srem(table, *data)
            if cursor == 0:
                break

    def sismember(self, table, key):
        """Return a boolean indicating if ``key`` is a member of set ``table``."""
        return self._redis.sismember(table, key)

    # ------------------------------------------------------ sorted set
    def zadd(self, table, values, prioritys=0):
        """Add values to a sorted set (existing members get their score updated).

        Args:
            table: sorted-set key.
            values: a single value or a list of values.
            prioritys: score(s) used for ordering, smaller means higher
                priority; a single number or a list matching ``values``.
                Defaults to 0.

        Returns:
            The ZADD reply for a single value, or the list of replies for a
            batch.
        """
        if not isinstance(values, list):
            return self._redis.zadd(table, prioritys, values)

        if not isinstance(prioritys, list):
            prioritys = [prioritys] * len(values)
        else:
            assert len(values) == len(prioritys), "values值要与prioritys值一一对应"

        pipe = self._pipeline()
        for value, priority in zip(values, prioritys):
            pipe.zadd(table, priority, value)
        return pipe.execute()

    def zget(self, table, count=1, is_pop=True):
        """Read members from a sorted set, lowest score first.

        Args:
            table: sorted-set key.
            count: number of members; -1 returns everything.
            is_pop: whether the returned rank range is also removed.

        Returns:
            list of members.
        """
        start_pos = 0  # inclusive
        end_pos = count - 1 if count > 0 else count

        pipe = self._pipeline()
        pipe.zrange(table, start_pos, end_pos)
        if is_pop:
            pipe.zremrangebyrank(table, start_pos, end_pos)
        # The first reply is the ZRANGE result; the optional second one is
        # the number of removed members, which callers do not need.
        return pipe.execute()[0]

    def zremrangebyscore(self, table, priority_min, priority_max):
        """Remove members whose score lies in the closed interval.

        Returns:
            Number of members removed.
        """
        return self._redis.zremrangebyscore(table, priority_min, priority_max)

    def zrangebyscore(self, table, priority_min, priority_max, count=None,
                      is_pop=True):
        """Return members whose score lies in the closed interval.

        Args:
            table: sorted-set key.
            priority_min: lower score bound (smaller means higher priority).
            priority_max: upper score bound.
            count: maximum number of members; empty means the whole range.
            is_pop: whether the returned members are removed.

        Returns:
            list of members.
        """
        # A Lua script keeps the read and the delete atomic.  The pop flag is
        # sent as 1/0 and compared explicitly, because every ARGV entry
        # reaches Lua as a string and any string is truthy there.
        lua = """
            local key = KEYS[1]
            local min_score = ARGV[1]
            local max_score = ARGV[2]
            local is_pop = ARGV[3]
            local count = ARGV[4]

            -- 取值
            local datas = nil
            if count then
                datas = redis.call('zrangebyscore', key, min_score, max_score, 'limit', 0, count)
            else
                datas = redis.call('zrangebyscore', key, min_score, max_score)
            end

            -- 删除redis中刚取到的值
            if (is_pop == '1') then
                for i=1, #datas do
                    redis.call('zrem', key, datas[i])
                end
            end

            return datas
        """
        cmd = self._redis.register_script(lua)
        args = [priority_min, priority_max, 1 if is_pop else 0]
        if count:
            args.append(count)
        return cmd(keys=[table], args=args)

    def zrangebyscore_increase_score(self, table, priority_min, priority_max,
                                     increase_score, count=None):
        """Return members in the closed score interval and shift their score.

        Args:
            table: sorted-set key.
            priority_min: lower score bound.
            priority_max: upper score bound.
            increase_score: score delta; positive adds, negative subtracts.
            count: maximum number of members; empty means the whole range.

        Returns:
            list of members.
        """
        # A Lua script keeps the read and the score update atomic.
        lua = """
            local key = KEYS[1]
            local min_score = ARGV[1]
            local max_score = ARGV[2]
            local increase_score = ARGV[3]
            local count = ARGV[4]

            -- 取值
            local datas = nil
            if count then
                datas = redis.call('zrangebyscore', key, min_score, max_score, 'limit', 0, count)
            else
                datas = redis.call('zrangebyscore', key, min_score, max_score)
            end

            --修改优先级
            for i=1, #datas do
                redis.call('zincrby', key, increase_score, datas[i])
            end

            return datas
        """
        cmd = self._redis.register_script(lua)
        args = [priority_min, priority_max, increase_score]
        if count:
            args.append(count)
        return cmd(keys=[table], args=args)

    def zrangebyscore_set_score(self, table, priority_min, priority_max, score,
                                count=None):
        """Return members in the closed score interval and set their score.

        Args:
            table: sorted-set key.
            priority_min: lower score bound.
            priority_max: upper score bound.
            score: the new score assigned to every returned member.
            count: maximum number of members; empty means the whole range.

        Returns:
            list of members (without scores).
        """
        # A Lua script keeps the read and the score update atomic.
        lua = """
            local key = KEYS[1]
            local min_score = ARGV[1]
            local max_score = ARGV[2]
            local set_score = ARGV[3]
            local count = ARGV[4]

            -- 取值
            local datas = nil
            if count then
                datas = redis.call('zrangebyscore', key, min_score, max_score, 'withscores','limit', 0, count)
            else
                datas = redis.call('zrangebyscore', key, min_score, max_score, 'withscores')
            end

            local real_datas = {} -- 数据
            --修改优先级
            for i=1, #datas, 2 do
                local data = datas[i]
                local score = datas[i+1]

                table.insert(real_datas, data) -- 添加数据

                redis.call('zincrby', key, set_score - score, datas[i])
            end

            return real_datas
        """
        cmd = self._redis.register_script(lua)
        args = [priority_min, priority_max, score]
        if count:
            args.append(count)
        return cmd(keys=[table], args=args)

    def zget_count(self, table, priority_min=None, priority_max=None):
        """Count members, optionally restricted to a closed score interval.

        Args:
            table: sorted-set key.
            priority_min: lower score bound (inclusive).
            priority_max: upper score bound (inclusive).
        """
        if priority_min is not None and priority_max is not None:
            return self._redis.zcount(table, priority_min, priority_max)
        return self._redis.zcard(table)

    def zrem(self, table, values):
        """Remove one member or a list of members from a sorted set."""
        if isinstance(values, list):
            pipe = self._pipeline()
            for value in values:
                pipe.zrem(table, value)
            pipe.execute()
        else:
            self._redis.zrem(table, values)

    def zexists(self, table, values):
        """Check membership in a sorted set via ZSCORE.

        Args:
            table: sorted-set key.
            values: a single value or a list of values.

        Returns:
            1/0 for a single value, or a list of 1/0 flags for a list.
        """
        if not isinstance(values, list):
            return 1 if self._redis.zscore(table, values) is not None else 0

        pipe = self._pipeline()
        for value in values:
            pipe.zscore(table, value)
        return [0 if score is None else 1 for score in pipe.execute()]

    # ------------------------------------------------------------ list
    def lpush(self, table, values):
        """Append one value or a list of values to a list.

        NOTE: despite its name this method issues RPUSH; that behaviour is
        kept as-is because existing callers may rely on it.

        Returns:
            The RPUSH reply for a single value, None for a list.
        """
        if not isinstance(values, list):
            return self._redis.rpush(table, values)

        pipe = self._pipeline()
        for value in values:
            pipe.rpush(table, value)
        pipe.execute()

    def lpop(self, table, count=1):
        """Pop values from the head of a list.

        Args:
            table: list key.
            count: number of values wanted.

        Returns:
            None when nothing is popped, a single value when one element is
            popped, or a list of values when more than one is popped.
        """
        datas = None
        # Never ask for more elements than the list currently holds.
        count = min(count, self.lget_count(table))
        if count > 1:
            pipe = self._pipeline()
            for _ in range(count):
                pipe.lpop(table)
            datas = pipe.execute()
        elif count:
            datas = self._redis.lpop(table)
        return datas

    def rpoplpush(self, from_table, to_table=None):
        """Pop the tail of ``from_table`` and push it onto the head of ``to_table``.

        When ``to_table`` is omitted the same list is used, which rotates
        the list: its tail element becomes its head.

        Returns:
            The moved element.
        """
        if not to_table:
            to_table = from_table
        return self._redis.rpoplpush(from_table, to_table)

    def lget_count(self, table):
        """Return the length of the list."""
        return self._redis.llen(table)

    def lrem(self, table, value, num=0):
        """Forward to the client's ``lrem`` with the arguments in this order."""
        return self._redis.lrem(table, value, num)

    # ------------------------------------------------------------ hash
    def hset(self, table, key, value):
        """Set a hash field, creating the hash if needed.

        Returns:
            1 when the field is new, 0 when an existing field is overwritten.
        """
        return self._redis.hset(table, key, value)

    def hset_batch(self, table, datas):
        """Set several hash fields in one request.

        Args:
            table: hash key.
            datas: iterable of ``[key, value]`` pairs.

        Returns:
            list of HSET replies.
        """
        pipe = self._pipeline()
        for key, value in datas:
            pipe.hset(table, key, value)
        return pipe.execute()

    def hincrby(self, table, key, increment):
        """Increment a hash field by ``increment`` and return the reply."""
        return self._redis.hincrby(table, key, increment)

    def hget(self, table, key, is_pop=False):
        """Read a hash field, optionally deleting it in the same step."""
        if not is_pop:
            return self._redis.hget(table, key)

        # A Lua script keeps the read and the delete atomic.
        lua = """
            local key = KEYS[1]
            local field = ARGV[1]

            -- 取值
            local datas = redis.call('hget', key, field)
            -- 删除值
            redis.call('hdel', key, field)

            return datas
        """
        cmd = self._redis.register_script(lua)
        return cmd(keys=[table], args=[key])

    def hgetall(self, table):
        """Return all fields and values of the hash."""
        return self._redis.hgetall(table)

    def hexists(self, table, key):
        """Return whether the hash contains the field."""
        return self._redis.hexists(table, key)

    def hdel(self, table, *keys):
        """Delete one or more fields from the hash."""
        self._redis.hdel(table, *keys)

    def hget_count(self, table):
        """Return the number of fields in the hash."""
        return self._redis.hlen(table)

    # ---------------------------------------------------------- bitmap
    def setbit(self, table, offsets, values):
        """Set bit(s) of a string value and return the previous bit(s).

        Args:
            table: string key.
            offsets: a single offset or a list of offsets.
            values: a single bit value or a list matching ``offsets``.

        Returns:
            list of previous bits for a list of offsets, otherwise a single
            value.
        """
        if not isinstance(offsets, list):
            return self._redis.setbit(table, offsets, values)

        if not isinstance(values, list):
            values = [values] * len(offsets)
        else:
            assert len(offsets) == len(values), "offsets值要与values值一一对应"

        pipe = self._pipeline()
        for offset, value in zip(offsets, values):
            pipe.setbit(table, offset, value)
        return pipe.execute()

    def getbit(self, table, offsets):
        """Read bit(s) of a string value.

        Args:
            table: string key.
            offsets: a single offset or a list of offsets.

        Returns:
            list of bits for a list of offsets, otherwise a single value.
        """
        if not isinstance(offsets, list):
            return self._redis.getbit(table, offsets)

        pipe = self._pipeline()
        for offset in offsets:
            pipe.getbit(table, offset)
        return pipe.execute()

    def bitcount(self, table):
        """Return the number of set bits in the string value."""
        return self._redis.bitcount(table)

    # ---------------------------------------------------------- string
    def strset(self, table, value, **kwargs):
        """SET a string value; ``kwargs`` are forwarded to the client."""
        return self._redis.set(table, value, **kwargs)

    def str_incrby(self, table, value):
        """Increment the integer stored at ``table`` by ``value``."""
        return self._redis.incrby(table, value)

    def strget(self, table):
        """GET a string value."""
        return self._redis.get(table)

    def strlen(self, table):
        """Return the length of the string value."""
        return self._redis.strlen(table)

    # ------------------------------------------------------------ keys
    def getkeys(self, regex):
        """Return the keys matching the pattern (uses the KEYS command)."""
        return self._redis.keys(regex)

    def exists_key(self, key):
        """Return the EXISTS reply for ``key``."""
        return self._redis.exists(key)

    def set_expire(self, key, seconds):
        """Set the expiry of ``key`` in seconds."""
        self._redis.expire(key, seconds)

    def clear(self, table):
        """Delete ``table``; failures are logged instead of raised."""
        try:
            self._redis.delete(table)
        except Exception as e:
            log.error(e)

    def get_redis_obj(self):
        """Return the underlying redis client."""
        return self._redis
class RedisClient(object):
    """Redis-cluster client bound to one key.

    Every hash and list command operates on ``self.key``; use
    ``change_key`` to point the client at a different key.
    """

    def __init__(self, key, startup_nodes):
        """Connect to the cluster.

        :param key: the redis key all commands operate on
        :param startup_nodes: cluster startup nodes passed to
            ``StrictRedisCluster``
        """
        self.key = key
        self.conn = StrictRedisCluster(startup_nodes=startup_nodes,
                                       decode_responses=True)

    @staticmethod
    def _to_text(value):
        """Decode ``bytes`` to text; return anything else unchanged.

        The connection is created with ``decode_responses=True``, so replies
        are normally text already and must not be decoded a second time.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    # ------------------------------------------------------------ hash
    def hdel(self, field):
        """Delete ``field`` from the hash.

        :param field:
        :return:
        """
        self.conn.hdel(self.key, field)

    def hexists(self, field):
        """Return whether the hash contains ``field``.

        :param field:
        :return:
        """
        return self.conn.hexists(self.key, field)

    def hget(self, field):
        """Return the value stored under ``field``.

        :param field:
        :return: the value as text, or None when it is missing or falsy
        """
        value = self.conn.hget(self.key, field)
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value if value else None

    def hgetall(self):
        """Return the whole hash as ``{field: value, ...}``.

        :return: dict of text keys and values, or None when the hash is empty
        """
        all_dict = self.conn.hgetall(self.key)
        if not all_dict:
            return None
        to_text = self._to_text
        return {to_text(field): to_text(value)
                for field, value in all_dict.items()}

    def hkeys(self):
        """Return all fields of the hash.

        :return: list of fields, or None when the hash is empty
        """
        fields = self.conn.hkeys(self.key)
        if not fields:
            return None
        return [self._to_text(field) for field in fields]

    def hlen(self):
        """Return the number of fields in the hash.

        :return:
        """
        return self.conn.hlen(self.key)

    def hset(self, field, value):
        """Set ``field`` to ``value``.

        :param field:
        :param value:
        :return:
        """
        self.conn.hset(self.key, field, value)

    def hvals(self):
        """Return all values of the hash.

        :return: list of values, or None when the hash is empty
        """
        values = self.conn.hvals(self.key)
        if not values:
            return None
        return [self._to_text(value) for value in values]

    def change_key(self, key):
        """Point the client at another key.

        :param key:
        :return:
        """
        self.key = key

    # ------------------------------------------------------------ list
    # Each wrapper returns the reply of the underlying command so that
    # reads (llen, lpop, lrange, ...) actually deliver their result.

    def blpop(self, timeout):
        """Blocking LPOP on the key; returns the client's reply."""
        return self.conn.blpop(self.key, timeout=timeout)

    def brpop(self, timeout):
        """Blocking RPOP on the key; returns the client's reply."""
        return self.conn.brpop(self.key, timeout=timeout)

    def brpoplpush(self, dst, timeout):
        """Blocking RPOPLPUSH from the key to ``dst``."""
        return self.conn.brpoplpush(self.key, dst=dst, timeout=timeout)

    def lindex(self, i):
        """Return the element at index ``i``."""
        return self.conn.lindex(self.key, index=i)

    def llen(self):
        """Return the length of the list."""
        return self.conn.llen(self.key)

    def lpop(self):
        """Pop and return the head of the list."""
        return self.conn.lpop(self.key)

    def lpush(self, *values):
        """Push ``values`` onto the head of the list."""
        return self.conn.lpush(self.key, *values)

    def lrange(self, start, stop):
        """Return the elements between ``start`` and ``stop``."""
        return self.conn.lrange(self.key, start, stop)

    def lset(self, i, value):
        """Set the element at index ``i`` to ``value``."""
        return self.conn.lset(self.key, index=i, value=value)

    def rpop(self):
        """Pop and return the tail of the list."""
        return self.conn.rpop(self.key)

    def rpoplpush(self, dst):
        """Move the tail of the list to the head of ``dst``."""
        return self.conn.rpoplpush(self.key, dst=dst)

    def rpush(self, value):
        """Push ``value`` onto the tail of the list."""
        return self.conn.rpush(self.key, value)
class RedisMiddleware(object):
    """Task manager built on a redis cluster.

    Wraps the list and set commands used for task queues and owns the bloom
    filter used for URL de-duplication.  The list helpers are best-effort:
    a redis error yields ``None`` instead of an exception.
    """

    def __init__(self, redis_params):
        """Connect to the cluster and create the URL bloom filter.

        :param redis_params: dict with optional ``startup_nodes`` and
            ``password`` entries
        """
        self.redis_cli = StrictRedisCluster(
            startup_nodes=redis_params.get('startup_nodes', ''),
            password=redis_params.get('password', ''))
        # URL filter split over 5 blocks (blockNum=5); per the original
        # note the default memory footprint is 512M -- TODO confirm.
        self.bloom_filter = BloomFilter(
            self.redis_cli, blockNum=5, key='bloomfilter_weibo')

    def redis_del(self, key=None):
        """Delete ``key`` from redis.

        Used to clear the list of URLs between crawl cycles, so that each
        list URL is crawled once per cycle.

        :return: the DELETE reply, or None when no key is given
        """
        if not key:
            return None
        return self.redis_cli.delete(key)

    def redis_rpush(self, name, data):
        """Append ``data`` to the tail of the task list ``name`` (RPUSH).

        :param name: list key
        :param data: a single item or a list of items
        :return: None
        """
        items = data if isinstance(data, list) else [data]
        try:
            for each in items:
                self.redis_cli.rpush(name, each)
        except Exception:
            return None

    def redis_lpush(self, name, data):
        """Insert ``data`` at the head of the task list ``name`` (LPUSH).

        :param name: list key
        :param data: a single item or a list of items
        :return: None
        """
        items = data if isinstance(data, list) else [data]
        try:
            for each in items:
                self.redis_cli.lpush(name, each)
        except Exception:
            return None

    def redis_rpop(self, name):
        """Pop an item from the tail of the task list (RPOP).

        :return: the item, or None on error
        """
        try:
            return self.redis_cli.rpop(name)
        except Exception:
            return None

    def redis_lpop(self, name):
        """Pop an item from the head of the task list (LPOP).

        :return: the item, or None on error
        """
        try:
            return self.redis_cli.lpop(name)
        except Exception:
            return None

    def redis_brpop(self, name, timeout=1):
        """Blocking pop from the tail of the task list (BRPOP).

        :return: the item, or None on timeout or error
        """
        try:
            reply = self.redis_cli.brpop(name, timeout=timeout)
        except Exception as e:
            print(e)
            return None
        if reply is None:
            # Timeout: nothing arrived, which is not an error.
            return None
        _, res = reply
        return res

    def redis_query(self, name):
        """Return the length of the task list ``name``.

        :param name:
        :return: the length, or None on error
        """
        try:
            return self.redis_cli.llen(name)
        except Exception:
            return None

    def redis_sadd(self, name, data):
        """Add ``data`` to the set ``name``.

        :param name: set key
        :param data: a single item, or a list/set of items
        :return: None
        """
        items = data if isinstance(data, (list, set)) else [data]
        try:
            for each in items:
                self.redis_cli.sadd(name, each)
        except Exception:
            return None

    def redis_sismember(self, name, data):
        """Return whether ``data`` is a member of the set ``name``."""
        return self.redis_cli.sismember(name, data)

    def redis_scard(self, name):
        """Return the number of members in the set ``name``."""
        return int(self.redis_cli.scard(name))

    def redis_spop(self, name):
        """Remove and return a random member of the set ``name``.

        :param name:
        :return:
        """
        return self.redis_cli.spop(name)

    def redis_srem(self, name, data):
        """Remove ``data`` from the set ``name``.

        :param name:
        :param data:
        :return:
        """
        self.redis_cli.srem(name, data)
THREAD_PROXY_MAP = {} # 线程与代理关系 season = time.strftime('%Y-%m') # 用户的玩家列表url中season参数 def get_redis_proxy(): ''' 从redis相应的key中获取代理ip(读取快代理的代理ip) :return: ''' current_time = int(time.strftime('%H%M%S')) if 001000 >= current_time >= 000000: # 退出进程 print time.strftime('[%Y-%m-%d %H:%M:%S]:'), 'get_redis_proxy()退出' return False startup_nodes = [{'host': 'redis3', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) pubg_friends_proxy_length = r.llen( 'spider:pubg_friends:proxy:kuai') # pubg_friends print time.strftime( '[%Y-%m-%d %H:%M:%S]' ), 'redis中pubg_friends的代理ip长度:', pubg_friends_proxy_length if pubg_friends_proxy_length >= 50: proxy_length = 50 else: print '当前快代理redis中的代理数量少于50:', r.llen('spider:pubg_friends:proxy:kuai') time.sleep(60) return get_redis_proxy() for i in xrange(proxy_length): ip = r.lpop('spider:pubg_friends:proxy:kuai') if ip: proxies = { 'http': "http://{ip}".format(ip=ip),
PROXY_IP_Q = Queue.Queue() # 代理ip队列 season = time.strftime('%Y-%m') # 用户的玩家列表url中season参数 def get_redis_proxy(): ''' 从redis相应的key中获取代理ip(读取快代理的代理ip) :return: ''' current_time = int(time.strftime('%H%M%S')) if 001000 >= current_time >= 000000: # 退出进程 print time.strftime('[%Y-%m-%d %H:%M:%S]:'), 'get_redis_proxy()退出' return False startup_nodes = [{'host': 'redis3', 'port': '6379'}] r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True) pubg_death_proxy_length = r.llen( 'spider:pubg_death:proxy:kuai') # pubg_death print time.strftime('[%Y-%m-%d %H:%M:%S]' ), 'redis中pubg_death的代理ip长度:', pubg_death_proxy_length if pubg_death_proxy_length >= 50: proxy_length = 50 else: print '当前快代理redis中的代理数量少于50:', r.llen('spider:pubg_death:proxy:kuai') time.sleep(60) return get_redis_proxy() for i in xrange(proxy_length): ip = r.lpop('spider:pubg_death:proxy:kuai') if ip: proxies = { 'http': "http://{ip}".format(ip=ip), 'https': "http://{ip}".format(ip=ip)
#!/usr/bin/python3.4
# -*- coding: utf-8 -*-
import redis
from rediscluster import StrictRedisCluster

# Cluster startup nodes: three ports on each of the two hosts.
redis_nodes = [
    {'host': host, 'port': port}
    for host, ports in (
        ('192.168.230.218', (6380, 6381, 6382)),
        ('192.168.230.223', (6383, 6384, 6385)),
    )
    for port in ports
]

r = StrictRedisCluster(startup_nodes=redis_nodes)

# Print the length of the "url" list, then its full content.
name = "url"
length = r.llen(name)
print(length)
print(r.lrange(name, 0, -1))
class RedisMiddleware(object):
    """Task manager built on a redis cluster.

    Wraps the list commands used for task queues and owns two bloom
    filters.  The list helpers are best-effort: a redis error yields
    ``None`` instead of an exception.
    """

    def __init__(self, taskname, redis_params):
        """Connect to the cluster and create the bloom filters.

        :param taskname: task name, used in the key of the list filter
        :param redis_params: dict with optional ``startup_nodes`` and
            ``password`` entries
        """
        self.redis_cli = StrictRedisCluster(
            startup_nodes=redis_params.get('startup_nodes', ''),
            password=redis_params.get('password', ''))
        # URL filter split over 6 blocks; per the original note the default
        # memory footprint is 512M -- TODO confirm.
        self.bloom_urls = BloomFilter(
            self.redis_cli, blockNum=6, key='bloomfilter_pub')
        # Filter for list pages, using the default block count and
        # bit_size=1 << 28 (32M per the original note).
        self.bloom_list = BloomFilter(self.redis_cli,
                                      key='{}:redis_list'.format(taskname),
                                      bit_size=1 << 28)

    def redis_del(self, key=None):
        """Delete ``key`` from redis.

        Used to clear the list of URLs between crawl cycles, so that each
        list URL is crawled once per cycle.

        :return: the DELETE reply, or None when no key is given
        """
        if not key:
            return None
        return self.redis_cli.delete(key)

    def redis_push(self, name, data):
        """Insert ``data`` at the head of the task list ``name`` (LPUSH).

        :param name: list key
        :param data: a single item or a list of items
        :return: None
        """
        items = data if isinstance(data, list) else [data]
        try:
            for each in items:
                self.redis_cli.lpush(name, each)
        except Exception:
            return None

    def redis_pop(self, name):
        """Pop an item from the tail of the task list (RPOP).

        :return: the item, or None on error
        """
        try:
            return self.redis_cli.rpop(name)
        except Exception:
            return None

    def redis_brpop(self, name, timeout=1):
        """Blocking pop from the tail of the task list (BRPOP).

        :return: the item, or None on timeout or error
        """
        try:
            reply = self.redis_cli.brpop(name, timeout=timeout)
        except Exception as e:
            print(e)
            return None
        if reply is None:
            # Timeout: nothing arrived, which is not an error.
            return None
        _, res = reply
        return res

    def redis_query(self, name):
        """Return the length of the task list ``name``.

        :param name:
        :return: the length, or None on error
        """
        try:
            return self.redis_cli.llen(name)
        except Exception:
            return None
class RedisQueue(object):
    """
    Queue-style message channel stored in a redis list
    """

    Empty = BaseQueue.Empty
    Full = BaseQueue.Full
    max_timeout = 0.3

    def __init__(self, name, host='localhost', port=6379, db=0,
                 maxsize=0, lazy_limit=True, password=None, cluster_nodes=None):
        """
        Build the queue on top of a redis (or redis cluster) connection

        maxsize:    upper bound on the number of items the queue may hold.
        lazy_limit: the queue is shared between instances, so the size
                    limit is checked lazily for better performance.
        """
        self.name = name
        if cluster_nodes is None:
            self.redis = redis.StrictRedis(host=host, port=port, db=db,
                                           password=password)
        else:
            from rediscluster import StrictRedisCluster
            self.redis = StrictRedisCluster(startup_nodes=cluster_nodes)
        self.maxsize = maxsize
        self.lazy_limit = lazy_limit
        self.last_qsize = 0

    def qsize(self):
        """Return the current list length and remember it in last_qsize"""
        current = self.redis.llen(self.name)
        self.last_qsize = current
        return current

    def empty(self):
        """True when the queue holds no items"""
        return self.qsize() == 0

    def full(self):
        """True when a maxsize is set and the queue has reached it"""
        return bool(self.maxsize and self.qsize() >= self.maxsize)

    def put_nowait(self, obj):
        """Append obj without waiting; raise Full when the limit is hit"""
        # With lazy_limit the remembered size is trusted while it is below
        # maxsize; only otherwise is the real size fetched from redis.
        within_lazy_limit = self.lazy_limit and self.last_qsize < self.maxsize
        if not within_lazy_limit and self.full():
            raise self.Full
        self.last_qsize = self.redis.rpush(self.name, umsgpack.packb(obj))
        return True

    def put(self, obj, block=True, timeout=None):
        """Append obj, retrying while the queue is full.

        Without block a single attempt is made.  With a timeout, Full is
        re-raised once the timeout has elapsed.
        """
        if not block:
            return self.put_nowait(obj)

        began = time.time()
        while True:
            try:
                return self.put_nowait(obj)
            except self.Full:
                if not timeout:
                    time.sleep(self.max_timeout)
                    continue
                lasted = time.time() - began
                if not timeout > lasted:
                    raise
                time.sleep(min(self.max_timeout, timeout - lasted))

    def get_nowait(self):
        """Pop the head item without waiting; raise Empty when none exists"""
        packed = self.redis.lpop(self.name)
        if packed is None:
            raise self.Empty
        return umsgpack.unpackb(packed)

    def get(self, block=True, timeout=None):
        """Pop the head item, retrying while the queue is empty.

        Without block a single attempt is made.  With a timeout, Empty is
        re-raised once the timeout has elapsed.
        """
        if not block:
            return self.get_nowait()

        began = time.time()
        while True:
            try:
                return self.get_nowait()
            except self.Empty:
                if not timeout:
                    time.sleep(self.max_timeout)
                    continue
                lasted = time.time() - began
                if not timeout > lasted:
                    raise
                time.sleep(min(self.max_timeout, timeout - lasted))