Esempio n. 1
0
def file_write_redis():
    '''
    将源文件中的内容写入redis中
    :return:
    '''
    gaode_file_path = file_exists()  #
    # fanyule_two_game_file_path = r'/ftp_samba/112/file_4spider/dmn_fanyule2_game/'    #
    startup_nodes = [{'host': 'redis1', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)

    # 高德
    gaode_file = open(gaode_file_path, 'r')
    gaode_keyword_length = r.llen(
        'spider:python:gaode:keyword')  # redis中gaode的数据量
    print 'redis中gaode_keyword列表长度:', gaode_keyword_length
    if gaode_keyword_length != 0:
        r.delete('spider:python:gaode:keyword')
        print 'redis中gaode_keyword列表长度不为0, 删除后的列表长度:', r.llen(
            'spider:python:gaode:keyword')
    for line in gaode_file:
        new_line = line.strip()
        if new_line:
            r.rpush('spider:python:gaode:keyword', new_line)
    gaode_keyword_length = r.llen('spider:python:gaode:keyword')
    print '重新写入后redis中gaode_keyword列表长度:', gaode_keyword_length
Esempio n. 2
0
def main():
    lock = threading.Lock()
    print time.strftime('[%Y-%m-%d %H:%M:%S]:'), 'start'

    startup_nodes = [{'host': 'redis3', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
    print 'redis中pubg_death昵称列表长度:', r.llen('spider:python:pubg_death:keyword')

    get_redis_proxy()  # 将redis中的代理ip放入到PROXY_IP_Q队列中
    proxy_count = PROXY_IP_Q.qsize()
    print time.strftime('[%Y-%m-%d %H:%M:%S]'), '代理ip队列中的ip数量:', proxy_count
    print time.strftime('[%Y-%m-%d %H:%M:%S]'), '开启的线程数:', proxy_count

    threads = []
    for i in xrange(50):
        t = threading.Thread(target=pubg_userid, args=(lock, r))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    print '源数据剩余量:', r.llen('spider:python:pubg_death:keyword')

    print '抓取结束'
def main():
    startup_nodes = [{'host': 'redis3', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
    gaode_length = r.llen('spider:python:gaode:keyword:dest')  # redis的数据量
    print time.strftime(
        '[%Y-%m-%d %H:%M:%S]'), 'redis中gaode的数据量:', gaode_length
    lock = threading.Lock()
    first_hour = time.strftime('%H')
    date = time.strftime('%Y%m%d')

    dest_path = '/ftp_samba/112/spider/python/gd_location/'  # 线上数据存储文件
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)
    dest_file_name = os.path.join(dest_path, 'gd_location_' + date)
    fileout = open(dest_file_name, 'a')

    threads = []
    for i in xrange(1):
        t = threading.Thread(target=gaode, args=(lock, r, first_hour, fileout))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    try:
        fileout.flush()
        fileout.close()
    except IOError as e:
        time.sleep(2)
        fileout.close()
Esempio n. 4
0
def main():
    startup_nodes = [{'host': 'redis3', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
    pubg_friends_length = r.llen(
        'spider:python:pubg_friends:keyword:dest')  # redis的数据量
    print time.strftime(
        '[%Y-%m-%d %H:%M:%S]'), 'redis中pubg_friends的数据量:', pubg_friends_length
    lock = threading.Lock()
    first_hour = time.strftime('%H')  # 小时
    date = time.strftime('%Y%m%d')  # 数据文件日期

    dest_path = '/ftp_samba/112/spider/python/pubg'  # linux上的文件目录
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)
    dest_file_name = os.path.join(dest_path, 'pubg_friends_' + date)
    fileout = open(dest_file_name, 'a')

    threads = []
    for i in xrange(1):
        t = threading.Thread(target=pubg_friends,
                             args=(lock, r, first_hour, fileout))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    try:
        fileout.flush()
        fileout.close()
    except IOError as e:
        time.sleep(2)
        fileout.close()

    print time.strftime('[%Y-%m-%d %H:%M:%S]:'), 'over'
Esempio n. 5
0
def redis_cluster():
    '''
    Demonstrate string, list and hash operations against a redis cluster.

    Connects to the six hard-coded startup nodes, writes a few demo keys
    under 'thoth:thoth-ai:robot:*', prints what is read back and finally
    parses the stored list text back into a Python list.

    If the connection cannot be established the error is printed and the
    function returns without running the demo.
    '''
    # Local import: keeps this block self-contained without touching the
    # file-level imports.
    import ast

    redis_nodes = [
        {'host': host, 'port': port}
        for host in ('10.12.28.222', '10.12.28.224', '10.12.28.227')
        for port in (6380, 6381)
    ]

    try:
        r = StrictRedisCluster(startup_nodes=redis_nodes)
    except Exception as e:
        print("connect error %s" % e)
        # Without a connection `r` is undefined; stop here instead of
        # failing with a NameError on the first command below.
        return

    # string operations
    r.set('thoth:thoth-ai:robot:1', 'kk')
    # r.delete('thoth:thoth-ai:robot:1')
    print("name is", r.get('thoth:thoth-ai:robot:1'))

    # list operations
    # NOTE(review): a nested list is passed as a single value; whether the
    # client stringifies it or rejects it depends on the redis-py version.
    r.lpush('thoth:thoth-ai:robot:2', [[1, 2, 3], [2, 3, 4]])
    print('list len:', r.llen("thoth:thoth-ai:robot:2"))  # list size
    print("list ", r.lindex('thoth:thoth-ai:robot:2', 0))

    # hash operations
    r.hset('thoth:thoth-ai:robot:3', 'private_vector', [[1, 2, 3], [2, 3, 4]])
    r.hset('thoth:thoth-ai:robot:3', 'public_vector', [['4', 3, 2], [0, 1, 1]])

    pv = r.hget(
        'thoth:thoth-ai:robot:3',
        'public_vector',
    )
    print('hash.robot3.public_vector:', pv)
    aaa = pv.decode('utf-8')
    print(type(aaa), aaa)
    # Turn the text "[[...], [...]]" back into a list. literal_eval only
    # accepts Python literals, so data read from redis can never execute
    # code the way eval() would.
    b = ast.literal_eval(aaa)
    print(type(b), b)
Esempio n. 6
0
def exec_redis(redis_nodes,cmd_list):
    '''
    Run a list of read-only redis commands against a cluster.

    redis_nodes: startup nodes for StrictRedisCluster, e.g.
        [{'host':'10.101.104.132','port':1321},
         {'host':'10.101.104.132','port':1322},
         {'host':'10.101.104.132','port':1323},
         {'host':'10.101.104.132','port':1323,'password':None,'db':0}]
    cmd_list: command strings; only these read commands are supported for now:
        [
            "get key1",
            "llen lis1",
            "smembers set1",
            "scard set1",
            "hgetall hashtable",
            "lindex list1  2",
            "hget hashtable key1",
            "lrange list1  1 3",
        ]

    Returns a dict mapping each stripped command string to a one-element list
    holding json_decode(<redis reply>). Unsupported commands map to a fixed
    "not supported" message instead of raising.
    '''

    redisconn = StrictRedisCluster(startup_nodes=redis_nodes,decode_responses=True)
    res = {}
    for cmd in cmd_list:
        cmd = cmd.strip()
        arr = re.split(" +",cmd)
        op = arr[0].lower()  # command name, case-insensitive
        if op == 'get':
            tmp = redisconn.get(arr[1])
        elif op == 'hgetall':
            tmp = redisconn.hgetall(arr[1])
        elif op == 'llen':
            tmp = redisconn.llen(arr[1])
        elif op == 'smembers':
            tmp = redisconn.smembers(arr[1])
        elif op == 'scard':
            tmp = redisconn.scard(arr[1])
        elif op == 'hget':
            tmp = redisconn.hget(arr[1],arr[2])
        elif op == 'lrange':
            tmp = redisconn.lrange(arr[1],arr[2],arr[3])
        elif op == 'lindex':
            # Was redisconn.lrange(arr[1], arr[2]), which raises TypeError
            # (lrange needs start and end) and is the wrong command anyway.
            tmp = redisconn.lindex(arr[1],arr[2])
        else:
            tmp = 'this cmd is not support yeild'
        res[cmd] = [json_decode(tmp)]
    return res
Esempio n. 7
0
def get_redis_proxy():
    '''
    从redis相应的key中获取代理ip
    :return:
    '''
    startup_nodes = [{'host': 'redis1', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
    weibo_user_proxy_length = r.llen('spider:weibo_user:proxy')  # weibo_user
    print time.strftime('[%Y-%m-%d %H:%M:%S]'), 'redis中weibo_user的代理ip长度:', weibo_user_proxy_length
    if weibo_user_proxy_length == 0:
        print time.strftime('[%Y-%m-%d %H:%M:%S]'), 'redis中的代理ip数量为0,等待60s'
        time.sleep(60)
        return get_redis_proxy()
    for i in xrange(weibo_user_proxy_length):
        ip = r.lpop('spider:weibo_user:proxy')
        proxies = {
            'http': "http://*****:*****@{ip}".format(ip=ip),
            'https': "http://*****:*****@{ip}".format(ip=ip)
        }
        PROXY_IP_Q.put(proxies)
Esempio n. 8
0
def main():
    lock = Lock()
    print time.strftime('[%Y-%m-%d %H:%M:%S]'), 'start'

    startup_nodes = [{'host': 'redis1', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
    print 'redis中gaode_keyword来源列表长度:', r.llen('spider:python:gaode:keyword')

    key_test()  # 将有效的key放入KEYS_QUEUE
    key = KEYS_QUEUE.get()
    print '获取的第一个key:', key

    threads = []
    for i in xrange(50):
        t = threading.Thread(target=gaode, args=(lock, key, r))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    print time.strftime('[%Y-%m-%d %H:%M:%S]'), '抓取结束'
Esempio n. 9
0
def get_redis_proxy():
    '''
    从redis相应的key中获取代理ip(读取快代理的代理ip)
    :return:
    '''
    startup_nodes = [{'host': 'redis2', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
    baidu_zhishu_proxy_length = r.llen(
        'spider:baidu_zhishu:proxy:kuai')  # baidu_zhishu
    print time.strftime(
        '[%Y-%m-%d %H:%M:%S]'
    ), 'redis中baidu_zhishu的代理ip长度:', baidu_zhishu_proxy_length
    if baidu_zhishu_proxy_length == 0:
        print time.strftime('[%Y-%m-%d %H:%M:%S]'), 'redis中的代理ip数量为0,等待60s'
        time.sleep(60)
        return get_redis_proxy()
    for i in xrange(20):
        ip = r.lpop('spider:baidu_zhishu:proxy:kuai')
        proxies = {
            'http': "http://{ip}".format(ip=ip),
            # 'https': "http://{ip}".format(ip=ip)
        }
        PROXY_IP_Q.put(proxies)
Esempio n. 10
0
def file_write_redis():
    '''
    将源文件中的内容写入redis中
    :return:
    '''
    # wrd_keyword_file_path = file_exists()    # 数据源文件路径
    pubg_nickname_file_path = r'/ftp_samba/112/file_4spider/pubg_nickname/pubg_nickname'  # 数据源文件路径
    startup_nodes = [{'host': 'redis2', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)

    pubg_nickname_file = open(pubg_nickname_file_path, 'r')
    pubg_friends_nickname_length = r.llen('spider:python:pubg_friends:keyword')
    print 'redis中pubg_friends_nickname列表长度:', pubg_friends_nickname_length
    if pubg_friends_nickname_length < 200000:
        for line in pubg_nickname_file:
            r.rpush('spider:python:pubg_friends:keyword', line.strip())
        pubg_friends_nickname_length = r.llen(
            'spider:python:pubg_friends:keyword')
        print '重新写入后redis中pubg_friends_nickname列表长度:', pubg_friends_nickname_length

    pubg_nickname_file = open(pubg_nickname_file_path, 'r')
    pubg_match_nickname_length = r.llen('spider:python:pubg_match:keyword')
    print 'redis中pubg_match_nickname列表长度:', pubg_match_nickname_length
    if pubg_match_nickname_length < 200000:
        for line in pubg_nickname_file:
            r.rpush('spider:python:pubg_match:keyword', line.strip())
        pubg_match_nickname_length = r.llen('spider:python:pubg_match:keyword')
        print '重新写入后redis中pubg_match_nickname列表长度:', pubg_match_nickname_length

    pubg_nickname_file = open(pubg_nickname_file_path, 'r')
    pubg_death_nickname_length = r.llen('spider:python:pubg_death:keyword')
    print 'redis中pubg_death_nickname列表长度:', pubg_death_nickname_length
    if pubg_death_nickname_length < 200000:
        for line in pubg_nickname_file:
            r.rpush('spider:python:pubg_death:keyword', line.strip())
        pubg_death_nickname_length = r.llen('spider:python:pubg_death:keyword')
        print '重新写入后redis中pubg_death_nickname列表长度:', pubg_death_nickname_length
Esempio n. 11
0
class RedisDB():
    '''
    Small convenience wrapper around a redis connection.

    With more than one "ip:port" entry a StrictRedisCluster client is used
    together with a non-transactional pipeline; with a single entry a plain
    redis.Redis client is used together with a transactional pipeline.
    Both are created with decode_responses=True.
    '''

    def __init__(self, ip_ports = IP_PORTS, db = DB, user_pass = USER_PASS):
        '''
        @param ip_ports: list of "ip:port" strings
        @param db: database index (single-node mode only)
        @param user_pass: password (single-node mode only)

        Connection errors propagate to the caller unchanged.
        '''
        # super(RedisDB, self).__init__()

        if not hasattr(self, '_redis'):
            self._is_redis_cluster = False

            if len(ip_ports) > 1:
                startup_nodes = []
                for ip_port in ip_ports:
                    ip, port = ip_port.split(':')
                    startup_nodes.append({"host": ip, "port": port})

                self._redis = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
                self._pipe = self._redis.pipeline(transaction=False)

                self._is_redis_cluster = True

            else:
                ip, port = ip_ports[0].split(':')
                self._redis = redis.Redis(host = ip, port = port, db = db, password = user_pass, decode_responses=True)
                # A pipeline sends several commands in one request; with
                # transaction=True they are additionally wrapped in MULTI/EXEC.
                self._pipe = self._redis.pipeline(transaction=True)

            log.info('连接到redis数据库 %s'%(tools.dumps_json(ip_ports)))

    def _begin_batch(self):
        '''Mark the start of a transaction on the shared pipeline (single-node mode only).'''
        if not self._is_redis_cluster:
            self._pipe.multi()

    def sadd(self, table, values):
        '''
        @summary: store data in an unordered set (de-duplicated)
        ---------
        @param table: key of the set
        @param values: a list of values or a single value
        ---------
        @result: for a single value the SADD reply (0 if it already existed,
                 1 if it was added); None for a list
        '''

        if isinstance(values, list):
            self._begin_batch()
            for value in values:
                self._pipe.sadd(table, value)
            self._pipe.execute()

        else:
            return self._redis.sadd(table, values)

    def sget(self, table, count = 0, is_pop = True):
        '''
        @summary: read members of a set
        ---------
        @param table: key of the set
        @param count: number of members wanted
        @param is_pop: True -> SPOP up to `count` members (capped at the set
                       size); False -> SRANDMEMBER with `count`
        ---------
        @result: list of members when popping; the SRANDMEMBER reply otherwise
        '''
        datas = []
        if is_pop:
            # One SCARD round trip is enough to cap the request.
            count = min(count, self.sget_count(table))
            if count:
                if count > 1:
                    self._begin_batch()
                    for _ in range(count):
                        self._pipe.spop(table)
                    datas = self._pipe.execute()

                else:
                    datas.append(self._redis.spop(table))

        else:
            datas = self._redis.srandmember(table, count)

        return datas

    def sget_count(self, table):
        '''Return the number of members of the set (SCARD).'''
        return self._redis.scard(table)

    def sdelete(self, table):
        '''
        @summary: delete a big set incrementally
        The set is walked with SSCAN (count hint 10000) and each returned
        batch is removed with pipelined SREM commands. Per the original
        author's note, deleting a huge key with a single DELETE can block
        redis and lead to fail-overs and application crashes.
        ---------
        @param table: key of the set
        ---------
        @result: None
        '''
        # A cursor of 0 starts a new iteration; the server returning 0 means
        # the iteration is complete.
        cursor = 0
        while True:
            cursor, data = self._redis.sscan(table, cursor = cursor, count = 10000)
            for item in data:
                self._pipe.srem(table, item)

            self._pipe.execute()
            # int(): terminate even if a client hands the cursor back as text.
            if int(cursor) == 0:
                break

    def zadd(self, table, values,  prioritys = 0):
        '''
        @summary: store data in a sorted set (de-duplicated; an existing
                  value gets its score updated)
        ---------
        @param table: key of the sorted set
        @param values: a list of values or a single value
        @param prioritys: score(s); a list matching `values` or a single
                          number applied to all. Lower scores sort first.
                          Defaults to 0.
        ---------
        @result: for a single value the ZADD reply; None for a list
        '''
        # NOTE(review): the two clients are called with different argument
        # orders (cluster: score, value; single node: value, score). This
        # presumably matches the legacy redis-py 2.x signatures -- confirm
        # before upgrading redis-py.

        if isinstance(values, list):
            if not isinstance(prioritys, list):
                prioritys = [prioritys] * len(values)
            else:
                assert len(values) == len(prioritys), 'values值要与prioritys值一一对应'

            self._begin_batch()
            for value, priority in zip(values, prioritys):
                if self._is_redis_cluster:
                    self._pipe.zadd(table, priority, value)
                else:
                    self._pipe.zadd(table, value, priority)
            self._pipe.execute()

        else:
            if self._is_redis_cluster:
                return self._redis.zadd(table, prioritys, values)
            else:
                return self._redis.zadd(table, values, prioritys)

    def zget(self, table, count = 0, is_pop = True):
        '''
        @summary: read the lowest-ranked members of a sorted set
        ---------
        @param table: key of the sorted set
        @param count: number of members; 0 and 1 both address rank 0 only
        @param is_pop: whether to remove the returned rank range afterwards
                       (default True)
        ---------
        @result: list of members
        '''
        start_pos = 0 # inclusive
        end_pos = 0 if count == 0 else count - 1 # inclusive

        self._begin_batch()
        self._pipe.zrange(table, start_pos, end_pos) # read
        if is_pop: self._pipe.zremrangebyrank(table, start_pos, end_pos) # remove
        # The pipeline yields one reply per queued command: the ZRANGE reply
        # always comes first, the removal count only when is_pop is set.
        # Unpacking two values used to raise ValueError for is_pop=False.
        replies = self._pipe.execute()
        return replies[0]

    def zget_count(self, table, priority_min = None, priority_max = None):
        '''
        @summary: number of members of a sorted set
        ---------
        @param table: key of the sorted set
        @param priority_min: lower score bound (inclusive)
        @param priority_max: upper score bound (inclusive)
        ---------
        @result: ZCOUNT within the bounds when both are given, else ZCARD
        '''

        if priority_min is not None and priority_max is not None:
            return self._redis.zcount(table, priority_min, priority_max)
        else:
            return self._redis.zcard(table)

    def lpush(self, table, values):
        '''
        Append a value or a list of values to a redis list.

        Despite the method name the values are added with RPUSH (tail of the
        list). Returns the RPUSH reply for a single value, None for a list.
        '''
        if isinstance(values, list):
            self._begin_batch()
            for value in values:
                self._pipe.rpush(table, value)
            self._pipe.execute()

        else:
            return self._redis.rpush(table, values)

    def lpop(self, table, count = 1):
        '''
        @summary: pop items from the head of a redis list
        ---------
        @param table: key of the list
        @param count: number of items wanted (capped at the list length)
        ---------
        @result: list of popped items
        '''
        datas = []

        # One LLEN round trip is enough to cap the request.
        count = min(count, self.lget_count(table))

        if count:
            if count > 1:
                self._begin_batch()
                for _ in range(count):
                    self._pipe.lpop(table)
                datas = self._pipe.execute()

            else:
                datas.append(self._redis.lpop(table))

        return datas

    def lget_count(self, table):
        '''Return the length of the list (LLEN).'''
        return self._redis.llen(table)

    def setbit(self, table, offset, value):
        '''Set the bit at `offset` of the string stored at `table` (SETBIT).'''
        self._redis.setbit(table, offset, value)

    def getbit(self, table, offset):
        '''Return the bit at `offset` of the string stored at `table` (GETBIT).'''
        return self._redis.getbit(table, offset)

    def clear(self, table):
        '''Delete the key; failures are logged, not raised (best effort).'''
        try:
            self._redis.delete(table)
        except Exception as e:
            log.error(e)
Esempio n. 12
0
class SharQ(object):
    """The SharQ object is the core of this queue.
    SharQ does the following.

        1. Accepts a configuration file.
        2. Initializes the queue.
        3. Exposes functions to interact with the queue.
    """

    # Base names of the lua scripts under scripts/lua; each one is exposed
    # as self._lua_<name>_script (source) and self._lua_<name> (callable).
    _LUA_SCRIPT_NAMES = ('enqueue', 'dequeue', 'finish', 'interval',
                         'requeue', 'metrics')

    def __init__(self, config_path):
        """Construct a SharQ object by doing the following.
            1. Read the configuration path.
            2. Load the config.
            3. Initialize SharQ.
        """
        self.config_path = config_path
        self._load_config()
        self._initialize()

    def _initialize(self):
        """Read the SharQ configuration and set appropriate
        variables. Open a redis connection and load all
        the Lua scripts.

        Raises ValueError when the configured `conn_type` is neither
        'unix_sock' nor 'tcp_sock'.
        """
        self._key_prefix = self._config.get('redis', 'key_prefix')
        self._job_expire_interval = int(
            self._config.get('sharq', 'job_expire_interval'))
        self._default_job_requeue_limit = int(
            self._config.get('sharq', 'default_job_requeue_limit'))

        # initialize redis
        redis_connection_type = self._config.get('redis', 'conn_type')
        db = self._config.get('redis', 'db')
        if redis_connection_type == 'unix_sock':
            self._r = redis.StrictRedis(db=db,
                                        unix_socket_path=self._config.get(
                                            'redis', 'unix_socket_path'))
        elif redis_connection_type == 'tcp_sock':
            if self._config.getboolean('redis', 'clustered', fallback=False):
                startup_nodes = [{
                    "host": self._config.get('redis', 'host'),
                    "port": self._config.get('redis', 'port')
                }]
                self._r = StrictRedisCluster(startup_nodes=startup_nodes,
                                             decode_responses=True,
                                             skip_full_coverage_check=True)
            else:
                self._r = redis.StrictRedis(
                    db=db,
                    host=self._config.get('redis', 'host'),
                    port=self._config.get('redis', 'port'))
        else:
            # Without this, self._r stays unset and the failure only shows
            # up later as an opaque AttributeError while loading scripts.
            raise ValueError(
                'unsupported redis conn_type %r; expected `unix_sock` '
                'or `tcp_sock`.' % (redis_connection_type,))
        self._load_lua_scripts()

    def _load_config(self):
        """Read the configuration file and load it into memory."""
        self._config = ConfigParser.SafeConfigParser()
        self._config.read(self.config_path)

    def reload_config(self, config_path=None):
        """Reload the configuration from the new config file if provided
        else reload the current config file.

        Only the parsed config is refreshed; the redis connection and the
        values derived in `_initialize` are left untouched.
        """
        if config_path:
            self.config_path = config_path
        self._load_config()

    def _load_lua_scripts(self):
        """Loads all lua scripts required by SharQ.

        Every script in `_LUA_SCRIPT_NAMES` is read from the `scripts/lua`
        directory next to this module and registered on the redis client.
        """
        lua_script_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'scripts/lua')
        for name in self._LUA_SCRIPT_NAMES:
            script_file_path = os.path.join(lua_script_path, '%s.lua' % name)
            with open(script_file_path, 'r') as script_file:
                script_source = script_file.read()
            setattr(self, '_lua_%s_script' % name, script_source)
            setattr(self, '_lua_%s' % name,
                    self._r.register_script(script_source))

    def reload_lua_scripts(self):
        """Lets user reload the lua scripts in run time."""
        self._load_lua_scripts()

    @staticmethod
    def _parse_rate_details(enqueue_details, dequeue_details):
        """Convert the flat [key, count, key, count, ...] replies of the
        metrics script into an (enqueue_counts, dequeue_counts) dict pair.
        Missing counts are reported as 0.
        """
        enqueue_counts = {}
        dequeue_counts = {}
        # the length of enqueue & dequeue details are always same.
        for i in range(0, len(enqueue_details), 2):
            enqueue_counts[str(enqueue_details[i])] = int(
                enqueue_details[i + 1] or 0)
            dequeue_counts[str(dequeue_details[i])] = int(
                dequeue_details[i + 1] or 0)
        return enqueue_counts, dequeue_counts

    def enqueue(self,
                payload,
                interval,
                job_id,
                queue_id,
                queue_type='default',
                requeue_limit=None):
        """Enqueues the job into the specified queue_id
        of a particular queue_type.

        `requeue_limit` falls back to the configured default when None.
        Raises BadArgumentException for any invalid argument or a payload
        that cannot be serialized. Returns {'status': 'queued'}.
        """
        # validate all the input
        if not is_valid_interval(interval):
            raise BadArgumentException('`interval` has an invalid value.')

        if not is_valid_identifier(job_id):
            raise BadArgumentException('`job_id` has an invalid value.')

        if not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        if requeue_limit is None:
            requeue_limit = self._default_job_requeue_limit

        if not is_valid_requeue_limit(requeue_limit):
            raise BadArgumentException('`requeue_limit` has an invalid value.')

        try:
            serialized_payload = serialize_payload(payload)
        except TypeError as e:
            # str(e) instead of e.message: the attribute is Python 2 only.
            raise BadArgumentException(str(e))

        timestamp = str(generate_epoch())

        keys = [self._key_prefix, queue_type]

        # The payload is stored wrapped in double quotes; dequeue() strips
        # them again before deserializing.
        args = [
            timestamp, queue_id, job_id,
            '"%s"' % serialized_payload, interval, requeue_limit
        ]

        self._lua_enqueue(keys=keys, args=args)

        response = {'status': 'queued'}
        return response

    def dequeue(self, queue_type='default'):
        """Dequeues a job from any of the ready queues
        based on the queue_type. If no job is ready,
        returns a failure status.
        """
        if not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        timestamp = str(generate_epoch())

        keys = [self._key_prefix, queue_type]
        args = [timestamp, self._job_expire_interval]

        dequeue_response = self._lua_dequeue(keys=keys, args=args)

        # A reply with fewer than four fields is treated as "no job ready".
        if len(dequeue_response) < 4:
            response = {'status': 'failure'}
            return response

        queue_id, job_id, payload, requeues_remaining = dequeue_response
        # drop the surrounding quotes added by enqueue()
        payload = deserialize_payload(payload[1:-1])

        response = {
            'status': 'success',
            'queue_id': queue_id,
            'job_id': job_id,
            'payload': payload,
            'requeues_remaining': int(requeues_remaining)
        }

        return response

    def finish(self, job_id, queue_id, queue_type='default'):
        """Marks any dequeued job as *completed successfully*.
        Any job which gets a finish will be treated as complete
        and will be removed from the SharQ.

        Returns {'status': 'success'}, or {'status': 'failure'} when the
        finish script replies 0.
        """
        if not is_valid_identifier(job_id):
            raise BadArgumentException('`job_id` has an invalid value.')

        if not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        keys = [self._key_prefix, queue_type]

        args = [queue_id, job_id]

        response = {'status': 'success'}

        finish_response = self._lua_finish(keys=keys, args=args)
        if finish_response == 0:
            # the finish failed.
            response.update({'status': 'failure'})

        return response

    def interval(self, interval, queue_id, queue_type='default'):
        """Updates the interval for a specific queue_id
        of a particular queue type.

        Returns {'status': 'success'}, or {'status': 'failure'} when the
        interval script replies 0.
        """
        # validate all the input
        if not is_valid_interval(interval):
            raise BadArgumentException('`interval` has an invalid value.')

        if not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        # generate the interval key
        interval_hmap_key = '%s:interval' % self._key_prefix
        interval_queue_key = '%s:%s' % (queue_type, queue_id)
        keys = [interval_hmap_key, interval_queue_key]

        args = [interval]
        interval_response = self._lua_interval(keys=keys, args=args)
        if interval_response == 0:
            # the queue with the id and type does not exist.
            response = {'status': 'failure'}
        else:
            response = {'status': 'success'}

        return response

    def requeue(self):
        """Re-queues any expired job (one which does not get an expire
        before the job_expiry_interval) back into their respective queue.
        This function has to be run at specified intervals to ensure the
        expired jobs are re-queued back.
        """
        timestamp = str(generate_epoch())
        # get all queue_types and requeue one by one.
        # not recommended to do this entire process
        # in lua as it might take long and block other
        # enqueues and dequeues.
        active_queue_type_list = self._r.smembers('%s:active:queue_type' %
                                                  self._key_prefix)
        for queue_type in active_queue_type_list:
            # requeue all expired jobs in all queue types.
            keys = [self._key_prefix, queue_type]

            args = [timestamp]
            job_discard_list = self._lua_requeue(keys=keys, args=args)
            # discard the jobs if any
            for job in job_discard_list:
                queue_id, job_id = job.split(':')
                # explicitly finishing a job
                # is nothing but discard.
                self.finish(job_id=job_id,
                            queue_id=queue_id,
                            queue_type=queue_type)

    def metrics(self, queue_type=None, queue_id=None):
        """Provides a way to get statistics about various parameters like,
        * global enqueue / dequeue rates per min.
        * per queue enqueue / dequeue rates per min.
        * queue length of each queue.
        * list of queue ids for each queue type.

        Which statistics are returned depends on the arguments:
        neither given -> global rates plus the known queue types;
        only `queue_type` -> the queue ids of that type;
        both -> rates and length of that queue.
        `queue_id` without `queue_type` raises BadArgumentException.
        """
        if queue_id is not None and not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if queue_type is not None and not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        response = {'status': 'failure'}
        if not queue_type and not queue_id:
            # return global stats.
            # list of active queue types (ready + active)
            active_queue_types = self._r.smembers('%s:active:queue_type' %
                                                  self._key_prefix)
            ready_queue_types = self._r.smembers('%s:ready:queue_type' %
                                                 self._key_prefix)
            all_queue_types = active_queue_types | ready_queue_types
            # global rates for past 10 minutes
            timestamp = str(generate_epoch())
            keys = [self._key_prefix]
            args = [timestamp]

            enqueue_details, dequeue_details = self._lua_metrics(keys=keys,
                                                                 args=args)
            enqueue_counts, dequeue_counts = self._parse_rate_details(
                enqueue_details, dequeue_details)

            response.update({
                'status': 'success',
                'queue_types': list(all_queue_types),
                'enqueue_counts': enqueue_counts,
                'dequeue_counts': dequeue_counts
            })
            return response
        elif queue_type and not queue_id:
            # return list of queue_ids.
            # get data from two sorted sets in a transaction
            pipe = self._r.pipeline()
            pipe.zrange('%s:%s' % (self._key_prefix, queue_type), 0, -1)
            pipe.zrange('%s:%s:active' % (self._key_prefix, queue_type), 0, -1)
            ready_queues, active_queues = pipe.execute()
            # extract the queue_ids from the queue_id:job_id string
            active_queues = [i.split(':')[0] for i in active_queues]
            all_queue_set = set(ready_queues) | set(active_queues)
            response.update({
                'status': 'success',
                'queue_ids': list(all_queue_set)
            })
            return response
        elif queue_type and queue_id:
            # return specific details.
            # queue specific rates for past 10 minutes
            timestamp = str(generate_epoch())
            keys = ['%s:%s:%s' % (self._key_prefix, queue_type, queue_id)]
            args = [timestamp]

            enqueue_details, dequeue_details = self._lua_metrics(keys=keys,
                                                                 args=args)
            enqueue_counts, dequeue_counts = self._parse_rate_details(
                enqueue_details, dequeue_details)

            # get the queue length for the job queue
            queue_length = self._r.llen(
                '%s:%s:%s' % (self._key_prefix, queue_type, queue_id))

            response.update({
                'status': 'success',
                'queue_length': int(queue_length),
                'enqueue_counts': enqueue_counts,
                'dequeue_counts': dequeue_counts
            })
            return response
        elif not queue_type and queue_id:
            raise BadArgumentException(
                '`queue_id` should be accompanied by `queue_type`.')

        return response

    def clear_queue(self, queue_type=None, queue_id=None, purge_all=False):
        """Clear all entries in the queue with a particular queue_id
        and queue_type. It takes an optional argument,
        purge_all : if True, then it will remove the related resources
        (payloads, interval entry, job list) from redis.

        Both `queue_type` and `queue_id` are required despite their None
        defaults; BadArgumentException is raised otherwise. Returns a dict
        with 'status' ('Success' / 'Failure') and a 'message'.
        """
        if queue_id is None or not is_valid_identifier(queue_id):
            raise BadArgumentException('`queue_id` has an invalid value.')

        if queue_type is None or not is_valid_identifier(queue_type):
            raise BadArgumentException('`queue_type` has an invalid value.')

        response = {'status': 'Failure', 'message': 'No queued calls found'}
        # remove from the primary sorted set
        primary_set = '{}:{}'.format(self._key_prefix, queue_type)
        queued_status = self._r.zrem(primary_set, queue_id)
        if queued_status:
            response.update({
                'status': 'Success',
                'message': 'Successfully removed all queued calls'
            })
        # do a full cleanup of resources
        # although this is not necessary as we don't remove resources
        # while dequeue operation
        job_queue_list = '{}:{}:{}'.format(self._key_prefix, queue_type,
                                           queue_id)
        if queued_status and purge_all:
            job_list = self._r.lrange(job_queue_list, 0, -1)
            pipe = self._r.pipeline()
            # clear the payload data for job_uuid
            payload_set = '{}:payload'.format(self._key_prefix)
            for job_uuid in job_list:
                if job_uuid is None:
                    continue
                job_payload_key = '{}:{}:{}'.format(queue_type, queue_id,
                                                    job_uuid)
                pipe.hdel(payload_set, job_payload_key)
            # clear jobrequest interval
            interval_set = '{}:interval'.format(self._key_prefix)
            job_interval_key = '{}:{}'.format(queue_type, queue_id)
            pipe.hdel(interval_set, job_interval_key)
            # clear job_queue_list
            pipe.delete(job_queue_list)
            pipe.execute()
            response.update({
                'status':
                'Success',
                'message':
                'Successfully removed all queued calls and purged related resources'
            })
        else:
            # always delete the job queue list
            self._r.delete(job_queue_list)
        return response
Esempio n. 13
0
class RedisDB:
    """Convenience wrapper around a redis client.

    Depending on the constructor arguments the wrapped client is a plain
    ``redis.StrictRedis``, a sentinel-managed master or a
    ``StrictRedisCluster``.  Most methods take the redis key as ``table``.
    """

    def __init__(self,
                 ip_ports=None,
                 db=None,
                 user_pass=None,
                 url=None,
                 decode_responses=True,
                 service_name=None,
                 max_connections=32,
                 **kwargs):
        """
        Create the underlying redis client.

        Args:
            ip_ports: "ip:port"; several nodes may be given as a list or as a
                comma separated string, e.g. "ip1:port1,ip2:port2" or
                ["ip1:port1", "ip2:port2"]
            db: database index (not passed to the cluster client)
            user_pass: password
            url: redis url; when given it is used instead of ``ip_ports``
            decode_responses: forwarded to the client
            service_name: sentinel service name; with several nodes it
                selects sentinel mode instead of cluster mode
            max_connections: forwarded to the client
            **kwargs: forwarded to the client constructor (and to Sentinel)
        """

        # The settings may be modified at runtime, so the defaults are
        # resolved here instead of in the signature.
        if ip_ports is None:
            ip_ports = setting.REDISDB_IP_PORTS
        if db is None:
            db = setting.REDISDB_DB
        if user_pass is None:
            user_pass = setting.REDISDB_USER_PASS
        if service_name is None:
            service_name = setting.REDISDB_SERVICE_NAME

        self._is_redis_cluster = False

        if url:
            self._redis = redis.StrictRedis.from_url(
                url, decode_responses=decode_responses)
            log.debug("连接到redis数据库 %s" % (url))
        else:
            if not isinstance(ip_ports, list):
                ip_ports = ip_ports.split(",")

            if len(ip_ports) > 1:
                startup_nodes = []
                for ip_port in ip_ports:
                    ip, port = ip_port.split(":")
                    startup_nodes.append({"host": ip, "port": port})

                if service_name:
                    log.debug("使用redis哨兵模式")
                    hosts = [(node["host"], node["port"])
                             for node in startup_nodes]
                    sentinel = Sentinel(hosts, socket_timeout=3, **kwargs)
                    self._redis = sentinel.master_for(
                        service_name,
                        password=user_pass,
                        db=db,
                        redis_class=redis.StrictRedis,
                        decode_responses=decode_responses,
                        max_connections=max_connections,
                        **kwargs)
                else:
                    log.debug("使用redis集群模式")
                    self._redis = StrictRedisCluster(
                        startup_nodes=startup_nodes,
                        decode_responses=decode_responses,
                        password=user_pass,
                        max_connections=max_connections,
                        **kwargs)

                # Set for every multi-node setup (sentinel as well as
                # cluster); it makes the pipelines skip multi().
                self._is_redis_cluster = True
            else:
                ip, port = ip_ports[0].split(":")
                self._redis = redis.StrictRedis(
                    host=ip,
                    port=port,
                    db=db,
                    password=user_pass,
                    decode_responses=decode_responses,
                    max_connections=max_connections,
                    **kwargs)

            log.debug("连接到redis数据库 %s db%s" % (ip_ports, db))

        self._ip_ports = ip_ports
        self._db = db
        self._user_pass = user_pass
        self._url = url

    def __repr__(self):
        if self._url:
            return "<Redisdb url:{}>".format(self._url)

        return "<Redisdb ip_ports: {} db:{} user_pass:{}>".format(
            self._ip_ports, self._db, self._user_pass)

    @classmethod
    def from_url(cls, url):
        """Alternate constructor: build the wrapper from a redis url."""
        return cls(url=url)

    def _transaction_pipeline(self):
        """
        Return a transactional pipeline ready to queue commands.

        A pipeline sends several commands in a single request instead of
        checking a pooled connection out and in for every command.
        multi() is only called when the client is not flagged as multi-node;
        presumably the cluster pipeline does not support it (TODO confirm
        against the redis-py-cluster version in use).
        """
        pipe = self._redis.pipeline(transaction=True)
        if not self._is_redis_cluster:
            pipe.multi()
        return pipe

    def sadd(self, table, values):
        """
        @summary: store data in an unordered set (de-duplicated)
        ---------
        @param table:
        @param values: a single value or a list of values
        ---------
        @result: single value: the client's SADD result (per the original
                 doc 0 if already present, otherwise 1); list: None
        """

        if not isinstance(values, list):
            return self._redis.sadd(table, values)

        pipe = self._transaction_pipeline()
        for value in values:
            pipe.sadd(table, value)
        pipe.execute()

    def sget(self, table, count=1, is_pop=True):
        """
        Return members of a set as a list, e.g. ['1'] or [].
        @param table:
        @param count: number of members wanted
        @param is_pop: pop the members (SPOP) instead of sampling them
                       (SRANDMEMBER)
        @return: list
        """
        if not is_pop:
            return self._redis.srandmember(table, count)

        datas = []
        # never ask for more members than the set holds
        count = min(count, self.sget_count(table))
        if count:
            if count > 1:
                pipe = self._transaction_pipeline()
                while count:
                    pipe.spop(table)
                    count -= 1
                datas = pipe.execute()
            else:
                datas.append(self._redis.spop(table))

        return datas

    def srem(self, table, values):
        """
        @summary: remove the given members from a set
        ---------
        @param table:
        @param values: a single value or a list of values
        ---------
        @result:
        """
        if isinstance(values, list):
            pipe = self._transaction_pipeline()
            for value in values:
                pipe.srem(table, value)
            pipe.execute()
        else:
            self._redis.srem(table, values)

    def sget_count(self, table):
        """Return the number of members of the set (SCARD)."""
        return self._redis.scard(table)

    def sdelete(self, table):
        """
        @summary: delete a big set key incrementally
        The set is walked with SSCAN (count hint 500) and each page of
        members is removed with SREM.  Per the original note, deleting a
        huge key with a plain DEL can block redis.
        ---------
        @param table:
        ---------
        @result:
        """
        # A scan starts with cursor 0 and is finished once the server hands
        # cursor 0 back.
        cursor = 0
        while True:
            cursor, data = self._redis.sscan(table, cursor=cursor, count=500)
            if data:
                # one SREM per page instead of one per member
                self._redis.srem(table, *data)
            # int() so that a cursor returned as "0" also ends the loop
            if int(cursor) == 0:
                break

    def sismember(self, table, key):
        "Return a boolean indicating if ``value`` is a member of set ``name``"
        return self._redis.sismember(table, key)

    def zadd(self, table, values, prioritys=0):
        """
        @summary: store data in a sorted set (existing members are updated)
        ---------
        @param table:
        @param values: a single value or a list of values
        @param prioritys: score; a single number or a list matching
                          ``values``. Lower scores come first. Default 0.
        ---------
        @result: single value: the client's ZADD result; list: the list of
                 per-command results, e.g. [0, 1 ...]
        """
        if not isinstance(values, list):
            return self._redis.zadd(table, prioritys, values)

        if not isinstance(prioritys, list):
            prioritys = [prioritys] * len(values)
        else:
            assert len(values) == len(prioritys), "values值要与prioritys值一一对应"

        pipe = self._transaction_pipeline()
        for value, priority in zip(values, prioritys):
            pipe.zadd(table, priority, value)
        return pipe.execute()

    def zget(self, table, count=1, is_pop=True):
        """
        @summary: read members from a sorted set, lowest score first
        ---------
        @param table:
        @param count: number of members; -1 returns everything
        @param is_pop: also remove the returned rank range from the set
                       (default True)
        ---------
        @result: list
        """
        start_pos = 0  # inclusive
        end_pos = count - 1 if count > 0 else count

        # read and remove are queued on the same transactional pipeline
        pipe = self._transaction_pipeline()
        pipe.zrange(table, start_pos, end_pos)  # read
        if is_pop:
            pipe.zremrangebyrank(table, start_pos, end_pos)  # remove
        # the first result belongs to zrange; the optional second one is the
        # number of removed members and is not needed
        return pipe.execute()[0]

    def zremrangebyscore(self, table, priority_min, priority_max):
        """
        Remove members by score, closed interval.
        @param table:
        @param priority_min:
        @param priority_max:
        @return: number of removed members
        """
        return self._redis.zremrangebyscore(table, priority_min, priority_max)

    def zrangebyscore(self,
                      table,
                      priority_min,
                      priority_max,
                      count=None,
                      is_pop=True):
        """
        @summary: return the members whose score is in the closed interval
        ---------
        @param table:
        @param priority_min: lower scores come first
        @param priority_max:
        @param count: number of members; empty means the whole interval
        @param is_pop: remove the returned members from the set
        ---------
        @result:
        """

        # A lua script keeps read + delete atomic.
        # ARGV values are strings and every non-nil string is truthy in Lua,
        # so the pop flag is sent as 1/0 and compared explicitly; a bare
        # `if is_pop` would delete even for is_pop=False.
        lua = """
            local key = KEYS[1]
            local min_score = ARGV[2]
            local max_score = ARGV[3]
            local is_pop = ARGV[4]
            local count = ARGV[5]

            -- read
            local datas = nil
            if count then
                datas = redis.call('zrangebyscore', key, min_score, max_score, 'limit', 0, count)
            else
                datas = redis.call('zrangebyscore', key, min_score, max_score)
            end

            -- remove the members that were just read
            if is_pop == '1' then
                for i=1, #datas do
                    redis.call('zrem', key, datas[i])
                end
            end


            return datas

        """
        cmd = self._redis.register_script(lua)
        args = [table, priority_min, priority_max, 1 if is_pop else 0]
        if count:
            args.append(count)

        return cmd(keys=[table], args=args)

    def zrangebyscore_increase_score(self,
                                     table,
                                     priority_min,
                                     priority_max,
                                     increase_score,
                                     count=None):
        """
        @summary: return the members whose score is in the closed interval
                  and shift their scores
        ---------
        @param table:
        @param priority_min: lowest score
        @param priority_max: highest score
        @param increase_score: score delta; positive adds, negative
                               subtracts
        @param count: number of members; empty means the whole interval
        ---------
        @result:
        """

        # A lua script keeps read + update atomic.
        lua = """
            local key = KEYS[1]
            local min_score = ARGV[1]
            local max_score = ARGV[2]
            local increase_score = ARGV[3]
            local count = ARGV[4]

            -- read
            local datas = nil
            if count then
                datas = redis.call('zrangebyscore', key, min_score, max_score, 'limit', 0, count)
            else
                datas = redis.call('zrangebyscore', key, min_score, max_score)
            end

            -- change the score
            for i=1, #datas do
                redis.call('zincrby', key, increase_score, datas[i])
            end

            return datas

        """
        cmd = self._redis.register_script(lua)
        args = [priority_min, priority_max, increase_score]
        if count:
            args.append(count)

        return cmd(keys=[table], args=args)

    def zrangebyscore_set_score(self,
                                table,
                                priority_min,
                                priority_max,
                                score,
                                count=None):
        """
        @summary: return the members whose score is in the closed interval
                  and set their scores to a fixed value
        ---------
        @param table:
        @param priority_min: lowest score
        @param priority_max: highest score
        @param score: the new score
        @param count: number of members; empty means the whole interval
        ---------
        @result:
        """

        # A lua script keeps read + update atomic.
        lua = """
            local key = KEYS[1]
            local min_score = ARGV[1]
            local max_score = ARGV[2]
            local set_score = ARGV[3]
            local count = ARGV[4]

            -- read
            local datas = nil
            if count then
                datas = redis.call('zrangebyscore', key, min_score, max_score, 'withscores','limit', 0, count)
            else
                datas = redis.call('zrangebyscore', key, min_score, max_score, 'withscores')
            end

            local real_datas = {} -- members only
            -- change the score
            for i=1, #datas, 2 do
               local data = datas[i]
               local score = datas[i+1]

               table.insert(real_datas, data) -- collect the member

               redis.call('zincrby', key, set_score - score, datas[i])
            end

            return real_datas

        """
        cmd = self._redis.register_script(lua)
        args = [priority_min, priority_max, score]
        if count:
            args.append(count)

        return cmd(keys=[table], args=args)

    def zget_count(self, table, priority_min=None, priority_max=None):
        """
        @summary: number of members of a sorted set
        ---------
        @param table:
        @param priority_min: lowest score (inclusive)
        @param priority_max: highest score (inclusive)
        ---------
        @result: ZCOUNT of the interval when both bounds are given,
                 otherwise ZCARD
        """

        if priority_min is not None and priority_max is not None:
            return self._redis.zcount(table, priority_min, priority_max)
        return self._redis.zcard(table)

    def zrem(self, table, values):
        """
        @summary: remove the given members from a sorted set
        ---------
        @param table:
        @param values: a single value or a list of values
        ---------
        @result:
        """
        if isinstance(values, list):
            pipe = self._transaction_pipeline()
            for value in values:
                pipe.zrem(table, value)
            pipe.execute()
        else:
            self._redis.zrem(table, values)

    def zexists(self, table, values):
        """
        Check membership of a sorted set through ZSCORE.
        @param values: a single value or a list of values
        @return: 1/0 for a single value, a list of 1/0 for a list
        """
        if not isinstance(values, list):
            return 1 if self._redis.zscore(table, values) is not None else 0

        pipe = self._transaction_pipeline()
        for value in values:
            pipe.zscore(table, value)
        # a missing member has no score
        return [0 if score is None else 1 for score in pipe.execute()]

    def lpush(self, table, values):
        """
        Append a single value or a list of values to a list.

        NOTE: despite its name this method issues RPUSH; the behaviour is
        kept because existing callers may rely on the resulting order.
        @result: single value: the client's RPUSH result; list: None
        """
        if not isinstance(values, list):
            return self._redis.rpush(table, values)

        pipe = self._transaction_pipeline()
        for value in values:
            pipe.rpush(table, value)
        pipe.execute()

    def lpop(self, table, count=1):
        """
        @summary: pop from the head of a list
        ---------
        @param table:
        @param count:
        ---------
        @result: a list when more than one element is popped, a single
                 value for one element, None when nothing is popped
        """
        datas = None

        # never pop more elements than the list holds
        count = min(count, self.lget_count(table))

        if count:
            if count > 1:
                pipe = self._transaction_pipeline()
                while count:
                    pipe.lpop(table)
                    count -= 1
                datas = pipe.execute()
            else:
                datas = self._redis.lpop(table)

        return datas

    def rpoplpush(self, from_table, to_table=None):
        """
        Pop the last element of ``from_table``, push it to the head of
        ``to_table`` and return it.
        When ``to_table`` is omitted the same list is used, which rotates
        the list: its tail element moves to the head.
        @param from_table:
        @param to_table:
        @return:
        """

        if not to_table:
            to_table = from_table

        return self._redis.rpoplpush(from_table, to_table)

    def lget_count(self, table):
        """Return the length of the list (LLEN)."""
        return self._redis.llen(table)

    def lrem(self, table, value, num=0):
        """Forward to the client's lrem(table, value, num)."""
        return self._redis.lrem(table, value, num)

    def hset(self, table, key, value):
        """
        @summary: set a hash field
        Per the original doc a missing hash is created and an existing
        field is overwritten.
        ---------
        @param table:
        @param key:
        @param value:
        ---------
        @result: 1 newly inserted; 0 overwritten
        """

        return self._redis.hset(table, key, value)

    def hset_batch(self, table, datas):
        """
        Set several hash fields through one pipeline.
        Args:
            datas:
                [[key, value]]
        Returns:
            the list of per-command results
        """
        pipe = self._transaction_pipeline()
        for key, value in datas:
            pipe.hset(table, key, value)
        return pipe.execute()

    def hincrby(self, table, key, increment):
        return self._redis.hincrby(table, key, increment)

    def hget(self, table, key, is_pop=False):
        """
        Read a hash field; with ``is_pop`` the field is deleted in the same
        lua script that reads it.
        """
        if not is_pop:
            return self._redis.hget(table, key)

        lua = """
                local key = KEYS[1]
                local field = ARGV[1]

                -- read
                local datas = redis.call('hget', key, field)
                -- delete
                redis.call('hdel', key, field)

                return datas

                    """
        cmd = self._redis.register_script(lua)
        return cmd(keys=[table], args=[key])

    def hgetall(self, table):
        return self._redis.hgetall(table)

    def hexists(self, table, key):
        return self._redis.hexists(table, key)

    def hdel(self, table, *keys):
        """
        @summary: delete hash fields; several may be passed
        ---------
        @param table:
        @param *keys:
        ---------
        @result:
        """

        self._redis.hdel(table, *keys)

    def hget_count(self, table):
        """Return the number of fields of the hash (HLEN)."""
        return self._redis.hlen(table)

    def setbit(self, table, offsets, values):
        """
        Set bits of a string value and return the previous bit values.
        @param table:
        @param offsets: a single offset or a list
        @param values: a single value or a list
        @return: list / single value
        """
        if not isinstance(offsets, list):
            return self._redis.setbit(table, offsets, values)

        if not isinstance(values, list):
            values = [values] * len(offsets)
        else:
            assert len(offsets) == len(values), "offsets值要与values值一一对应"

        pipe = self._transaction_pipeline()
        for offset, value in zip(offsets, values):
            pipe.setbit(table, offset, value)
        return pipe.execute()

    def getbit(self, table, offsets):
        """
        Read bits of a string value.
        @param table:
        @param offsets: a single offset or a list
        @return: list / single value
        """
        if not isinstance(offsets, list):
            return self._redis.getbit(table, offsets)

        pipe = self._transaction_pipeline()
        for offset in offsets:
            pipe.getbit(table, offset)
        return pipe.execute()

    def bitcount(self, table):
        return self._redis.bitcount(table)

    def strset(self, table, value, **kwargs):
        return self._redis.set(table, value, **kwargs)

    def str_incrby(self, table, value):
        return self._redis.incrby(table, value)

    def strget(self, table):
        return self._redis.get(table)

    def strlen(self, table):
        return self._redis.strlen(table)

    def getkeys(self, regex):
        """Return the keys matching the pattern (KEYS)."""
        return self._redis.keys(regex)

    def exists_key(self, key):
        return self._redis.exists(key)

    def set_expire(self, key, seconds):
        """
        @summary: set the time to live of a key
        ---------
        @param key:
        @param seconds: seconds
        ---------
        @result:
        """

        self._redis.expire(key, seconds)

    def clear(self, table):
        """Delete the key; failures are logged instead of raised."""
        try:
            self._redis.delete(table)
        except Exception as e:
            log.error(e)

    def get_redis_obj(self):
        """Return the wrapped redis client."""
        return self._redis
Esempio n. 14
0
class RedisClient(object):
    """Hash and list helpers bound to one key of a redis cluster.

    Every method operates on ``self.key``; use ``change_key`` to switch to
    another key.
    """

    def __init__(self, key, startup_nodes):
        """
        Connect to the cluster.
        :param key: redis key all operations work on
        :param startup_nodes: forwarded to StrictRedisCluster
        """
        self.key = key
        self.conn = StrictRedisCluster(startup_nodes=startup_nodes,
                                       decode_responses=True)

    @staticmethod
    def _to_text(value):
        """Decode bytes as utf-8; return anything else unchanged."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def hdel(self, field):
        """
        Delete a field of the hash.
        :param field:
        :return: the client's HDEL result
        """
        return self.conn.hdel(self.key, field)

    def hexists(self, field):
        """
        Tell whether the hash contains ``field``.
        :param field:
        :return:
        """
        return self.conn.hexists(self.key, field)

    def hget(self, field):
        """
        Return the value stored under ``field``.
        :param field:
        :return: the value (bytes are decoded as utf-8); None for a missing
                 or empty value
        """
        value = self.conn.hget(self.key, field)
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value if value else None

    def hgetall(self):
        """
        Return the whole hash as {field: value, field1: value1....}.
        :return: dict, or None when the hash is empty
        """
        all_dict = self.conn.hgetall(self.key)
        if not all_dict:
            return None
        if sys.version_info.major == 3:
            # The client is created with decode_responses=True and then
            # returns str; only items that are still bytes are decoded.
            return {
                self._to_text(field): self._to_text(value)
                for field, value in all_dict.items()
            }
        return all_dict

    def hkeys(self):
        """
        Return all fields of the hash.
        :return: list of fields, or None when there are none
        """
        fields = self.conn.hkeys(self.key)
        if not fields:
            return None
        if isinstance(fields, bytes):
            return fields.decode('utf-8')
        if sys.version_info.major == 3:
            return [self._to_text(field) for field in fields]
        return fields

    def hlen(self):
        """
        Return the number of fields of the hash.
        :return:
        """
        return self.conn.hlen(self.key)

    def hset(self, field, value):
        """
        Set ``field`` to ``value``.
        :param field:
        :param value:
        :return: the client's HSET result
        """
        return self.conn.hset(self.key, field, value)

    def hvals(self):
        """
        Return all values of the hash.
        :return: list of values, or None when there are none
        """
        values = self.conn.hvals(self.key)
        if not values:
            return None
        if sys.version_info.major == 3:
            # see hgetall: decode only what is still bytes
            return [self._to_text(value) for value in values]
        return values

    def change_key(self, key):
        """
        Switch to another key.
        :param key:
        :return:
        """
        self.key = key

    # ===============================================
    # List helpers: each forwards to the client and returns its result.
    def blpop(self, timeout):
        return self.conn.blpop(self.key, timeout=timeout)

    def brpop(self, timeout):
        return self.conn.brpop(self.key, timeout=timeout)

    def brpoplpush(self, dst, timeout):
        return self.conn.brpoplpush(self.key, dst=dst, timeout=timeout)

    def lindex(self, i):
        return self.conn.lindex(self.key, index=i)

    def llen(self):
        return self.conn.llen(self.key)

    def lpop(self):
        return self.conn.lpop(self.key)

    def lpush(self, *values):
        """Push ``values`` to the head of the list."""
        return self.conn.lpush(self.key, *values)

    def lrange(self, start, stop):
        return self.conn.lrange(self.key, start, stop)

    def lset(self, i, value):
        return self.conn.lset(self.key, index=i, value=value)

    def rpop(self):
        return self.conn.rpop(self.key)

    def rpoplpush(self, dst):
        return self.conn.rpoplpush(self.key, dst=dst)

    def rpush(self, value):
        return self.conn.rpush(self.key, value)
Esempio n. 15
0
class RedisMiddleware(object):
    """
    Task manager: handles task related operations, such as checking whether
    a task is new and reading the text of already crawled tasks.
    """
    def __init__(self, redis_params):
        """
        :param redis_params: dict; 'startup_nodes' and 'password' are read
                             from it
        """
        self.redis_cli = StrictRedisCluster(
            startup_nodes=redis_params.get('startup_nodes', ''),
            password=redis_params.get('password', ''))
        # Filter for urls.
        # NOTE(review): the original comment spoke of 6 blocks and a default
        # memory size of 512M while blockNum=5 is passed - confirm.
        self.bloom_filter = BloomFilter(
            self.redis_cli, blockNum=5,
            key='bloomfilter_weibo')

    def redis_del(self, key=None):
        """
        Delete a redis key.
        Per the original note this is used between crawl rounds to clear
        the url list, which is crawled once per round.
        :return: the client's DELETE result, None when no key is given
        """
        if not key:
            return
        res = self.redis_cli.delete(key)
        return res

    def redis_rpush(self, name, data):
        """
        Push data to the tail of the given task list (RPUSH).
        ``data`` may be a single value or a list of values.
        Errors are swallowed (best effort).
        :return: None
        """

        try:
            if isinstance(data, list):
                for each in data:
                    self.redis_cli.rpush(name, each)
            else:
                # a single value goes to the tail as well
                self.redis_cli.rpush(name, data)
        except Exception:
            return

    def redis_lpush(self, name, data):
        """
        Push data to the head of the given task list (LPUSH).
        ``data`` may be a single value or a list of values.
        Errors are swallowed (best effort).
        :return: None
        """

        try:
            if isinstance(data, list):
                for each in data:
                    self.redis_cli.lpush(name, each)
            else:
                self.redis_cli.lpush(name, data)
        except Exception:
            return

    def redis_rpop(self, name):
        """
        Pop from the tail of the given task list (RPOP).
        :return: the popped value, None on error
        """
        try:
            res = self.redis_cli.rpop(name)
            return res
        except Exception:
            return

    def redis_lpop(self, name):
        """
        Pop from the head of the given task list (LPOP).
        :return: the popped value, None on error
        """
        try:
            res = self.redis_cli.lpop(name)
            return res
        except Exception:
            return

    def redis_brpop(self, name, timeout=1):
        """
        Blocking pop from the tail of the given task list (BRPOP).
        :return: the popped value; None when nothing was returned before
                 the timeout or on error (errors are printed)
        """
        try:
            popped = self.redis_cli.brpop(name, timeout=timeout)
            if popped is None:
                # nothing returned: not an error, so nothing is printed
                return None
            unuse, res = popped
            return res
        except Exception as e:
            print(e)
            return

    def redis_query(self, name):
        """
        Return the length of the given task list (LLEN).
        :param name:
        :return: the length, None on error
        """
        try:
            res = self.redis_cli.llen(name)
            return res
        except Exception:
            return

    def redis_sadd(self, name, data):
        """
        Add data to a set; ``data`` may be a single value, a list or a set.
        Errors are swallowed (best effort).
        :return: None
        """
        try:
            if isinstance(data, (list, set)):
                for each in data:
                    self.redis_cli.sadd(name, each)
            else:
                self.redis_cli.sadd(name, data)
        except Exception:
            return

    def redis_sismember(self, name, data):
        """
        Tell whether ``data`` is a member of the set.
        :return:
        """
        return self.redis_cli.sismember(name, data)

    def redis_scard(self, name):
        """
        Return the number of members of the set.
        :return: int
        """
        return int(self.redis_cli.scard(name))

    def redis_spop(self, name):
        """
        Pop one member of the set (SPOP).
        :param name:
        :return:
        """
        return self.redis_cli.spop(name)

    def redis_srem(self, name, data):
        """
        Remove the given member from the set.
        :param name:
        :param data:
        :return:
        """
        self.redis_cli.srem(name, data)
Esempio n. 16
0
THREAD_PROXY_MAP = {}  # mapping between threads and proxies
season = time.strftime('%Y-%m')  # `season` parameter of the user's player-list url


def get_redis_proxy():
    '''
    从redis相应的key中获取代理ip(读取快代理的代理ip)
    :return:
    '''
    current_time = int(time.strftime('%H%M%S'))
    if 001000 >= current_time >= 000000:  # 退出进程
        print time.strftime('[%Y-%m-%d %H:%M:%S]:'), 'get_redis_proxy()退出'
        return False
    startup_nodes = [{'host': 'redis3', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
    pubg_friends_proxy_length = r.llen(
        'spider:pubg_friends:proxy:kuai')  # pubg_friends
    print time.strftime(
        '[%Y-%m-%d %H:%M:%S]'
    ), 'redis中pubg_friends的代理ip长度:', pubg_friends_proxy_length
    if pubg_friends_proxy_length >= 50:
        proxy_length = 50
    else:
        print '当前快代理redis中的代理数量少于50:', r.llen('spider:pubg_friends:proxy:kuai')
        time.sleep(60)
        return get_redis_proxy()

    for i in xrange(proxy_length):
        ip = r.lpop('spider:pubg_friends:proxy:kuai')
        if ip:
            proxies = {
                'http': "http://{ip}".format(ip=ip),
Esempio n. 17
0
PROXY_IP_Q = Queue.Queue()  # queue of proxy ips
season = time.strftime('%Y-%m')  # `season` parameter of the user's player-list url


def get_redis_proxy():
    '''
    从redis相应的key中获取代理ip(读取快代理的代理ip)
    :return:
    '''
    current_time = int(time.strftime('%H%M%S'))
    if 001000 >= current_time >= 000000:  # 退出进程
        print time.strftime('[%Y-%m-%d %H:%M:%S]:'), 'get_redis_proxy()退出'
        return False
    startup_nodes = [{'host': 'redis3', 'port': '6379'}]
    r = StrictRedisCluster(startup_nodes=startup_nodes, decode_responses=True)
    pubg_death_proxy_length = r.llen(
        'spider:pubg_death:proxy:kuai')  # pubg_death
    print time.strftime('[%Y-%m-%d %H:%M:%S]'
                        ), 'redis中pubg_death的代理ip长度:', pubg_death_proxy_length
    if pubg_death_proxy_length >= 50:
        proxy_length = 50
    else:
        print '当前快代理redis中的代理数量少于50:', r.llen('spider:pubg_death:proxy:kuai')
        time.sleep(60)
        return get_redis_proxy()

    for i in xrange(proxy_length):
        ip = r.lpop('spider:pubg_death:proxy:kuai')
        if ip:
            proxies = {
                'http': "http://{ip}".format(ip=ip),
                'https': "http://{ip}".format(ip=ip)
Esempio n. 18
0
#!/usr/bin/python3.4
# -*- coding: utf-8 -*-

import redis
from rediscluster import StrictRedisCluster

# Seed nodes of the cluster: three ports on each of the two hosts.
redis_nodes = [
    {'host': host, 'port': port}
    for host, ports in (
        ('192.168.230.218', (6380, 6381, 6382)),
        ('192.168.230.223', (6383, 6384, 6385)),
    )
    for port in ports
]

r = StrictRedisCluster(startup_nodes=redis_nodes)

name = "url"

# Print the length of the list, then its full content.
length = r.llen(name)
print(length)
print(r.lrange(name, 0, -1))
Esempio n. 19
0
class RedisMiddleware(object):
    """
    Task manager backed by a Redis cluster.

    Handles task-related operations such as checking whether a task is new
    and reading back already-crawled tasks.  The push/pop/query helpers are
    best-effort: when the Redis call fails with an ordinary exception they
    return ``None`` instead of raising.
    """
    def __init__(self, taskname, redis_params):
        """
        Connect to the cluster and create the two bloom filters.

        :param taskname: task name, used as prefix of the list filter's key
        :param redis_params: dict whose ``startup_nodes`` and ``password``
            entries are forwarded to ``StrictRedisCluster`` (each falls back
            to ``''`` when missing)
        """
        self.redis_cli = StrictRedisCluster(
            startup_nodes=redis_params.get('startup_nodes', ''),
            password=redis_params.get('password', ''))
        # URL filter under a fixed key, split over 6 blocks.
        # NOTE(review): the original note says this defaults to 512M of
        # memory -- confirm against BloomFilter's defaults.
        self.bloom_urls = BloomFilter(
            self.redis_cli, blockNum=6,
            key='bloomfilter_pub')
        # Per-task list filter using the default block count.
        # NOTE(review): the original note says 32M; that matches 1 << 28
        # only if bit_size is counted in bits -- confirm.
        self.bloom_list = BloomFilter(self.redis_cli,
                                      key='{}:redis_list'.format(taskname),
                                      bit_size=1 << 28)

    def redis_del(self, key=None):
        """
        Delete ``key`` from Redis.

        Per the original author's note this is used in loop crawling to
        clear the list URLs, so that each list URL is crawled only once per
        loop.

        :param key: key to delete; a falsy key is ignored
        :return: the result of ``delete``, or ``None`` when no key was given
        """
        if not key:
            return None
        return self.redis_cli.delete(key)

    def redis_push(self, name, data):
        """
        Push data onto the task list ``name``.

        Uses ``lpush``, so new data goes to the front of the list.  A
        ``list`` is pushed element by element; anything else is pushed as a
        single value.

        :param name: key of the Redis list
        :param data: a single value or a ``list`` of values
        :return: always ``None``; ordinary Redis errors are swallowed
        """
        try:
            if isinstance(data, list):
                for each in data:
                    self.redis_cli.lpush(name, each)
            else:
                self.redis_cli.lpush(name, data)
        except Exception:
            # Best-effort: swallow ordinary failures, but (unlike a bare
            # ``except:``) let KeyboardInterrupt/SystemExit propagate.
            return None

    def redis_pop(self, name):
        """
        Pop one item from the tail of the task list ``name`` (``rpop``).

        :param name: key of the Redis list
        :return: the popped value, or ``None`` when ``rpop`` returned
            ``None`` or the call failed
        """
        try:
            return self.redis_cli.rpop(name)
        except Exception:
            # Best-effort, see redis_push.
            return None

    def redis_brpop(self, name, timeout=1):
        """
        Blocking pop from the tail of the task list ``name`` (``brpop``).

        :param name: key of the Redis list
        :param timeout: value forwarded to ``brpop`` as its ``timeout``
        :return: the popped value, or ``None`` when ``brpop`` returned
            ``None`` (timeout) or the call failed; failures are printed
        """
        try:
            popped = self.redis_cli.brpop(name, timeout=timeout)
            if popped is None:
                # A timeout is the normal "nothing to do" case, not an
                # error: return quietly instead of tripping over the
                # tuple unpacking below and printing a TypeError.
                return None
            _key, value = popped
            return value
        except Exception as e:
            print(e)
            return None

    def redis_query(self, name):
        """
        Return the number of items in the task list ``name`` (``llen``).

        :param name: key of the Redis list
        :return: the list length, or ``None`` when the call failed
        """
        try:
            return self.redis_cli.llen(name)
        except Exception:
            # Best-effort, see redis_push.
            return None
Esempio n. 20
0
class RedisQueue(object):
    """
    A Queue like message built over redis

    Items are serialized with ``umsgpack`` and kept in the redis list
    ``name``: ``put`` appends with ``rpush`` and ``get`` removes with
    ``lpop``.  Blocking calls poll redis, sleeping at most ``max_timeout``
    seconds between attempts.
    """

    Empty = BaseQueue.Empty
    Full = BaseQueue.Full
    max_timeout = 0.3  # longest single sleep (seconds) between retries

    def __init__(self, name, host='localhost', port=6379, db=0,
                 maxsize=0, lazy_limit=True, password=None, cluster_nodes=None):
        """
        Constructor for RedisQueue

        name:       key of the redis list that backs the queue.
        host, port, db, password:
                    connection settings for a single redis server; only
                    used when cluster_nodes is None.
        maxsize:    an integer that sets the upperbound limit on the number of
                    items that can be placed in the queue.
        lazy_limit: redis queue is shared via instance, a lazy size limit is used
                    for better performance.
        cluster_nodes:
                    startup nodes for a redis cluster; when given, a
                    StrictRedisCluster client is used instead.
        """
        self.name = name
        if cluster_nodes is not None:
            # Imported lazily so rediscluster is only needed in cluster mode.
            from rediscluster import StrictRedisCluster
            self.redis = StrictRedisCluster(startup_nodes=cluster_nodes)
        else:
            self.redis = redis.StrictRedis(host=host, port=port, db=db, password=password)
        self.maxsize = maxsize
        self.lazy_limit = lazy_limit
        self.last_qsize = 0

    def qsize(self):
        """Return the current list length and cache it in ``last_qsize``."""
        self.last_qsize = self.redis.llen(self.name)
        return self.last_qsize

    def empty(self):
        """Return True when the queue currently holds no items."""
        return self.qsize() == 0

    def full(self):
        """Return True when ``maxsize`` is set and has been reached."""
        return bool(self.maxsize and self.qsize() >= self.maxsize)

    def put_nowait(self, obj):
        """
        Append ``obj`` without waiting.

        With ``lazy_limit`` the cached ``last_qsize`` is trusted while it is
        below ``maxsize``, which skips the ``llen`` round trip; otherwise the
        real size is checked.

        Returns True; raises ``Full`` when the queue is full.
        """
        within_cached_limit = self.lazy_limit and self.last_qsize < self.maxsize
        if not within_cached_limit and self.full():
            raise self.Full
        # rpush's return value is stored as the new cached size.
        self.last_qsize = self.redis.rpush(self.name, umsgpack.packb(obj))
        return True

    def put(self, obj, block=True, timeout=None):
        """
        Append ``obj``, optionally waiting for free space.

        block:   when False, behave like ``put_nowait``.
        timeout: seconds to keep retrying; ``None`` retries forever.  ``0``
                 raises as soon as the first attempt fails (it used to be
                 treated like ``None`` and never returned).

        Returns True; raises ``Full`` when no space became available in time.
        """
        if not block:
            return self.put_nowait(obj)

        start_time = time.time()
        while True:
            try:
                return self.put_nowait(obj)
            except self.Full:
                if timeout is None:
                    time.sleep(self.max_timeout)
                    continue
                lasted = time.time() - start_time
                if timeout > lasted:
                    time.sleep(min(self.max_timeout, timeout - lasted))
                else:
                    raise

    def get_nowait(self):
        """Pop and return the next item; raise ``Empty`` when there is none."""
        ret = self.redis.lpop(self.name)
        if ret is None:
            raise self.Empty
        return umsgpack.unpackb(ret)

    def get(self, block=True, timeout=None):
        """
        Pop and return the next item, optionally waiting for one.

        block:   when False, behave like ``get_nowait``.
        timeout: seconds to keep retrying; ``None`` retries forever.  ``0``
                 raises as soon as the first attempt fails (it used to be
                 treated like ``None`` and never returned).

        Raises ``Empty`` when no item arrived in time.
        """
        if not block:
            return self.get_nowait()

        start_time = time.time()
        while True:
            try:
                return self.get_nowait()
            except self.Empty:
                if timeout is None:
                    time.sleep(self.max_timeout)
                    continue
                lasted = time.time() - start_time
                if timeout > lasted:
                    time.sleep(min(self.max_timeout, timeout - lasted))
                else:
                    raise
Esempio n. 21
0
class RedisQueue(object):
    """
    A Queue like message built over redis

    Objects are packed with ``umsgpack`` and stored in the redis list
    ``name``; ``put`` uses ``rpush`` and ``get`` uses ``lpop``.  Blocking
    operations poll redis and sleep up to ``max_timeout`` seconds between
    attempts.
    """

    Empty = BaseQueue.Empty
    Full = BaseQueue.Full
    max_timeout = 0.3  # upper bound (seconds) for one sleep between retries

    def __init__(self, name, host='localhost', port=6379, db=0,
                 maxsize=0, lazy_limit=True, password=None, cluster_nodes=None):
        """
        Constructor for RedisQueue

        name:       key of the redis list used as the queue.
        host, port, db, password:
                    settings for a single-server connection; ignored when
                    cluster_nodes is given.
        maxsize:    an integer that sets the upperbound limit on the number of
                    items that can be placed in the queue.
        lazy_limit: redis queue is shared via instance, a lazy size limit is used
                    for better performance.
        cluster_nodes:
                    startup nodes of a redis cluster; when not None a
                    StrictRedisCluster client is created instead.
        """
        self.name = name
        if cluster_nodes is not None:
            # Local import: rediscluster is only required for cluster mode.
            from rediscluster import StrictRedisCluster
            self.redis = StrictRedisCluster(startup_nodes=cluster_nodes)
        else:
            self.redis = redis.StrictRedis(host=host, port=port, db=db, password=password)
        self.maxsize = maxsize
        self.lazy_limit = lazy_limit
        self.last_qsize = 0

    def qsize(self):
        """Fetch the list length from redis, remember it, and return it."""
        self.last_qsize = self.redis.llen(self.name)
        return self.last_qsize

    def empty(self):
        """Return True when the queue has no items right now."""
        return self.qsize() == 0

    def full(self):
        """Return True when a ``maxsize`` is configured and reached."""
        return bool(self.maxsize and self.qsize() >= self.maxsize)

    def put_nowait(self, obj):
        """
        Append ``obj`` immediately.

        When ``lazy_limit`` is on and the cached ``last_qsize`` is still
        below ``maxsize`` the size check is skipped; otherwise ``full()``
        asks redis for the real length.

        Returns True; raises ``Full`` when the queue is full.
        """
        if self.lazy_limit and self.last_qsize < self.maxsize:
            pass
        elif self.full():
            raise self.Full
        # The value returned by rpush becomes the new cached size.
        self.last_qsize = self.redis.rpush(self.name, umsgpack.packb(obj))
        return True

    def _retry(self, attempt, retry_on, timeout):
        """Call ``attempt()`` until it stops raising ``retry_on`` or ``timeout`` seconds pass."""
        started = time.time()
        while True:
            try:
                return attempt()
            except retry_on:
                if timeout is None:
                    # No deadline: keep polling.
                    time.sleep(self.max_timeout)
                    continue
                remaining = timeout - (time.time() - started)
                if remaining <= 0:
                    # Deadline reached (immediately so for timeout=0,
                    # which used to be mistaken for "no timeout").
                    raise
                time.sleep(min(self.max_timeout, remaining))

    def put(self, obj, block=True, timeout=None):
        """
        Append ``obj``, waiting for room when ``block`` is true.

        block:   when False, identical to ``put_nowait``.
        timeout: seconds to keep retrying; ``None`` means retry forever and
                 ``0`` means give up right after the first failed attempt.

        Returns True; raises ``Full`` if the queue stayed full.
        """
        if not block:
            return self.put_nowait(obj)
        return self._retry(lambda: self.put_nowait(obj), self.Full, timeout)

    def get_nowait(self):
        """Pop the next item and return it; raise ``Empty`` if there is none."""
        ret = self.redis.lpop(self.name)
        if ret is None:
            raise self.Empty
        return umsgpack.unpackb(ret)

    def get(self, block=True, timeout=None):
        """
        Pop the next item, waiting for one when ``block`` is true.

        block:   when False, identical to ``get_nowait``.
        timeout: seconds to keep retrying; ``None`` means retry forever and
                 ``0`` means give up right after the first failed attempt.

        Raises ``Empty`` if no item showed up.
        """
        if not block:
            return self.get_nowait()
        return self._retry(self.get_nowait, self.Empty, timeout)