Exemple #1
0
class SSDBHashMap(SSDBBase, object):
    """Convenience wrapper around one SSDB hashmap.

    Every per-key operation targets the hashmap called ``hashmap_name``
    through a client backed by a blocking connection pool.

    ``SSDBBase`` is listed before ``object`` on purpose: with ``object``
    first, Python cannot build a consistent method resolution order when
    ``SSDBBase`` is itself a new-style class (always the case on Python 3),
    and the class statement raises ``TypeError``.

    :param hashmap_name: name of the hashmap the per-key methods operate on
    :param host: SSDB server host
    :param port: SSDB server port
    :param max_connections: upper bound of pooled connections
    :param timeout: timeout handed to the blocking connection pool
    """

    def __init__(self,
                 hashmap_name,
                 host="127.0.0.1",
                 port=8888,
                 max_connections=10,
                 timeout=60):
        self.hashmap_name = hashmap_name
        self.host = host
        self.port = port
        self.max_connections = max_connections
        self.timeout = timeout
        pool = BlockingConnectionPool(connection_class=Connection,
                                      max_connections=max_connections,
                                      timeout=timeout,
                                      host=host,
                                      port=port)
        self.ssdb = SSDB(connection_pool=pool)
        # The base class is initialised with the freshly built client.
        SSDBBase.__init__(self, self.ssdb)

    def set(self, key, value):
        """Store ``value`` under ``key`` in the hashmap (``hset``)."""
        return self.ssdb.hset(self.hashmap_name, key, value)

    def get(self, key):
        """Fetch the value stored under ``key`` (``hget``)."""
        return self.ssdb.hget(self.hashmap_name, key)

    def delete(self, key):
        """Remove ``key`` from the hashmap (``hdel``)."""
        return self.ssdb.hdel(self.hashmap_name, key)

    def keys(self, name_start="", name_end="", limit=10):
        """List up to ``limit`` keys of the hashmap (``hkeys``).

        ``name_start`` and ``name_end`` are forwarded unchanged as the
        range arguments of ``hkeys``.
        """
        return self.ssdb.hkeys(self.hashmap_name, name_start, name_end, limit)

    def exists(self, key):
        """Check whether ``key`` is present in the hashmap (``hexists``)."""
        return self.ssdb.hexists(self.hashmap_name, key)

    def size(self):
        """Return the number of entries in the hashmap (``hsize``)."""
        return self.ssdb.hsize(self.hashmap_name)

    def list(self, name_start="", name_end="", limit=10):
        """List hashmaps whose names fall in the range (name_start, name_end].

        Unlike the other methods this one is not scoped to
        ``self.hashmap_name``; it enumerates hashmap names (``hlist``).
        """
        return self.ssdb.hlist(name_start, name_end, limit)

    def scan(self, key_start, key_end="", limit=10):
        """Scan the hashmap from ``key_start`` to ``key_end`` (``hscan``)."""
        return self.ssdb.hscan(self.hashmap_name, key_start, key_end, limit)

    def clear(self):
        """Delete every entry of the hashmap (``hclear``)."""
        return self.ssdb.hclear(self.hashmap_name)
Exemple #2
0
class ssdb_proxy(object):
    """Thin proxy exposing hash operations on one named SSDB hashmap."""

    def __init__(self, set_name, host, port, max_connections=2, timeout=60):
        """Remember the connection settings and build the pooled client.

        :param set_name: name of the hashmap every method operates on
        :param host: SSDB server host
        :param port: SSDB server port
        :param max_connections: upper bound of pooled connections
        :param timeout: timeout handed to the blocking connection pool
        """
        self.set_name = set_name
        self.host, self.port = host, port
        self.max_connections = max_connections
        self.timeout = timeout
        self.ssdb = SSDB(
            connection_pool=BlockingConnectionPool(
                connection_class=Connection,
                max_connections=max_connections,
                timeout=timeout,
                host=host,
                port=port,
            )
        )

    def hset(self, key, value):
        """Store ``value`` under ``key`` in the hashmap."""
        target = self.set_name
        return self.ssdb.hset(target, key, value)

    def hget(self, key):
        """Return the value stored under ``key``."""
        target = self.set_name
        return self.ssdb.hget(target, key)

    def hgetall(self):
        """Return all data held in the hashmap."""
        target = self.set_name
        return self.ssdb.hgetall(target)

    def hdel(self, key):
        """Delete ``key`` from the hashmap."""
        target = self.set_name
        return self.ssdb.hdel(target, key)

    def hexists(self, key):
        """Tell whether ``key`` exists in the hashmap."""
        target = self.set_name
        return self.ssdb.hexists(target, key)

    def size(self):
        """Return the size of the hashmap."""
        target = self.set_name
        return self.ssdb.hsize(target)

    def clean(self):
        """Remove all data from the hashmap."""
        target = self.set_name
        return self.ssdb.hclear(target)
# Get URLS from SSDB and turn into a text file called links
import subprocess
import os

from ssdb import SSDB
ssdb = SSDB(host='104.131.54.255', port=8888)

# The host identifies itself by the public IP reported by ipinfo.io;
# that value is the field name under which its offset is stored.
hostid = subprocess.check_output(['curl', 'http://ipinfo.io/ip']).strip()

# Both values come from the "crawl" hash: a shared "limit" and a
# per-host offset keyed by the host id obtained above.
limit = ssdb.hget("crawl", "limit")
offset = ssdb.hget("crawl", hostid)

users = ssdb.zrange("instagram-users", int(offset), int(limit))

# The context manager closes the file even if the write raises.
with open("links", "w") as f:
    f.write("\n".join(map(str, users)))
class SsdbClient(object):
    """
    SSDB client

    Proxies are kept in SSDB hashes:
        raw proxies live in the hash named raw_proxy, keyed by the proxy's
        ip:port with value None (proxy attributes may be added later);
        validated proxies served to flask live in the hash named
        useful_proxy, keyed the same way with value None (proxy attributes
        may be added later).

    """
    def __init__(self, name, host, port):
        """
        init
        :param name: hash name
        :param host: ssdb host
        :param port: ssdb port
        :return:
        """
        self.name = name
        self.__conn = SSDB(
            connection_pool=BlockingConnectionPool(host=host, port=port))

    def get(self):
        """
        get an item

        Pick a random key from the current hash; call
        changeTable("useful_proxy_queue") beforehand to read usable proxies.
        :return: a random key, or None when the hash is empty
        """
        values = self.__conn.hgetall(name=self.name)
        # random.choice needs a sequence; a Python 3 dict view is not one.
        return random.choice(list(values.keys())) if values else None

    def put(self, key):
        """
        put an  item

        Add a proxy to the hash selected with changeTable. dict and list
        keys are serialized to UTF-8 encoded JSON; the field's counter is
        then incremented by 1.
        :param key:
        :return: result of ``hincr``
        """
        # json.dumps (not json.dump, which requires a file object) builds
        # the serialized string.
        key = json.dumps(key, ensure_ascii=False).encode('utf-8') if isinstance(
            key, (dict, list)) else key
        return self.__conn.hincr(self.name, key, 1)

    def getvalue(self, key):
        """
        Return the value stored under ``key``; any falsy result becomes None
        :param key:
        :return:
        """
        value = self.__conn.hget(self.name, key)
        return value if value else None

    def pop(self):
        """
        pop an item

        Pop a random proxy from the hash selected with changeTable
        :return: the removed key, or None when the hash is empty
        """
        key = self.get()
        if key:
            self.__conn.hdel(self.name, key)
        return key

    def delete(self, key):
        """
        Remove the ``key`` from hash ``name``
        :param key:
        :return:
        """
        self.__conn.hdel(self.name, key)

    def inckey(self, key, value):
        """
        Increment the counter stored under ``key`` by ``value``
        :param key:
        :param value:
        :return:
        """
        self.__conn.hincr(self.name, key, value)

    def getAll(self):
        """
        Return the keys of the current hash
        :return:
        """
        return self.__conn.hgetall(self.name).keys()

    def get_status(self):
        """
        Return the number of elements in hash ``name``
        :return:
        """
        return self.__conn.hsize(self.name)

    def changeTable(self, name):
        """
        Switch the hash that subsequent calls operate on
        :param name: hash name
        :return:
        """
        self.name = name
Exemple #5
0
class QueueSSDB(QueueBase.QueueBase):
    """
    base class , only provide interface for sub class to implement
    """
    def __init__(self, name, host='localhost', port=8888, **kwargs):
        """
        Initialise the base class, then open a pooled SSDB client.

        :param name: name of the structure the instance operates on
        :param host: ssdb host
        :param port: ssdb port
        :param kwargs: accepted but not used here
        """
        QueueBase.QueueBase.__init__(self, name, host, port)
        # Host and port are read back from the instance after base init.
        pool = BlockingConnectionPool(host=self.host, port=self.port)
        self.__conn = SSDB(connection_pool=pool)


    #queue
    @QueueBase.catch
    def put(self, value, *args, **kwargs):
        """
        Push an item onto the back of the queue ``self.name``.

        dict and list values are serialized to UTF-8 encoded JSON first;
        anything else is pushed unchanged.
        :param value: item to enqueue
        :param args: not used
        :param kwargs: not used
        :return: result of the ``qpush_back`` call
        """
        payload = value
        if isinstance(value, (dict, list)):
            payload = json.dumps(value, ensure_ascii=False).encode('utf-8')
        return self.__conn.qpush_back(self.name, payload)

    def save(self, value, *args, **kwargs):
        """
        Push an item onto the back of the queue ``self.name``.

        Same body as ``put`` but without the ``QueueBase.catch`` decorator.
        dict and list values are serialized to UTF-8 encoded JSON first.
        :param value: item to enqueue
        :param args: not used
        :param kwargs: not used
        :return: result of the ``qpush_back`` call
        """
        needs_json = isinstance(value, (dict, list))
        if needs_json:
            encoded = json.dumps(value, ensure_ascii=False).encode('utf-8')
            return self.__conn.qpush_back(self.name, encoded)
        return self.__conn.qpush_back(self.name, value)

    @QueueBase.catch
    def get(self, *args, **kwargs):
        """
        Pop one element from the front of the queue ``self.name``.

        :param args: not used
        :param kwargs: not used
        :return: first item of the ``qpop_front`` result, or the falsy
                 result itself when nothing was popped
        """
        popped = self.__conn.qpop_front(self.name)
        if not popped:
            return popped
        return popped[0]

    @QueueBase.catch
    def getMore(self, *args, **kwargs):
        """
        Pop elements from the front of the queue ``self.name``.

        :param args: not used
        :param kwargs: forwarded as keyword arguments to ``qpop_front``
        :return: the unmodified ``qpop_front`` result
        """
        return self.__conn.qpop_front(self.name, **kwargs)

    @QueueBase.catch
    def size(self, *args, **kwargs):
        """
        Return the ``qsize`` result for the queue ``self.name``.

        :param args: not used
        :param kwargs: not used
        """
        queue_length = self.__conn.qsize(self.name)
        return queue_length

    @QueueBase.catch
    def changeTable(self, name):
        """
        Point the instance at another queue.

        Only ``self.name`` is replaced; no request is sent to the server.
        :param name: name used by subsequent operations
        :return:
        """
        self.name = name

    @QueueBase.catch
    def select_queue(self, name):
        """
        Select the queue that subsequent operations act on.

        Does the same as ``changeTable``: it only rebinds ``self.name``.
        :param name: name used by subsequent operations
        :return:
        """
        self.name = name

    @QueueBase.catch
    def qclaerQueue(self):
        """
        Clear the queue ``self.name`` via ``qclear``.

        :return: result of the ``qclear`` call
        """
        cleared = self.__conn.qclear(self.name)
        return cleared


    #KV
    @QueueBase.catch
    def keySet(self, key, value):
        """
        Set the value at key ``key`` to ``value``.

        dict and list values are stored as UTF-8 encoded JSON; any other
        value is stored unchanged.
        :param key:
        :param value:
        :return: result of the ``set`` call
        """
        if isinstance(value, (dict, list)):
            value = json.dumps(value, ensure_ascii=False).encode('utf-8')
        return self.__conn.set(key, value)

    @QueueBase.catch
    def keySetx(self, name, value, ttl=-1):
        """
        Store ``value`` under key ``name`` with an expiry of ``ttl``.

        The arguments are handed to ``setx`` unchanged; unlike ``keySet``
        no JSON serialization is applied to ``value``.
        :param name:
        :param value:
        :param ttl: expiry passed to ``setx`` (defaults to -1)
        :return: result of the ``setx`` call
        """
        outcome = self.__conn.setx(name, value, ttl=ttl)
        return outcome

    @QueueBase.catch
    def keyTtl(self, key):
        """
        Returns the number of seconds until the key ``key`` will expire.

        :param key: key whose remaining time-to-live is queried
        :return: result of the ``ttl`` call
        """
        # The result used to be dropped, so callers always received None.
        return self.__conn.ttl(key)

    @QueueBase.catch
    def keyGet(self, key):
        """
        Look up the value stored at ``key`` with ``get``.

        :param key:
        :return: result of the ``get`` call
        """
        stored = self.__conn.get(key)
        return stored

    @QueueBase.catch
    def keyDel(self, key):
        """
        Remove the entry stored at ``key`` with ``delete``.

        :param key:
        :return: result of the ``delete`` call
        """
        removed = self.__conn.delete(key)
        return removed

    @QueueBase.catch
    def keyKeys(self, key_start='', key_end=''):
        """
        List keys between ``key_start`` and ``key_end``.

        The request is issued with a fixed limit of 100000 keys.
        :param key_start:
        :param key_end:
        :return: result of the ``keys`` call
        """
        query = dict(name_start=key_start, name_end=key_end, limit=100000)
        return self.__conn.keys(**query)
    @QueueBase.catch
    def keyexists(self, key):
        """
        Ask the server whether ``key`` exists.

        :param key:
        :return: result of the ``exists`` call
        """
        present = self.__conn.exists(key)
        return present
    #SET
    @QueueBase.catch
    def zsetSet(self, field, score=1):
        """
        Store ``field`` with ``score`` in the sorted set ``self.name``.

        A truthy ``field`` is normalised first: dict and list values are
        serialized to JSON, and anything of 100 or more items is cut to
        its first 100. A falsy ``field`` is passed through untouched.
        :param field:
        :param score:
        :return: result of the ``zset`` call
        """
        if field:
            if isinstance(field, (dict, list)):
                field = json.dumps(field)
            if len(field) >= 100:
                field = field[:100]
        return self.__conn.zset(self.name, field, score)
    @QueueBase.catch
    def zgetSet(self, key):
        """
        Fetch the ``zget`` result for ``key`` in the sorted set ``self.name``.

        :param key:
        """
        score = self.__conn.zget(self.name, key)
        return score
    @QueueBase.catch
    def zexistsSet(self, name, field):
        """
        Check via ``zexists`` whether ``field`` is in the sorted set ``name``.

        The set is the ``name`` argument, not ``self.name``.
        :param name:
        :param field:
        """
        found = self.__conn.zexists(name, field)
        return found
    @QueueBase.catch
    def zkeysSet(self):
        """
        List the keys of the sorted set ``self.name`` via ``zkeys``.

        All three range arguments are empty strings and the limit is
        fixed at 100000000.
        """
        unbounded = ''
        return self.__conn.zkeys(self.name, unbounded, unbounded, unbounded,
                                 limit=100000000)
    @QueueBase.catch
    def zdelSet(self, key):
        """
        Delete ``key`` from the sorted set ``self.name`` via ``zdel``.

        :param key:
        """
        deleted = self.__conn.zdel(self.name, key)
        return deleted
    @QueueBase.catch
    def multi_zgetSet(self, *keys):
        """
        Fetch several keys of the sorted set ``self.name`` in one call.

        :param keys: keys forwarded positionally to ``multi_zget``
        """
        found = self.__conn.multi_zget(self.name, *keys)
        return found
    #Hash
    @QueueBase.catch
    def hgetallHash(self, key):
        """
        Return the ``hgetall`` result for the hash named ``key``.

        The hash is the ``key`` argument, not ``self.name``.
        :param key: hash name
        """
        contents = self.__conn.hgetall(key)
        return contents
    @QueueBase.catch
    def hincrHash(self, name, key):
        """
        Increment field ``key`` of the hash ``name`` by 1 via ``hincr``.

        :param name: hash name
        :param key: field to increment
        """
        updated = self.__conn.hincr(name, key, amount=1)
        return updated
    @QueueBase.catch
    def multi_hsetHash(self, name, **mapping):
        """
        Set several fields of the hash ``name`` in one ``multi_hset`` call.

        :param name: hash name
        :param mapping: field/value pairs forwarded as keyword arguments
        """
        outcome = self.__conn.multi_hset(name, **mapping)
        return outcome

    @QueueBase.catch
    def hlistHash(self, start, end):
        """
        List hash names between ``start`` and ``end`` via ``hlist``.

        The request uses a fixed limit of 10000000.
        :param start:
        :param end:
        """
        names = self.__conn.hlist(start, end, limit=10000000)
        return names
    @QueueBase.catch
    def hclearHash(self, key):
        """
        Clear the hash named ``key`` via ``hclear``.

        The hash is the ``key`` argument, not ``self.name``.
        :param key: hash name
        """
        cleared = self.__conn.hclear(key)
        return cleared

    @QueueBase.catch
    def hset(self, key, value):
        """
        Store ``value`` under field ``key`` of the hash ``self.name``.

        dict and list values are stored as UTF-8 encoded JSON; any other
        value is stored unchanged.
        :param key:
        :param value:
        :return: result of the ``hset`` call
        """
        stored = value
        if isinstance(value, (dict, list)):
            stored = json.dumps(value, ensure_ascii=False).encode('utf-8')
        return self.__conn.hset(self.name, key, stored)

    @QueueBase.catch
    def hsize(self):
        """
        Return the ``hsize`` result for the hash ``self.name``.
        """
        field_count = self.__conn.hsize(self.name)
        return field_count

    @QueueBase.catch
    def hget(self, key = None):
        """
        Read one value from the hash ``self.name``.

        With a truthy ``key`` the value stored under that field is
        returned. Without one, the first field reported by ``hkeys`` is
        read, deleted from the hash and its value returned.
        :param key: field to read; falsy to take and remove one entry
        :return: the value read; the visible code has no explicit return
                 for the case where no entry is found
        """
        if key:
            return self.__conn.hget(self.name, key)
        else:
            # Skip the lookup entirely when the hash reports no entries.
            if self.__conn.hsize(self.name) > 0:
                # Ask for a single field name, with no range bounds.
                keys=self.__conn.hkeys(self.name,"", "",limit=1)
                if keys:
                    key=keys[0]
                    v=self.__conn.hget(self.name, key)
                    # NOTE(review): read and delete are separate requests,
                    # so this does not look atomic across clients - confirm.
                    self.__conn.hdel(self.name, key)
                    return v