class SSDBHashMap(SSDBBase, object):
    """Accessor for one named hashmap stored in SSDB.

    Every method except ``list`` forwards to the matching ``h*`` command of
    the underlying client using ``hashmap_name`` as the hashmap name;
    ``list`` enumerates hashmap names and ignores ``hashmap_name``.

    ``SSDBBase`` is listed before ``object`` on purpose: with ``object``
    first, Python 3 (and Python 2 when ``SSDBBase`` is a new-style class)
    cannot build a consistent method resolution order and the class
    statement itself raises ``TypeError``.
    """

    def __init__(self, hashmap_name, host="127.0.0.1", port=8888,
                 max_connections=10, timeout=60):
        """Remember the settings and create a pooled SSDB client.

        :param hashmap_name: name of the hashmap the instance operates on
        :param host: SSDB server host
        :param port: SSDB server port
        :param max_connections: forwarded to ``BlockingConnectionPool``
        :param timeout: forwarded to ``BlockingConnectionPool``
        """
        self.hashmap_name = hashmap_name
        self.host = host
        self.port = port
        self.max_connections = max_connections
        self.timeout = timeout
        pool = BlockingConnectionPool(connection_class=Connection,
                                      max_connections=max_connections,
                                      timeout=timeout, host=host, port=port)
        self.ssdb = SSDB(connection_pool=pool)
        # The base class receives the ready-made client.
        SSDBBase.__init__(self, self.ssdb)

    def set(self, key, value):
        """Call ``hset`` for ``key``/``value`` on this hashmap."""
        return self.ssdb.hset(self.hashmap_name, key, value)

    def get(self, key):
        """Call ``hget`` for ``key`` on this hashmap and return its result."""
        return self.ssdb.hget(self.hashmap_name, key)

    def delete(self, key):
        """Call ``hdel`` for ``key`` on this hashmap."""
        return self.ssdb.hdel(self.hashmap_name, key)

    def keys(self, name_start="", name_end="", limit=10):
        """Call ``hkeys`` on this hashmap with the given bounds and limit."""
        return self.ssdb.hkeys(self.hashmap_name, name_start, name_end, limit)

    def exists(self, key):
        """Call ``hexists`` for ``key`` on this hashmap."""
        return self.ssdb.hexists(self.hashmap_name, key)

    def size(self):
        """Call ``hsize`` on this hashmap and return its result."""
        return self.ssdb.hsize(self.hashmap_name)

    def list(self, name_start="", name_end="", limit=10):
        """List hashmaps whose names fall in ``(name_start, name_end]``.

        Unlike the other methods this does not use ``hashmap_name``.
        """
        return self.ssdb.hlist(name_start, name_end, limit)

    def scan(self, key_start, key_end="", limit=10):
        """Call ``hscan`` on this hashmap with the given bounds and limit."""
        return self.ssdb.hscan(self.hashmap_name, key_start, key_end, limit)

    def clear(self):
        """Call ``hclear`` on this hashmap."""
        return self.ssdb.hclear(self.hashmap_name)
class ssdb_proxy(object):
    """Thin proxy that applies SSDB hash commands to one fixed hash name."""

    def __init__(self, set_name, host, port, max_connections=2, timeout=60):
        """Initialise the database client.

        Stores the arguments on the instance and builds an ``SSDB`` client
        backed by a ``BlockingConnectionPool``.
        """
        self.set_name = set_name
        self.host, self.port = host, port
        self.max_connections = max_connections
        self.timeout = timeout
        pool_options = dict(
            connection_class=Connection,
            max_connections=max_connections,
            timeout=timeout,
            host=host,
            port=port,
        )
        self.ssdb = SSDB(connection_pool=BlockingConnectionPool(**pool_options))

    def hset(self, key, value):
        """Store ``value`` under ``key`` in the hash."""
        client, name = self.ssdb, self.set_name
        return client.hset(name, key, value)

    def hget(self, key):
        """Return the value stored under ``key``."""
        client, name = self.ssdb, self.set_name
        return client.hget(name, key)

    def hgetall(self):
        """Return all data held in the hash."""
        client, name = self.ssdb, self.set_name
        return client.hgetall(name)

    def hdel(self, key):
        """Delete ``key`` from the hash."""
        client, name = self.ssdb, self.set_name
        return client.hdel(name, key)

    def hexists(self, key):
        """Report whether ``key`` exists in the hash."""
        client, name = self.ssdb, self.set_name
        return client.hexists(name, key)

    def size(self):
        """Return the size of the hash."""
        client, name = self.ssdb, self.set_name
        return client.hsize(name)

    def clean(self):
        """Clear all data from the hash."""
        client, name = self.ssdb, self.set_name
        return client.hclear(name)
# Get URLs from SSDB and write them to a text file called "links".
import subprocess
import os

from ssdb import SSDB

ssdb = SSDB(host='104.131.54.255', port=8888)

# The host id is whatever http://ipinfo.io/ip answers (presumably this
# machine's public IP), with surrounding whitespace stripped.
hostid = subprocess.check_output(['curl', 'http://ipinfo.io/ip']).strip()

# Both values come from the "crawl" hash: a shared "limit" entry and a
# per-host entry, keyed by the host id, that is used as the offset.
limit = ssdb.hget("crawl", "limit")
offset = ssdb.hget("crawl", hostid)

users = ssdb.zrange("instagram-users", int(offset), int(limit))

# The context manager closes the file even when the write fails.
with open("links", "w") as f:
    f.write("\n".join(map(str, users)))
class SsdbClient(object):
    """
    SSDB client

    Proxies are kept in SSDB hashes:
    raw proxies live in the hash named raw_proxy, keyed by the proxy's
    ip:port; verified proxies served to flask live in the hash named
    useful_proxy, keyed the same way. The original notes describe the value
    as None (proxy attributes may be added later); ``put`` as written stores
    a counter through ``hincr`` instead.
    """

    def __init__(self, name, host, port):
        """
        init
        :param name: hash name
        :param host: ssdb host
        :param port: ssdb port
        :return:
        """
        self.name = name
        self.__conn = SSDB(
            connection_pool=BlockingConnectionPool(host=host, port=port))

    def get(self):
        """
        get an item

        Return a randomly chosen key of the current hash, or None when the
        hash is empty. To draw from useful_proxy_queue, call
        changeTable("useful_proxy_queue") first.
        :return:
        """
        values = self.__conn.hgetall(name=self.name)
        # list() is required: random.choice needs an indexable sequence and
        # dict key views (Python 3) are not indexable.
        return random.choice(list(values)) if values else None

    def put(self, key):
        """
        put an item

        Add a proxy to the current hash (select it with changeTable) by
        incrementing the field ``key`` by 1. dict and list keys are
        serialised to UTF-8 encoded JSON first.
        :param key:
        :return: result of ``hincr``
        """
        if isinstance(key, (dict, list)):
            # json.dumps returns the text; json.dump would need a file
            # object and raised TypeError here.
            key = json.dumps(key, ensure_ascii=False).encode('utf-8')
        return self.__conn.hincr(self.name, key, 1)

    def getvalue(self, key):
        """
        Return the value stored under ``key``; any falsy value is reported
        as None.
        :param key:
        :return:
        """
        value = self.__conn.hget(self.name, key)
        return value if value else None

    def pop(self):
        """
        pop an item

        Pick a random key from the current hash (select it with
        changeTable), delete it and return it; returns None when the hash
        is empty.
        :return:
        """
        key = self.get()
        if key:
            self.__conn.hdel(self.name, key)
        return key

    def delete(self, key):
        """
        Remove the ``key`` from hash ``name``
        :param key:
        :return:
        """
        self.__conn.hdel(self.name, key)

    def inckey(self, key, value):
        """
        Increment the field ``key`` of the current hash by ``value``.
        :param key:
        :param value:
        :return:
        """
        self.__conn.hincr(self.name, key, value)

    def getAll(self):
        """
        Return the keys of the current hash.
        :return:
        """
        return self.__conn.hgetall(self.name).keys()

    def get_status(self):
        """
        Return the number of elements in hash ``name``
        :return:
        """
        return self.__conn.hsize(self.name)

    def changeTable(self, name):
        """
        Switch the hash name used by subsequent calls.
        :param name:
        :return:
        """
        self.name = name
class QueueSSDB(QueueBase.QueueBase):
    """
    SSDB-backed implementation of the QueueBase interface.

    Besides the queue operations it exposes thin wrappers around the
    key/value, sorted-set and hash commands of the SSDB client. Most methods
    are wrapped with ``QueueBase.catch`` (defined elsewhere; presumably
    error handling - confirm against QueueBase).
    """

    def __init__(self, name, host='localhost', port=8888, **kwargs):
        """
        Initialise the base class and open a pooled SSDB connection.
        Extra keyword arguments are accepted and ignored.
        :param name: queue / set / hash name used by the name-bound methods
        :param host: ssdb host
        :param port: ssdb port
        """
        QueueBase.QueueBase.__init__(self, name, host, port)
        self.__conn = SSDB(connection_pool=BlockingConnectionPool(host=self.host, port=self.port))

    @staticmethod
    def _jsonify(value):
        """Return dict/list values as UTF-8 encoded JSON, others unchanged."""
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False).encode('utf-8')
        return value

    # queue
    @QueueBase.catch
    def put(self, value, *args, **kwargs):
        """
        put an item in the back of a queue
        :param value: dict and list values are stored as JSON
        :param args:
        :param kwargs:
        :return:
        """
        return self.__conn.qpush_back(self.name, self._jsonify(value))

    def save(self, value, *args, **kwargs):
        """
        put an item in the back of a queue; same operation as ``put`` but
        not wrapped with ``QueueBase.catch``
        :param value: dict and list values are stored as JSON
        :param args:
        :param kwargs:
        :return:
        """
        return self.__conn.qpush_back(self.name, self._jsonify(value))

    @QueueBase.catch
    def get(self, *args, **kwargs):
        """
        get an element from the front of the queue
        :param args:
        :param kwargs:
        :return: first element of the ``qpop_front`` result, or that result
                 itself when it is falsy
        """
        value = self.__conn.qpop_front(self.name)
        return value[0] if value else value

    @QueueBase.catch
    def getMore(self, *args, **kwargs):
        """
        pop from the front of the queue, forwarding the keyword arguments
        to ``qpop_front``
        :param args:
        :param kwargs:
        :return: the ``qpop_front`` result, unchanged
        """
        value = self.__conn.qpop_front(self.name, **kwargs)
        return value

    @QueueBase.catch
    def size(self, *args, **kwargs):
        """
        Return the ``qsize`` result for the current queue.
        """
        return self.__conn.qsize(self.name)

    @QueueBase.catch
    def changeTable(self, name):
        """
        change the queue name to operate
        :param name:
        :return:
        """
        self.name = name

    @QueueBase.catch
    def select_queue(self, name):
        """
        change the queue name to operate
        :param name:
        :return:
        """
        self.name = name

    @QueueBase.catch
    def qclaerQueue(self):
        """
        Call ``qclear`` on the current queue. The misspelt method name is
        kept because callers use it.
        """
        return self.__conn.qclear(self.name)

    # KV
    @QueueBase.catch
    def keySet(self, key, value):
        """
        Set the value at key ``name`` to ``value`` .
        :param key:
        :param value: dict and list values are stored as JSON
        :return:
        """
        return self.__conn.set(key, self._jsonify(value))

    @QueueBase.catch
    def keySetx(self, name, value, ttl=-1):
        """
        Set the value of key ``name`` to ``value`` that expires in ``ttl``
        seconds. ``ttl`` can be represented by an integer or a Python
        timedelta object.
        :param name:
        :param value:
        :param ttl:
        :return:
        """
        return self.__conn.setx(name, value, ttl=ttl)

    @QueueBase.catch
    def keyTtl(self, key):
        """
        Returns the number of seconds until the key ``name`` will expire.
        :return:
        """
        # The result used to be computed and then dropped.
        return self.__conn.ttl(key)

    @QueueBase.catch
    def keyGet(self, key):
        """
        Return the value at key ``name``, or ``None`` if the key doesn't
        exist
        :param key:
        :return:
        """
        return self.__conn.get(key)

    @QueueBase.catch
    def keyDel(self, key):
        """
        Delete the key specified by ``name`` .
        :param key:
        :return:
        """
        return self.__conn.delete(key)

    @QueueBase.catch
    def keyKeys(self, key_start='', key_end=''):
        """
        Return a list of the top ``limit`` keys between ``name_start`` and
        ``name_end``; the limit is fixed at 100000
        :param key_start:
        :param key_end:
        :return:
        """
        return self.__conn.keys(name_start=key_start, name_end=key_end, limit=100000)

    @QueueBase.catch
    def keyexists(self, key):
        """
        Return the ``exists`` result for ``key``.
        :param key:
        :return:
        """
        return self.__conn.exists(key)

    # SET
    @QueueBase.catch
    def zsetSet(self, field, score=1):
        """
        Store ``field`` with ``score`` in the current sorted set.

        dict and list fields are JSON encoded, and the field is cut to its
        first 100 items/characters. A falsy ``field`` is skipped and None
        is returned.
        :param field:
        :param score:
        :return:
        """
        if field:
            if isinstance(field, (dict, list)):
                field = json.dumps(field)
            if len(field) >= 100:
                field = field[:100]
            return self.__conn.zset(self.name, field, score)

    @QueueBase.catch
    def zgetSet(self, key):
        """
        Return the ``zget`` result for ``key`` in the current sorted set.
        """
        return self.__conn.zget(self.name, key)

    @QueueBase.catch
    def zexistsSet(self, name, field):
        """
        Return the ``zexists`` result for ``field`` in the set ``name``
        (the explicit ``name`` is used, not the current one).
        """
        return self.__conn.zexists(name, field)

    @QueueBase.catch
    def zkeysSet(self):
        """
        Return the ``zkeys`` result for the current sorted set with empty
        bounds and a limit of 100000000.
        """
        return self.__conn.zkeys(self.name, '', '', '', limit=100000000)

    @QueueBase.catch
    def zdelSet(self, key):
        """
        Call ``zdel`` for ``key`` on the current sorted set.
        """
        return self.__conn.zdel(self.name, key)

    @QueueBase.catch
    def multi_zgetSet(self, *keys):
        """
        Return the ``multi_zget`` result for ``keys`` in the current
        sorted set.
        """
        return self.__conn.multi_zget(self.name, *keys)

    # Hash
    @QueueBase.catch
    def hgetallHash(self, key):
        """
        Return the ``hgetall`` result for the hash named ``key``.
        """
        return self.__conn.hgetall(key)

    @QueueBase.catch
    def hincrHash(self, name, key):
        """
        Increment field ``key`` of hash ``name`` by 1.
        """
        return self.__conn.hincr(name, key, amount=1)

    @QueueBase.catch
    def multi_hsetHash(self, name, **mapping):
        """
        Call ``multi_hset`` on hash ``name`` with the given keyword
        mapping.
        """
        return self.__conn.multi_hset(name, **mapping)

    @QueueBase.catch
    def hlistHash(self, start, end):
        """
        Return the ``hlist`` result between ``start`` and ``end`` with a
        limit of 10000000.
        """
        return self.__conn.hlist(start, end, limit=10000000)

    @QueueBase.catch
    def hclearHash(self, key):
        """
        Call ``hclear`` on the hash named ``key``.
        """
        return self.__conn.hclear(key)

    @QueueBase.catch
    def hset(self, key, value):
        """
        Store ``value`` under ``key`` in the current hash.
        :param key:
        :param value: dict and list values are stored as JSON
        :return:
        """
        return self.__conn.hset(self.name, key, self._jsonify(value))

    @QueueBase.catch
    def hsize(self):
        """
        Return the ``hsize`` result for the current hash.
        """
        return self.__conn.hsize(self.name)

    @QueueBase.catch
    def hget(self, key=None):
        """
        Read from the current hash.

        With a truthy ``key`` the stored value is returned. Without one,
        the first key reported by ``hkeys`` is read, deleted from the hash
        and its value returned; None is returned when the hash is empty.
        :param key:
        :return:
        """
        if key:
            return self.__conn.hget(self.name, key)
        if self.__conn.hsize(self.name) > 0:
            keys = self.__conn.hkeys(self.name, "", "", limit=1)
            if keys:
                key = keys[0]
                v = self.__conn.hget(self.name, key)
                self.__conn.hdel(self.name, key)
                return v