Example #1
0
def process(keyword):
    """
    Search Sina Weibo for ``keyword`` and store the extracted results in SSDB.

    A PhantomJS browser is started behind a randomly chosen HTTP proxy, the
    ``Sina`` helper logs in and runs the search, and result pages are
    extracted until at least 100 entries were collected or no further page is
    available. The collected list is JSON-encoded and written to SSDB under
    the key ``weibo_<keyword>_<time string>``.

    :param keyword: search term; also becomes part of the SSDB key
    :return: None
    """
    # Pick one proxy address at random.
    addr = random.choice(tools.getIP())

    # Proxy settings.
    proxy = Proxy(
        {
            'proxy_type': ProxyType.MANUAL,
            'http_proxy': addr
        }
    )
    # DesiredCapabilities.PHANTOMJS is a class-level dict shared by every
    # caller; work on a copy so the proxy does not leak into later drivers.
    desired_capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
    proxy.add_to_capabilities(desired_capabilities)

    # 1) Build the driver and set the window size.
    driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities)
    try:
        driver.maximize_window()

        # 2) Instantiate the Sina helper around the driver.
        sina = Sina(DRIVER=driver)

        # 3) Log in; login() is the alternative login method.
        sina.login2()

        # 4) Run the search.
        sina.search(keyword)

        # Connect to the SSDB database.
        client = SSDB(host='', port=8884)

        # Collect the result set: at least 100 entries, unless the pages
        # run out first.
        results = []
        page = 1
        while len(results) < 100:
            # Single-argument print: same output on Python 2 and Python 3.
            print("+++++++++++++++++++++++++++++++++++++++ page %s" % page)
            results.extend(sina.extract())
            page += 1

            if not sina.nextPage():
                break

        key = "weibo_" + keyword + "_" + tools.getTimeAsStr()
        client.set(key, json.dumps(results))

        print("ssdb save %s %s" % (key, len(results)))
    finally:
        # Always shut the browser down, otherwise every call (and every
        # failure) leaves a PhantomJS process behind.
        driver.quit()
Example #2
0
class SSDBKV(object):
    """Small key-value facade over an SSDB client that uses a blocking connection pool."""

    def __init__(self,
                 host="127.0.0.1",
                 port=8888,
                 max_connections=10,
                 timeout=60):
        """
        Store the connection settings and create the pooled SSDB client.

        :param host: SSDB server host
        :param port: SSDB server port
        :param max_connections: forwarded to the connection pool
        :param timeout: forwarded to the connection pool
        """
        self.host, self.port = host, port
        self.max_connections, self.timeout = max_connections, timeout
        pool_options = dict(
            connection_class=Connection,
            max_connections=max_connections,
            timeout=timeout,
            host=host,
            port=port,
        )
        self.ssdb = SSDB(connection_pool=BlockingConnectionPool(**pool_options))

    def set(self, key, value):
        """Store ``value`` under ``key``; returns the client's result."""
        client = self.ssdb
        return client.set(key, value)

    def get(self, key):
        """Return whatever the client returns for ``key``."""
        client = self.ssdb
        return client.get(key)

    def delete(self, key):
        """Delete ``key``; returns the client's result."""
        client = self.ssdb
        return client.delete(key)

    def keys(self, name_start=0, name_end=0xFFFFFFFF, limit=10):
        """Forward a key-range listing (start, end, limit) to the client."""
        client = self.ssdb
        return client.keys(name_start, name_end, limit)

    def exists(self, key):
        """Return the client's existence check result for ``key``."""
        client = self.ssdb
        return client.exists(key)
Example #3
0
        # Map every record's username to `timestamp` and write that mapping
        # to both sorted sets. `res`, `timestamp` and `ssdb` are defined
        # outside this fragment.
        values = dict([[i["username"], timestamp] for i in res])
        ssdb.multi_zset("ig-last-crawled", **values)
        ssdb.multi_zset("ig-last-updated", **values)
        
        # NOTE(review): `_id` is a bare name here, not the string "_id";
        # it must be defined outside this fragment -- confirm this is intended.
        values = dict([[i[_id], i["username"]] for i in res])
        ssdb.multi_hset("ig-username-id", **values)

        # Walk every user in results.json and store its bio, a summary dict
        # and three time-stamped counters.
        # NOTE(review): the file object returned by open() is never closed
        # explicitly.
        for c, user in enumerate(json.loads(open("results.json").read())[:]):
                print c
                un = user["username"]
                cols = ["profile_pic_url","full_name","followers","following","username"]
                # Under Python 2 this comprehension rebinds the loop counter
                # `c`; it has already been printed above and enumerate()
                # reassigns it on the next iteration.
                vals = [user[c] for c in cols]
                info = dict(zip(cols, vals))
        
                # User information: biography text and the summary dict.
                # NOTE(review): `info` is a dict passed to set() as-is --
                # confirm how the client serialises it.
                ssdb.set("ig-{0}-user-bio".format(un), user["biography"])
                ssdb.set("ig-{0}-user-info".format(un), info)
        
                # TODO: batch these three writes with a multi-zset call.
                ssdb.zset("ig-{0}-followers".format(un), timestamp, user["followers"])
                ssdb.zset("ig-{0}-following".format(un), timestamp, user["following"])
                ssdb.zset("ig-{0}-picture-count".format(un), timestamp, user["picture-count"])
                
# Register the picture codes of every "pictures-*" dump file found in the
# current directory in the "instagram-pictures" sorted set, each with 0.
onlyfiles = [f for f in listdir(".") if isfile(join(".", f)) and "pictures-" in f]
for file_name in onlyfiles:
        timestamp = arrow.utcnow().timestamp
        # Close the dump file deterministically instead of leaking the handle.
        with open(file_name) as dump_file:
                res = json.load(dump_file)

        # Each [code, 0] pair needs its own closing bracket; the original
        # line was missing it and did not parse.
        values = dict((i["code"], 0) for i in res)
        ssdb.multi_zset("instagram-pictures", **values)
Example #4
0
class QueueSSDB(QueueBase.QueueBase):
    """
    SSDB-backed implementation built on ``QueueBase``.

    Besides the queue operations (put / save / get / getMore / size) the
    class offers thin wrappers around the SSDB key-value, sorted-set and hash
    commands. Queue helpers, most sorted-set helpers and ``hset`` / ``hsize``
    / ``hget`` operate on ``self.name``; the remaining helpers take the name
    or key explicitly.

    Most methods are wrapped with ``QueueBase.catch``; its behaviour is
    defined in ``QueueBase`` and is not visible here.
    """
    def __init__(self, name, host='localhost', port=8888, **kwargs):
        """
        :param name: default queue / sorted set / hash name to operate on
        :param host: SSDB server host
        :param port: SSDB server port
        :param kwargs: accepted for interface compatibility; not used here
        """
        QueueBase.QueueBase.__init__(self, name, host, port)
        # self.host / self.port are expected to be set by QueueBase.__init__.
        self.__conn = SSDB(connection_pool=BlockingConnectionPool(host=self.host, port=self.port))

    @staticmethod
    def _serialize(value):
        """
        Return ``value`` ready for storage.

        dict and list values are JSON-encoded (non-ASCII characters kept) and
        encoded as UTF-8; every other value is returned unchanged.
        """
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False).encode('utf-8')
        return value

    #queue
    @QueueBase.catch
    def put(self, value, *args, **kwargs):
        """
        put an item in the back of the queue ``self.name``
        :param value: item to push; dict/list values are JSON-encoded first
        :param args: unused
        :param kwargs: unused
        :return: result of the client's ``qpush_back`` call
        """
        return self.__conn.qpush_back(self.name, self._serialize(value))

    def save(self, value, *args, **kwargs):
        """
        put an item in the back of the queue ``self.name``

        Same operation as ``put`` but, unlike ``put``, not wrapped with
        ``QueueBase.catch``.
        :param value: item to push; dict/list values are JSON-encoded first
        :param args: unused
        :param kwargs: unused
        :return: result of the client's ``qpush_back`` call
        """
        return self.__conn.qpush_back(self.name, self._serialize(value))

    @QueueBase.catch
    def get(self, *args, **kwargs):
        """
        get one element from the front of the queue
        :param args: unused
        :param kwargs: unused
        :return: first item of the ``qpop_front`` result, or the falsy
                 result itself when nothing was popped
        """
        value = self.__conn.qpop_front(self.name)
        return value[0] if value else value

    @QueueBase.catch
    def getMore(self, *args, **kwargs):
        """
        get elements from the front of the queue
        :param args: unused
        :param kwargs: forwarded to the client's ``qpop_front``
        :return: the unmodified ``qpop_front`` result
        """
        return self.__conn.qpop_front(self.name, **kwargs)

    @QueueBase.catch
    def size(self, *args, **kwargs):
        """Return the client's ``qsize`` result for ``self.name``."""
        return self.__conn.qsize(self.name)

    @QueueBase.catch
    def changeTable(self, name):
        """
        change the queue name to operate
        :param name: new value for ``self.name``
        :return:
        """
        self.name = name

    @QueueBase.catch
    def select_queue(self, name):
        """
        change the queue name to operate (same effect as ``changeTable``)
        :param name: new value for ``self.name``
        :return:
        """
        self.name = name

    @QueueBase.catch
    def qclaerQueue(self):
        """Clear the queue ``self.name`` (method name kept for existing callers)."""
        return self.__conn.qclear(self.name)

    # Correctly spelled alias; the misspelled name above stays available.
    qclearQueue = qclaerQueue

    #KV
    @QueueBase.catch
    def keySet(self,key,value):
        """
        Set the value at key ``key`` to ``value`` .
        :param key:
        :param value: dict/list values are JSON-encoded first
        :return: result of the client's ``set`` call
        """
        return self.__conn.set(key, self._serialize(value))

    @QueueBase.catch
    def keySetx(self,name, value, ttl=-1):
        """
        Set the value of key ``name`` to ``value`` that expires in ``ttl``
        seconds. ``ttl`` can be represented by an integer or a Python
        timedelta object.

        Unlike ``keySet`` the value is passed to the client unchanged.
        :param name:
        :param value:
        :param ttl:
        :return: result of the client's ``setx`` call
        """
        return self.__conn.setx(name,value,ttl=ttl)

    @QueueBase.catch
    def keyTtl(self,key):
        """
        Returns the number of seconds until the key ``key`` will expire.
        :param key:
        :return: result of the client's ``ttl`` call
        """
        # The result used to be dropped, so callers always received None.
        return self.__conn.ttl(key)

    @QueueBase.catch
    def keyGet(self,key):
        """
        Return the value at key ``key``, or ``None`` if the key doesn't exist
        :param key:
        :return:
        """
        return self.__conn.get(key)

    @QueueBase.catch
    def keyDel(self,key):
        """
        Delete the key specified by ``key`` .
        :param key:
        :return:
        """
        return self.__conn.delete(key)

    @QueueBase.catch
    def keyKeys(self,key_start='',key_end=''):
        """
        Return a list of up to 100000 keys between ``key_start`` and
        ``key_end``
        :param key_start:
        :param key_end:
        :return:
        """
        return self.__conn.keys(name_start=key_start,name_end=key_end,limit=100000)

    @QueueBase.catch
    def keyexists(self,key):
        """
        Return the client's ``exists`` result for ``key``.
        :param key:
        :return:
        """
        return self.__conn.exists(key)

    #SET
    @QueueBase.catch
    def zsetSet(self,field,score = 1):
        """
        Set ``field`` with ``score`` in the sorted set ``self.name``.

        A truthy dict/list ``field`` is JSON-encoded first, and a truthy
        field of 100 or more items/characters is cut to its first 100.
        :param field:
        :param score:
        :return: result of the client's ``zset`` call
        """
        if field:
            if isinstance(field, (dict, list)):
                field = json.dumps(field)
            if len(field) >= 100:
                field = field[:100]
        return self.__conn.zset(self.name, field, score)

    @QueueBase.catch
    def zgetSet(self,key):
        """Return the client's ``zget`` result for ``key`` in ``self.name``."""
        return self.__conn.zget(self.name,key)

    @QueueBase.catch
    def zexistsSet(self,name,field):
        """Return the client's ``zexists`` result for ``field`` in the set ``name``."""
        return self.__conn.zexists(name,field)

    @QueueBase.catch
    def zkeysSet(self):
        """Return up to 100000000 keys of the sorted set ``self.name``."""
        return self.__conn.zkeys(self.name,'','','',limit=100000000)

    @QueueBase.catch
    def zdelSet(self,key):
        """Remove ``key`` from the sorted set ``self.name``."""
        return self.__conn.zdel(self.name,key)

    @QueueBase.catch
    def multi_zgetSet(self,*keys):
        """Return the client's ``multi_zget`` result for ``keys`` in ``self.name``."""
        return self.__conn.multi_zget(self.name,*keys)

    #Hash
    @QueueBase.catch
    def hgetallHash(self,key):
        """Return the client's ``hgetall`` result for the hash named ``key``."""
        return self.__conn.hgetall(key)

    @QueueBase.catch
    def hincrHash(self,name,key):
        """Increment ``key`` in the hash ``name`` by 1."""
        return self.__conn.hincr(name,key,amount=1)

    @QueueBase.catch
    def multi_hsetHash(self,name,**mapping):
        """Set every key/value pair of ``mapping`` in the hash ``name``."""
        return self.__conn.multi_hset(name, **mapping)

    @QueueBase.catch
    def hlistHash(self,start,end):
        """Return up to 10000000 hash names between ``start`` and ``end``."""
        return self.__conn.hlist(start, end, limit =  10000000)

    @QueueBase.catch
    def hclearHash(self,key):
        """Clear the hash named ``key``."""
        return self.__conn.hclear(key)

    @QueueBase.catch
    def hset(self, key,value):
        """
        Set ``key`` to ``value`` in the hash ``self.name``.
        :param key:
        :param value: dict/list values are JSON-encoded first
        :return: result of the client's ``hset`` call
        """
        return self.__conn.hset(self.name, key, self._serialize(value))

    @QueueBase.catch
    def hsize(self):
        """Return the client's ``hsize`` result for the hash ``self.name``."""
        return self.__conn.hsize(self.name)

    @QueueBase.catch
    def hget(self, key = None):
        """
        Read from the hash ``self.name``.

        With a truthy ``key`` the value stored under it is returned. Without
        one, the first key reported by ``hkeys`` is read, deleted from the
        hash and its value returned. Read and delete are two separate
        commands.
        :param key:
        :return: the value, or ``None`` when no key was given and the hash
                 has no entries
        """
        if key:
            return self.__conn.hget(self.name, key)

        if self.__conn.hsize(self.name) > 0:
            keys = self.__conn.hkeys(self.name, "", "", limit=1)
            if keys:
                key = keys[0]
                v = self.__conn.hget(self.name, key)
                self.__conn.hdel(self.name, key)
                return v
        return None