def run(self):
    """Put every entry of ``userIdMap`` into ``self.cache``, pausing between puts.

    For each user id in ``userIdMap`` (resolved from the enclosing scope),
    calls ``self.cache.put`` with an empty request id, logs the current
    thread name, the user id and the value ``put`` returned, then sleeps.
    """
    for uid in userIdMap:
        stored = self.cache.put("", uid, userIdMap[uid])
        threadName = threading.currentThread().getName()
        rootLogger.debug(
            "%s put in cache %s %s" % (threadName, uid, str(stored)))
        # Pause for a randomly chosen duration before the next put.
        pause = random.randint(0, 10) / 10
        time.sleep(pause)
def saveUser(self, requestId, userInfo):
    """Insert one user document into the ``TABLE_USER`` collection.

    Args:
        requestId: Prefix prepended to every log message of this call.
        userInfo: Document passed to ``insert_one``.

    Any exception raised while obtaining the connection or inserting is
    logged and swallowed; nothing is returned.
    """
    rootLogger.debug(requestId + "UserMongoDaoImpl saveUser start")
    try:
        # NOTE(review): the connection returned by getConn() is never closed
        # here -- confirm whether the caller or getConn() manages its lifetime.
        userCollection = self.getConn()[DB_ZHIHU][TABLE_USER]
        userCollection.insert_one(userInfo)
    except Exception as err:
        rootLogger.error(requestId + "UserMongoDaoImpl saveUser error")
        rootLogger.error(err)
    rootLogger.debug(requestId + "UserMongoDaoImpl saveUser end")
def saveUsers(self, userInfoList):
    """Insert several user documents into the ``TABLE_USER`` collection.

    Args:
        userInfoList: Documents passed to ``insert_many``.

    Any exception raised while obtaining the connection or inserting is
    logged and swallowed; nothing is returned.
    """
    rootLogger.debug("UserMongoDaoImpl saveUsers start")
    try:
        # NOTE(review): the connection returned by getConn() is never closed
        # here -- confirm whether the caller or getConn() manages its lifetime.
        userCollection = self.getConn()[DB_ZHIHU][TABLE_USER]
        userCollection.insert_many(userInfoList)
    except Exception as err:
        rootLogger.error("UserMongoDaoImpl saveUsers error")
        rootLogger.error(err)
    rootLogger.debug("UserMongoDaoImpl saveUsers end")
def findUserByCondition(self, param):
    """Print user documents from the ``TABLE_USER`` collection, capped by a limit.

    Args:
        param: Mapping of query options. Only ``KEY_LIMIT`` is read; when
            that key is absent, ``VALUE_LIMIT`` is used as the limit.

    Any exception raised while connecting or querying is logged and
    swallowed; nothing is returned.
    """
    rootLogger.debug("UserMongoDaoImpl findUserByCondition start")
    try:
        conn = self.getConn()
        db = conn[DB_ZHIHU]
        users = db[TABLE_USER]
        # Check parameters: use the caller's limit when supplied, otherwise
        # fall back to the default. The previous expression had its operands
        # swapped, so it produced the boolean result of the membership test
        # (or a KeyError when the key was missing) instead of the value.
        limit = param[KEY_LIMIT] if KEY_LIMIT in param else VALUE_LIMIT
        cursor = users.find().limit(limit)
        for doc in cursor:
            print(doc)
    except Exception as e:
        rootLogger.error("UserMongoDaoImpl findUserByCondition error")
        rootLogger.error(e)
    rootLogger.debug("UserMongoDaoImpl findUserByCondition end")
def getConn(self):
    """Create, authenticate and return a ``MongoClient`` built from ``MongoConfig``.

    After authenticating against the ``admin`` database, an ``ismaster``
    command is issued; a ``ConnectionFailure`` from that command is reported
    with ``print`` and the client is returned regardless.
    """
    rootLogger.debug("MongoDaoSupport getConn start")
    # Connect to the database system; host and port (plus pool settings)
    # are given when the client is created.
    clientOptions = dict(
        host=MongoConfig.HOST,
        port=MongoConfig.PORT,
        document_class=MongoConfig.DOCUMENT_CLASS,
        tz_aware=MongoConfig.TZ_AWARE,
        connect=MongoConfig.CONNECT,
        maxPoolSize=MongoConfig.MAX_POOL_SIZE,
        minPoolSize=MongoConfig.MIN_POOL_SIZE,
    )
    client = mongo_client.MongoClient(**clientOptions)
    # The account lives in the admin database: connect, authenticate,
    # then switch to the working database.
    adminDb = client.admin
    adminDb.authenticate(MongoConfig.USER, MongoConfig.PWD)
    try:
        client.admin.command('ismaster')
    except ConnectionFailure:
        print("Server not available")
    rootLogger.debug("MongoDaoSupport getConn end")
    return client
def doRequest(self, requestId, httpClientRequest):
    """Validate a request and dispatch it by HTTP method.

    Args:
        requestId: Prefix prepended to every log message of this call.
        httpClientRequest: Request mapping; its ``"method"`` entry selects
            the handler.

    Returns:
        The default response with ``"reason"`` set to the validation message
        when ``validatedRequest`` returns a non-empty string; otherwise the
        result of ``doPostRequest`` for POST, or ``doGetRequest`` for GET and
        for any other method.
    """
    rootLogger.debug(requestId + "doRequest request: " +
                     str(httpClientRequest))
    # Validate once and reuse the message; the request was previously
    # validated a second time just to obtain the same reason text.
    reason = self.validatedRequest(httpClientRequest)
    if reason != "":
        httpClientResponse = HttpClientResponse().getDefaultResponse()
        httpClientResponse["reason"] = reason
        return httpClientResponse
    method = httpClientRequest["method"]
    if method == HttpCore.GET:
        httpClientResponse = self.doGetRequest(requestId, httpClientRequest)
    elif method == HttpCore.POST:
        httpClientResponse = self.doPostRequest(requestId, httpClientRequest)
    else:
        # Unsupported methods currently fall back to a GET request.
        httpClientResponse = self.doGetRequest(requestId, httpClientRequest)
        # TODO: PUT
        # TODO: HEAD
    rootLogger.debug(requestId + "doRequest response: " +
                     str(httpClientResponse))
    return httpClientResponse
def buildUrl(self, requestId, request):
    """Return ``request["url"]`` with the output of ``buildParamsUrl`` appended.

    Args:
        requestId: Prefix prepended to the log message.
        request: Mapping providing the ``"url"`` and ``"params"`` entries.
    """
    base = request["url"]
    paramsPart = self.buildParamsUrl(request["params"])
    fullUrl = base + paramsPart
    rootLogger.debug(requestId + "HttpClientImpl buildUrl:" + fullUrl)
    return fullUrl
def pull(self, requestId):
    """Take one entry from ``USER_DUPLICATE_QUEUE_NAME`` via ``RedisUtils.blpop``.

    The raw entry is logged with ``requestId`` as prefix and returned after
    conversion by ``convertStr2Dict``.
    """
    raw = RedisUtils.blpop(USER_DUPLICATE_QUEUE_NAME)
    logLine = requestId + "UserDuplicateRedisQueue pull:" + str(raw)
    rootLogger.debug(logLine)
    return self.convertStr2Dict(raw)
"method": "GET", "url": "/api/v4/members/zhang-jia-wei/followers", "params": { "offset": 0, "limit": 20 }, "headers": { "Host": "www.zhihu.com", "Authorization": "Bearer Mi4wQUFBQU9qRWdBQUFBQU1MZThYVndDeGNBQUFCaEFsVk5VbzNzV0FESHZyN1FkdU53bllZOE5tRUtMZHRXTWNEcmRB|1489305735|7b00a5eb2137cfae4ad342e47322d92e18d1d3d6", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", # 告知服务器采用何种压缩方式 # "Accept-Encoding": "gzip, deflate, br", "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3", "origin": "https://www.zhihu.com", "x-udid": "AADC3vF1cAuPTkgQezG76kgARvc5TuxQdrE=", "Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain" } } """ # 测试抓取User信息 userResponse = httpClient.doRequest(userRequest) rootLogger.debug("HttpClientImpl User response:" + str(userResponse)) # 测试抓取Follower信息 followerResponse = httpClient.doRequest(followerRequest) rootLogger.debug("HttpClientImpl Follower response:" + str(followerResponse))
def startUserTask(self, n):
    """Run ``doUserTask`` repeatedly, sleeping ``n`` seconds after each run.

    Before each run the current local time is logged.
    """
    while True:
        now = time.localtime()
        rootLogger.debug(time.strftime('%Y-%m-%d %X', now))
        self.doUserTask()
        time.sleep(n)
    # NOTE(review): the loop above has no exit condition, so this line is
    # not reached; it is kept from the original. The original layout was
    # lost, so confirm it was not meant to sit inside the loop.
    rootLogger.debug("done user task")
def parse(self):
    """Log a placeholder message; no parsing is performed here."""
    logLine = "BaseParser response..."
    rootLogger.debug(logLine)
def push(self, requestId, request):
    """Log *request* and put it on ``self.followerRequestMemoryQueue``.

    Args:
        requestId: Prefix prepended to the log message.
        request: Item handed to the queue's ``put``.
    """
    logLine = requestId + "FollowerRequestMemoryQueue push:" + str(request)
    rootLogger.debug(logLine)
    self.followerRequestMemoryQueue.put(request)
def getConn(self):
    """Log the start and end markers only; no connection is created or returned."""
    for marker in ("FileDaoSupport _getConn start",
                   "FileDaoSupport _getConn end"):
        rootLogger.debug(marker)
def get(self, requestId, userId):
    """Return the stored info for *userId*, or None.

    None is returned whenever ``RedisUtils.exists(userId)`` is not exactly
    ``True``. Otherwise the value is looked up, written back through
    ``RedisUtils.set``, logged with ``requestId`` as prefix and returned.
    """
    if RedisUtils.exists(userId) is not True:
        return None
    # NOTE(review): the value is read from self.__userInfoMap and then
    # written to Redis, rather than being read from Redis -- confirm this
    # is intended.
    cached = self.__userInfoMap[userId]
    RedisUtils.set(userId, cached)
    rootLogger.debug(requestId + "UserRedisCache get:" + str(cached))
    return cached
def pull(self, requestId):
    """Take one entry from ``USER_REQ_QUEUE_NAME`` via ``RedisUtils.blpop``.

    The raw entry is logged with ``requestId`` as prefix and returned after
    conversion by ``convertStr2Dict``.
    """
    raw = RedisUtils.blpop(USER_REQ_QUEUE_NAME)
    logLine = requestId + "UserRequestRedisQueue pull:" + str(raw)
    rootLogger.debug(logLine)
    return self.convertStr2Dict(raw)
(threading.currentThread().getName(), userId, str(success))) time.sleep(random.randint(0, 10) / 10) if __name__ == "__main__": startTimestamp = time.time() userIdMap = {} for x in range(0, 1000): userId = "userId" + str(x) userName = "******" + str(x) userInfo = {"userId": userId, "userName": userName} userIdMap[userId] = userInfo rootLogger.debug(userIdMap) cache = UserRedisCache() threadAList = [] for x in range(0, 10): thread = ThreadA("UserRedisCache ThreadA" + str(x), cache, userIdMap) threadAList.append(thread) threadBList = [] for x in range(0, 10): thread = ThreadB("UserRedisCache ThreadB" + str(x), cache, userIdMap) threadBList.append(thread) for thread in threadAList: thread.start()
def put(self, requestId, userId, userInfo):
    """Store *userInfo* under *userId* unless the key is already reported present.

    Returns:
        True when ``RedisUtils.exists(userId)`` is exactly ``False`` and the
        value was written with ``RedisUtils.set``; False otherwise.
    """
    if RedisUtils.exists(userId) is not False:
        return False
    RedisUtils.set(userId, userInfo)
    rootLogger.debug(requestId + "UserRedisCache put:" + str(userInfo))
    return True
def printAll(self):
    """Log the whole in-memory user map, followed by its number of keys."""
    contents = str(self.__userInfoMap)
    rootLogger.debug("UserMemoryCache:" + contents)
    entryCount = len(self.__userInfoMap.keys())
    rootLogger.debug("UserMemoryCache count:" + str(entryCount))
def get(self, requestId, userId):
    """Return the info stored for *userId*, or None when it is not in the map.

    A hit is logged with ``requestId`` as prefix before being returned.
    """
    if userId not in self.__userInfoMap.keys():
        return None
    found = self.__userInfoMap[userId]
    rootLogger.debug(requestId + "UserMemoryCache get:" + str(found))
    return found
def put(self, requestId, userId, userInfo):
    """Store *userInfo* under *userId* only if the id is not already in the map.

    Returns:
        True when the entry was added (and logged), False when *userId* was
        already present and nothing was changed.
    """
    if userId in self.__userInfoMap.keys():
        return False
    self.__userInfoMap[userId] = userInfo
    rootLogger.debug(requestId + "UserMemoryCache put:" + str(userInfo))
    return True
def doUserTask(self):
    """Log a placeholder message; the task body does nothing else."""
    logLine = "doUserTask..."
    rootLogger.debug(logLine)
def push(self, requestId, message):
    """Log *message* and push it to ``USER_DUPLICATE_QUEUE_NAME`` via ``RedisUtils.lpush``.

    Args:
        requestId: Prefix prepended to the log message.
        message: Item handed to ``RedisUtils.lpush``.
    """
    logLine = requestId + "UserDuplicateRedisQueue push:" + str(message)
    rootLogger.debug(logLine)
    RedisUtils.lpush(USER_DUPLICATE_QUEUE_NAME, message)
def startUserTask(self):
    """Schedule ``doUserTask`` on a ``threading.Timer`` with a 5-second interval.

    The timer is started and the method returns immediately, without
    waiting for the task to run.
    """
    rootLogger.debug("startUserTask...")
    delayedTask = threading.Timer(5, self.doUserTask)
    delayedTask.start()
    rootLogger.debug("do not wait user task done")
def pull(self, requestId):
    """Get one item from ``self.userDuplicateMemoryQueue``, log it and return it."""
    item = self.userDuplicateMemoryQueue.get()
    logLine = requestId + "UserDuplicateMemoryQueue pull:" + str(item)
    rootLogger.debug(logLine)
    return item
def pull(self, requestId):
    """Get one item from ``self.followerRequestMemoryQueue``, log it and return it."""
    item = self.followerRequestMemoryQueue.get()
    logLine = requestId + "FollowerRequestMemoryQueue pull:" + str(item)
    rootLogger.debug(logLine)
    return item
def pull(self, requestId):
    """Take one entry from ``FOLLOWER_RESP_QUEUE_NAME`` via ``RedisUtils.blpop``.

    The raw entry is logged with ``requestId`` as prefix and returned after
    conversion by ``convertStr2Dict``.
    """
    raw = RedisUtils.blpop(FOLLOWER_RESP_QUEUE_NAME)
    logLine = requestId + "FollowerResponseRedisQueue pull:" + str(raw)
    rootLogger.debug(logLine)
    return self.convertStr2Dict(raw)
def push(self, requestId, request):
    """Log *request* and push it to ``USER_REQ_QUEUE_NAME`` via ``RedisUtils.lpush``.

    Args:
        requestId: Prefix prepended to the log message.
        request: Item handed to ``RedisUtils.lpush``.
    """
    logLine = requestId + "UserRequestRedisQueue push:" + str(request)
    rootLogger.debug(logLine)
    RedisUtils.lpush(USER_REQ_QUEUE_NAME, request)
def push(self, requestId, response):
    """Log *response* and push it to ``USER_RESP_QUEUE_NAME`` via ``RedisUtils.lpush``.

    Args:
        requestId: Prefix prepended to the log message.
        response: Item handed to ``RedisUtils.lpush``.
    """
    logLine = requestId + "UserResponseRedisQueue push:" + str(response)
    rootLogger.debug(logLine)
    RedisUtils.lpush(USER_RESP_QUEUE_NAME, response)
def push(self, requestId, message):
    """Log *message* and put it on ``self.userDuplicateMemoryQueue``.

    Args:
        requestId: Prefix prepended to the log message.
        message: Item handed to the queue's ``put``.
    """
    logLine = requestId + "UserDuplicateMemoryQueue push:" + str(message)
    rootLogger.debug(logLine)
    self.userDuplicateMemoryQueue.put(message)
def pull(self, requestId):
    """Get one item from ``self.userResponseMemoryQueue``, log it and return it."""
    item = self.userResponseMemoryQueue.get()
    logLine = requestId + "UserResponseMemoryQueue pull:" + str(item)
    rootLogger.debug(logLine)
    return item