def catchUserInfoThread(self):
    """Poll the database for users to crawl and record each crawl's outcome.

    Loops for as long as ``self.isExit == False``. On every pass the first
    user returned by ``DBUtil.getFirstUserToCatch`` is marked as being
    crawled, fetched through ``ZhiHuSpider.getUserInfo``, and then either
    marked with a terminal status or stored, depending on the ``'code'``
    entry of the returned mapping.
    """
    spider = ZhiHuSpider()
    store = DBUtil()
    states = Status.Catch()

    # The explicit equality test is kept on purpose: it is not the same as
    # ``not self.isExit`` for non-bool values such as None.
    while self.isExit == False:  # noqa: E712
        # Take the first user that is waiting to be crawled.
        user_id = store.getFirstUserToCatch()
        if user_id is None:
            # Nothing to crawl right now; wait before asking again.
            time.sleep(3)
            continue
        store.setUserIsCatch(user_id, states.is_catching)

        # Fetch the user's information.
        info = spider.getUserInfo(user_id)
        code = info['code']

        if code == spider.code_user_not_useful:
            # The user is not worth keeping.
            outcome = states.user_not_useful
        elif code == spider.code_user_not_exist:
            # The user does not exist.
            outcome = states.user_not_exist
        elif code == spider.code_failure:
            # The crawl failed.
            outcome = states.failed
        else:
            # The crawl succeeded: store the data; no status is written here.
            store.updateUserInfo(user_id, info)
            store.saveAchieveInfo(user_id, info)
            continue

        store.setUserIsCatch(user_id, outcome)
def catchUserInfoThread(self, lock):
    """Crawl users one at a time until ``self.isExit`` becomes truthy.

    Each pass picks the first user returned by
    ``DBUtil.getFirstUserToCatch`` and marks it as being crawled while
    ``lock`` is held, then fetches the user through
    ``ZhiHuSpider.getUserInfo`` and records the outcome according to the
    ``'code'`` entry of the returned mapping.

    Args:
        lock: Object exposing ``acquire()`` and ``release()``. It is held
            only around the pick-and-mark step, presumably so that
            concurrent workers do not pick the same user (confirm against
            the caller).
    """
    s = ZhiHuSpider()
    db = DBUtil()
    st = Status.Catch()

    while not self.isExit:
        # Pick the first pending user and mark it as being crawled. The
        # try/finally guarantees the lock is released even if a database
        # call raises; otherwise every other worker would block forever.
        lock.acquire()
        try:
            userId = db.getFirstUserToCatch()
            if userId is not None:
                db.setUserIsCatch(userId, st.is_catching)
        finally:
            lock.release()

        if userId is None:
            # Nothing to crawl right now; sleep outside the lock and retry.
            time.sleep(5)
            continue

        log('开始爬取用户,pid={0}, user_id={1}'.format(os.getpid(), userId))

        # Fetch the user's information.
        info = s.getUserInfo(userId)
        code = info['code']

        if code == s.code_user_not_useful:
            # The user is not worth keeping.
            log('用户没有价值,pid={0}, user_id={1}'.format(os.getpid(), userId))
            db.setUserIsCatch(userId, st.user_not_useful)
        elif code == s.code_user_not_exist:
            # The user does not exist.
            log('用户不存在,是僵尸粉,pid={0}, user_id={1}'.format(
                os.getpid(), userId))
            db.setUserIsCatch(userId, st.user_not_exist)
        elif code == s.code_failure:
            # The crawl failed.
            log('用户抓取失败,pid={0}, user_id={1}'.format(os.getpid(), userId))
            db.setUserIsCatch(userId, st.failed)
        else:
            # The crawl succeeded: store the fetched data.
            log('用户抓取成功,pid={0}, user_id={1}'.format(os.getpid(), userId))
            db.updateUserInfo(userId, info)
            db.saveAchieveInfo(userId, info)

    log('获取用户详细信息的线程结束,tid = {0}'.format(self.getThreadId()))
def catchUserInfoProcess(self, lock):
    """Crawl users one at a time in an endless loop.

    Each pass picks the first user returned by
    ``DBUtil.getFirstUserToCatch`` and marks it as being crawled while
    ``lock`` is held, then fetches the user through
    ``ZhiHuSpider.getUserInfo`` and records the outcome according to the
    ``'code'`` entry of the returned mapping. The loop has no exit
    condition, so this function does not return normally.

    Args:
        lock: Object exposing ``acquire()`` and ``release()``. It is held
            only around the pick-and-mark step, presumably so that
            concurrent workers do not pick the same user (confirm against
            the caller).
    """
    s = ZhiHuSpider()
    db = DBUtil()
    st = Status.Catch()

    while True:
        # Pick the first pending user and mark it as being crawled. The
        # try/finally guarantees the lock is released even if a database
        # call raises; otherwise every other worker would block forever.
        lock.acquire()
        try:
            userId = db.getFirstUserToCatch()
            if userId is not None:
                db.setUserIsCatch(userId, st.is_catching)
        finally:
            lock.release()

        if userId is None:
            # Nothing to crawl right now; sleep outside the lock and retry.
            time.sleep(3)
            continue

        # The placeholders below used to be swapped ({1} for pid, {0} for
        # user_id), which printed each value under the other's label.
        print('开始爬取用户,pid={0}, user_id={1}'.format(os.getpid(), userId))

        # Fetch the user's information.
        info = s.getUserInfo(userId)
        code = info['code']

        if code == s.code_user_not_useful:
            # The user is not worth keeping.
            print('用户没有价值,pid={0}, user_id={1}'.format(os.getpid(), userId))
            db.setUserIsCatch(userId, st.user_not_useful)
        elif code == s.code_user_not_exist:
            # The user does not exist.
            print('用户不存在,是僵尸粉,pid={0}, user_id={1}'.format(
                os.getpid(), userId))
            db.setUserIsCatch(userId, st.user_not_exist)
        elif code == s.code_failure:
            # The crawl failed.
            print('用户抓取失败,pid={0}, user_id={1}'.format(os.getpid(), userId))
            db.setUserIsCatch(userId, st.failed)
        else:
            # The crawl succeeded: store the fetched data.
            # NOTE(review): the catchUserInfoThread variants in this file
            # also call db.saveAchieveInfo here - confirm the omission is
            # intentional.
            print('用户抓取成功,pid={0}, user_id={1}'.format(os.getpid(), userId))
            db.updateUserInfo(userId, info)