def search_account_from_mongo(self, log):
    """Pop a login account from the Redis queue and store it on the instance.

    Despite the method name, the account is read from the Redis list
    ``tyc_account_lists``. Each entry is decoded, parsed as JSON, and the
    keys ``_id``, ``account_name`` and ``password`` are read from it.

    An empty queue or an unreadable entry consumes one of 10 attempts and is
    followed by a 1 second pause. On success ``self.account_id``,
    ``self.account_num`` and ``self.password`` are set. When the attempts
    are exhausted the method logs the failure, calls ``self.driver.quit()``
    and terminates the process.

    :param log: logger-like object; only ``log.info`` is called.
    :return: None
    :raises SystemExit: when no account could be obtained.
    """
    # Local import: the file-level import block is not visible from here.
    import sys

    attempts_left = 10
    while attempts_left:
        try:
            try:
                raw = redis_conn.rpop('tyc_account_lists')
                # NOTE(review): assumes redis_conn is a redis-py style client
                # whose rpop yields None for an empty list - confirm.
                # Checking for None explicitly avoids relying on the
                # AttributeError raised by None.decode().
                account_info = (
                    json.loads(raw.decode()) if raw is not None else None)
            except Exception as e:
                print(e)
                account_info = None

            if not account_info:
                attempts_left -= 1
                sleep(1)
                continue

            # Read every field before touching self, so that a record with a
            # missing key cannot leave the instance half-updated.
            account_id = account_info['_id']
            account_num = account_info['account_name']
            password = account_info["password"]
        except Exception as e:
            # A malformed record does not consume an attempt (as before);
            # the next iteration simply pops the following entry.
            log.info("[ERROR]: {}".format(e))
            log.info("[ERROR]: 数据库查询账号失败")
            continue

        self.account_id = account_id
        self.account_num = account_num
        log.info("[INFO]: 正在使用账号{}".format(self.account_num))
        self.password = password
        return

    # Only reached when every attempt failed to produce an account.
    log.info("数据库没有符合要求的账号")
    self.driver.quit()
    # sys.exit() instead of the site-provided quit(), which is intended for
    # the interactive interpreter and is not guaranteed to exist.
    sys.exit()
def get_name_from_redis(self):
    """Fetch the next person name to search from the Redis queue.

    Entries are popped from the Redis list ``tyc_name_lists`` until one is
    usable:

    * an entry whose Mongo document has a truthy ``flag`` is skipped
      (already crawled);
    * an entry whose name is longer than 3 characters is marked with
      ``flag = 3`` through ``self.update_database_status`` and skipped;
    * when the queue is empty, Mongo is checked for remaining candidates.
      If any exist the method waits 60 seconds and retries; otherwise it
      calls ``self.driver.quit()`` and returns ``None``.

    Any exception while popping, parsing or looking up an entry is logged
    and retried after 30 seconds.

    :return: ``{'name_id': ..., 'name': ...}`` for the next name, or
        ``None`` when there is nothing left to crawl.
    :rtype: dict or None
    """
    while True:
        try:
            raw = redis_conn.rpop('tyc_name_lists')
            # NOTE(review): assumes a redis-py style client whose rpop yields
            # None for an empty list - confirm. Previously that None reached
            # .decode(), raised, and was handled as a generic error, so the
            # empty-queue handling below could never run.
            if raw is not None:
                name_info = json.loads(raw.decode())
                # Look the entry up to see whether it was already processed.
                # NOTE(review): if no document matches, res is presumably
                # None and the AttributeError is handled below like any
                # other lookup failure (unchanged behaviour).
                res = name_results_coll.find_one({'_id': name_info['_id']})
                if res.get('flag'):
                    print('已爬取')
                    continue
        except Exception as e:
            self.log.error("数据库查询人名失败, ERROR:{}".format(e))
            time.sleep(30)
            continue

        if raw is None:
            # Same filter as the one used to fill the name queue (per the
            # original comment).
            # NOTE(review): 'company_numm' may be a typo for 'company_num';
            # left untouched because it must match the producer - confirm.
            query = {
                'company_numm': {
                    '$lte': 100,
                    '$gt': 0
                },
                'flag': {
                    '$ne': 1
                }
            }
            if name_results_coll.find_one(query):
                # Candidates remain, so the queue refill has not finished
                # yet. Log before sleeping so the message is timely.
                self.log.info('缓存中暂时没有数据,60s后重试...')
                sleep(60)
                continue
            # Nothing left for this filter: shut down and stop looping.
            self.log.info("数据库没有符合要求的人名")
            self.driver.quit()
            return None

        self.log.info("姓名:{}".format(name_info['name']))
        if len(name_info['name']) > 3:
            # Mark the record as not crawlable and move on to the next one.
            collection = {'_id': name_info['_id']}
            query = {'$set': {'flag': 3}}
            self.update_database_status('name', collection, query)
            continue
        return {
            'name_id': name_info['_id'],
            'name': name_info['name'],
        }
def search_name_from_mongo(self, log):
    """Pop the next person name from the Redis queue onto the instance.

    Entries are popped from the Redis list ``tyc_name_lists``. An entry whose
    Mongo document has a truthy ``flag`` is skipped. For a usable entry the
    keys ``_id``, ``name`` and ``last name`` are stored as ``self._id``,
    ``self.man_name`` and ``self.last_name``. A name longer than 10
    characters is handed to ``self.handle_long_name(log)`` and the next
    entry is fetched; otherwise the name is logged and the method returns.

    When the queue is empty, Mongo is checked for documents without a
    ``name_num`` field: if one exists the method waits 60 seconds and
    retries, otherwise it calls ``self.driver.quit()`` and terminates the
    process.

    Any other failure (Redis error, invalid JSON, Mongo lookup error,
    missing key) is logged and retried after 5 seconds.

    :param log: logger-like object; ``info`` and ``error`` are called.
    :return: None
    :raises SystemExit: when there are no names left to crawl.
    """
    # Local import: the file-level import block is not visible from here.
    import sys

    while True:
        try:
            raw = redis_conn.rpop('tyc_name_lists')
            # NOTE(review): assumes a redis-py style client whose rpop yields
            # None for an empty list - confirm. Only that case is treated as
            # "queue empty"; previously every failure in the pop/lookup step
            # was folded into it and could terminate the process while names
            # were still queued.
            if raw is None:
                pending = name_results_coll.find_one(
                    {"name_num": {
                        '$exists': False
                    }})
                if pending:
                    # Log before sleeping so the message is timely.
                    log.info('缓存中暂时没有数据,60s后重试...')
                    sleep(60)
                    continue
                log.info("数据库没有符合要求的人名")
                self.driver.quit()
                # SystemExit is not an Exception subclass, so the handler
                # below does not intercept it. sys.exit() replaces the
                # site-provided quit(), which is meant for interactive use.
                sys.exit()

            name_info = json.loads(raw.decode())
            # NOTE(review): if no document matches, res is presumably None
            # and the AttributeError is reported by the handler below.
            res = name_results_coll.find_one({'_id': name_info['_id']})
            if res.get('flag'):
                print('已爬取')
                continue

            # Read every field before touching self, so that a record with a
            # missing key cannot leave the instance half-updated.
            record_id = name_info["_id"]
            man_name = name_info["name"]
            last_name = name_info["last name"]
            self._id = record_id
            self.man_name = man_name
            self.last_name = last_name

            if len(self.man_name) > 10:
                # Over-long names are delegated, then the next entry is taken.
                self.handle_long_name(log)
                continue

            log.info("姓名:{}".format(self.man_name))
            return
        except Exception as e:
            log.error(e)
            log.error("数据库查询人名失败")
            time.sleep(5)
def get_account_from_redis(self):
    """Block until an account can be taken from the Redis account queue.

    Pops one entry from the Redis list ``tyc_account_lists``, decodes it and
    parses it as JSON. If popping, decoding or parsing raises, the error is
    logged twice through ``self.log.error`` (once raw, once inside a
    message), the method sleeps 10 seconds and tries again. A parsed entry
    that is falsy is discarded and the next one is popped immediately.

    :return: account details under the keys ``account_id``,
        ``account_name`` and ``password``
    :rtype: dict
    """
    while True:
        try:
            payload = redis_conn.rpop('tyc_account_lists')
            record = json.loads(payload.decode())
        except Exception as err:
            self.log.error(err)
            self.log.error(
                "数据库没有符合要求的账号或者redis出现错误, ERROR:{}".format(err))
            sleep(10)
            continue

        # Nothing usable in this entry: go straight to the next pop.
        if not record:
            continue

        account_id = record['_id']
        account_name = record['account_name']
        password = record['password']
        return dict(
            account_id=account_id,
            account_name=account_name,
            password=password,
        )