def login(self):
    """
    Log this account in and store the cookie produced by ``create_cookie``.

    The call is skipped (after a 5 second pause) when ``is_time_to_login``
    reports that it is too early. Any exception raised while logging in is
    logged, reported through ``send_msg`` and recorded as a failed login.

    @return: 1 on success, 0 on failure or when the login was skipped
    """
    try:
        # Pre-check: bail out early when this account may not log in yet.
        if not self.is_time_to_login():
            log.info("此账号尚未到登陆时间: {}".format(self.username))
            time.sleep(5)
            return 0

        new_cookies = self.create_cookie()
        # Validation failures are raised on purpose so that they take the
        # same reporting path as any other login error below.
        if not new_cookies:
            raise Exception("登陆失败 未获取到合法cookie")
        elif not isinstance(new_cookies, dict):
            raise Exception("cookie 必须为字典格式")

        # Persist the login time and the cookie.
        self.set_login_time()
        self.set_cookies(new_cookies)

        log.info("登录成功 {}".format(self.username))
        self.record_user_status(LimitTimesUserStatus.LOGIN_SUCCESS)
        return 1

    except Exception as e:
        log.exception(e)

        alert_body = f"{self.SITE_NAME} {self.username} 账号登陆异常 exception: {str(e)}"
        alert_prefix = f"{self.SITE_NAME} {self.username} 账号登陆异常"
        send_msg(msg=alert_body, level="error", message_prefix=alert_prefix)

        log.info("登录失败 {}".format(self.username))
        self.record_user_status(LimitTimesUserStatus.LOGIN_FALIED)
        return 0
def send_msg(self, msg, level="debug", message_prefix=""):
    """
    Forward an alert to ``tools.send_msg``.

    @param msg: message body, passed through unchanged
    @param level: alert level, passed through unchanged (default "debug")
    @param message_prefix: passed through unchanged (default "")
    @return: None
    """
    alert_kwargs = {
        "msg": msg,
        "level": level,
        "message_prefix": message_prefix,
    }
    tools.send_msg(**alert_kwargs)
def run(self, username=None):
    """
    Log in the users of the pool that currently have no cookies.

    Each round tries to take the Redis lock keyed by ``self._tab_user_pool``
    (``wait_timeout=0``). Only when the lock is held does it sync the user
    info and walk ``self.users``. The loop repeats every 10 seconds while
    ``self._keep_alive`` is truthy; otherwise it stops after one round.

    @param username: when given, only the user with this username is handled
    @return: None
    """
    while True:
        try:
            with RedisLock(
                key=self._tab_user_pool, lock_timeout=3600, wait_timeout=0
            ) as pool_lock:
                if pool_lock.locked:
                    self.__sycn_users_info()
                    online_user = 0

                    for user in self.users:
                        # Restrict the round to one account when requested.
                        if username and username != user.username:
                            continue

                        try:
                            # Already has cookies: count it and move on.
                            if user.cookies:
                                online_user += 1
                                continue

                            # Pre-check: skip accounts that may not log in yet.
                            if not user.is_time_to_login():
                                log.info(
                                    "账号{}与上次登录时间间隔过短,暂不登录: 将在{}登录使用".format(
                                        user.username, user.next_login_time()
                                    )
                                )
                                continue

                            user = self.login(user)
                            if not user.cookies:
                                log.info("登录失败 {}".format(user.username))
                                self.record_user_status(
                                    user.user_id, GoldUserStatus.LOGIN_FALIED
                                )
                                continue

                            # Login produced cookies: persist and record it.
                            user.set_login_time()
                            self.add_user(user)
                            self.record_user_status(
                                user.user_id, GoldUserStatus.LOGIN_SUCCESS
                            )
                            log.debug("登录成功 {}".format(user.username))
                            online_user += 1

                        except NotImplementedError:
                            # A missing login implementation cannot be
                            # recovered from: terminate the process.
                            log.error(
                                f"{self.__class__.__name__} must be implementation login method!"
                            )
                            os._exit(0)
                        except Exception as e:
                            log.exception(e)
                            msg = f"{user.username} 账号登陆失败 exception: {str(e)}"
                            log.info(msg)
                            self.record_user_status(
                                user.user_id, GoldUserStatus.LOGIN_FALIED
                            )
                            send_msg(
                                msg=msg,
                                level="error",
                                message_prefix=f"{user.username} 账号登陆失败",
                            )

                    log.info("当前在线user数为 {}".format(online_user))

            if not self._keep_alive:
                break
            time.sleep(10)

        except Exception as e:
            # Log, pause briefly, then start the next round.
            log.exception(e)
            time.sleep(1)
def __add_item_to_db(self, items, update_items, requests, callbacks, items_fingerprints):
    """
    Export one batch of items to storage and settle the related requests.

    Steps:
      1. When ``setting.ITEM_FILTER_ENABLE`` is set, de-duplicate ``items``.
      2. Group ``items`` / ``update_items`` per table and export each group.
      3. If every export succeeded: run the callbacks, remove ``requests``
         from ``self._table_request`` and add the fingerprints to the dedup
         store.
      4. Otherwise: either give up (retry budget exceeded) and record the
         failed data in ``self._table_failed_items``, or keep the requests
         queued for a retry; raise an alert once the failure counter exceeds
         ``setting.EXPORT_DATA_MAX_FAILED_TIMES``.

    ``self._is_adding_to_db`` is True while this method runs and is always
    reset to False afterwards, even if an exception propagates.

    @param items: items to insert
    @param update_items: items to export with ``is_update=True``
    @param requests: finished requests belonging to this batch
    @param callbacks: list of zero-argument callables; it is drained
        (``pop(0)``) as the callbacks are executed on success
    @param items_fingerprints: fingerprints matching ``items``
    @return: None
    """
    export_success = True
    self._is_adding_to_db = True

    try:
        # De-duplicate
        if setting.ITEM_FILTER_ENABLE:
            items, items_fingerprints = self.__dedup_items(items, items_fingerprints)

        # Group per table
        items_dict = self.__pick_items(items)
        update_items_dict = self.__pick_items(update_items, is_update_item=True)

        # Batch insert
        failed_items = {"add": [], "update": [], "requests": []}
        while items_dict:
            table, datas = items_dict.popitem()

            log.debug(""" -------------- item 批量入库 -------------- 表名: %s datas: %s """ % (table, tools.dumps_json(datas, indent=16)))

            if not self.__export_to_db(table, datas):
                export_success = False
                failed_items["add"].append({"table": table, "datas": datas})

        # Batch update
        while update_items_dict:
            table, datas = update_items_dict.popitem()

            log.debug(""" -------------- item 批量更新 -------------- 表名: %s datas: %s """ % (table, tools.dumps_json(datas, indent=16)))

            update_keys = self._item_update_keys.get(table)
            if not self.__export_to_db(
                table, datas, is_update=True, update_keys=update_keys
            ):
                export_success = False
                failed_items["update"].append({"table": table, "datas": datas})

        if export_success:
            # Run callbacks; one failing callback must not stop the others.
            while callbacks:
                try:
                    callback = callbacks.pop(0)
                    callback()
                except Exception as e:
                    log.exception(e)

            # Remove the finished requests
            if requests:
                self.redis_db.zrem(self._table_request, requests)

            # Record fingerprints only after the data really was stored.
            if setting.ITEM_FILTER_ENABLE:
                if items_fingerprints:
                    self.__class__.dedup.add(items_fingerprints, skip_check=True)

        else:
            failed_items["requests"] = requests

            if self.export_retry_times > setting.EXPORT_DATA_MAX_RETRY_TIMES:
                if self._redis_key != "air_spider":
                    # Record the failed items in redis
                    self.redis_db.sadd(self._table_failed_items, failed_items)

                    # Remove the finished requests
                    if requests:
                        self.redis_db.zrem(self._table_request, requests)

                    log.error("入库超过最大重试次数,不再重试,数据记录到redis,items:\n {}".format(
                        tools.dumps_json(failed_items)))
                self.export_retry_times = 0

            else:
                tip = ["入库不成功"]
                if callbacks:
                    tip.append("不执行回调")
                if requests:
                    tip.append("不删除任务")
                    # Re-score only the requests that are still queued.
                    # Passing the whole ``requests`` list here would re-add
                    # requests that have already been removed from the queue.
                    # NOTE(review): 300 is presumably a lower priority so the
                    # retry happens later; confirm against RedisDB.zadd.
                    exists = self.redis_db.zexists(self._table_request, requests)
                    still_queued = [
                        request for exist, request in zip(exists, requests) if exist
                    ]
                    if still_queued:
                        self.redis_db.zadd(self._table_request, still_queued, 300)

                if setting.ITEM_FILTER_ENABLE:
                    tip.append("数据不入去重库")

                if self._redis_key != "air_spider":
                    tip.append("将自动重试")

                tip.append("失败items:\n {}".format(
                    tools.dumps_json(failed_items)))
                log.error(",".join(tip))

                self.export_falied_times += 1

                if self._redis_key != "air_spider":
                    self.export_retry_times += 1

            if self.export_falied_times > setting.EXPORT_DATA_MAX_FAILED_TIMES:
                # Alert
                msg = "《{}》爬虫导出数据失败,失败次数:{},请检查爬虫是否正常".format(
                    self._redis_key, self.export_falied_times)
                log.error(msg)
                tools.send_msg(
                    msg=msg,
                    level="error",
                    message_prefix="《%s》爬虫导出数据失败" % (self._redis_key),
                )
    finally:
        # Always clear the busy flag, otherwise an exception above would
        # leave the buffer marked as "adding to db" indefinitely.
        self._is_adding_to_db = False