def sync_message_queue(self, mode: str, message: str = None):
    """
    Synchronize the message queue; currently only used for task-queue sync.

    @todo The structure is a Redis List; each download pops one element.
    @todo Read/write split: upload to master, cluster slaves read via subscription.
    @param mode: "upload" publishes a task, "download" pops one atomic task.
    @param message: task payload to publish (upload mode only).
    @return: upload -> True/False; download -> popped element or False.
    """
    if mode == "upload":
        # Publish-side: reject empty payloads.
        if not message:
            logger.warning(f"<RedisClient> EmptyTask || 要上传的消息载体为空")
            return False
        self.db.lpush("Poseidon", message)
        return True
    if mode == "download":
        # Subscribe-side: pop one atomic task, if any are queued.
        return self.db.lpop("Poseidon") if self.db.exists("Poseidon") else False
def ddt(task_name: str = None):
    """Trigger overdue-subscription cleanup, for one task type or for all."""
    if not task_name:
        # No type specified: sweep every task class.
        _cd.startup_ddt_overdue()
        return
    if isinstance(task_name, str) and task_name in CRAWLER_SEQUENCE:
        _cd.startup_ddt_overdue(task_name)
    else:
        logger.warning(
            "<Interface>传入的参数(task_name)不合法,任务类型必须被指定在CRAWLER_SEQUENCE之中")
def _sync_launch_interval() -> dict:
    """
    Hot-load and sanitize the task launch-interval configuration.

    Each entry of LAUNCH_INTERVAL is validated/coerced:
      - an empty value or one <= 1 second is a fatal misconfiguration;
      - non-int intervals are rounded to int;
      - intervals under 60 seconds are clamped to 60.

    @return: sanitized {task_name: interval_seconds} mapping.
    @raise ValueError: if any interval is empty or <= 1.
    """
    # BUGFIX: work on a copy -- the old code aliased the module-level
    # LAUNCH_INTERVAL dict and mutated the shared config in place.
    launch_interval = dict(LAUNCH_INTERVAL)
    for task_name, interval in list(launch_interval.items()):
        if not interval or interval <= 1:
            logger.critical(f"<launch_interval>--{task_name}设置出现致命错误,即将熔断线程。间隔为空或小于1")
            # BUGFIX: was a bare `raise Exception` with no type or message.
            raise ValueError(f"invalid launch interval for {task_name!r}: {interval!r}")
        if not isinstance(interval, int):
            logger.warning(f"<launch_interval>--{task_name}任务间隔应为整型int,参数已拟合")
            launch_interval[task_name] = round(interval)
        # NOTE: deliberately checks the *original* value, matching the old
        # flow where the rounded value was not re-read before this test.
        if interval < 60:
            logger.warning(f"<launch_interval>--{task_name}任务频次过高,应不少于60/次,参数已拟合")
            launch_interval[task_name] = 60
    # The old `for/else: return` had no break, so it always returned here.
    return launch_interval
def send_email(msg, to_: List[str] or str or set, headers: str = None):
    """
    Send an ops notification. Plain-text body only.

    :param msg: message body.
    :param to_: recipient(s):
        1. 'self' -> send to the configured SMTP account itself;
        2. str    -> a single address;
        3. List[str]/set -> bulk send with identical content.
    :param headers: mail subject; defaults to '<V2Ray云彩姬>运维日志'.
    :@todo add log-file reading (open file) and rich-text (html) mail support.
    :return: False on invalid `to_` input, otherwise None.
    """
    headers = headers if headers else '<V2Ray云彩姬>运维日志'
    sender = SMTP_ACCOUNT.get('email')
    password = SMTP_ACCOUNT.get('sid')
    smtp_server = 'smtp.qq.com'
    message = MIMEText(msg, 'plain', 'utf-8')
    message['From'] = Header('ARAI.DM', 'utf-8')  # sender display name
    message['Subject'] = Header(f"{headers}", 'utf-8')
    server = smtplib.SMTP_SSL(smtp_server, 465)

    # Normalize `to_` into a set of address strings.
    if to_ == 'self':
        # BUGFIX: was `set(sender, )`, which split the address into a set of
        # single characters and mailed each character separately.
        to_ = {sender}
    if isinstance(to_, str):
        to_ = [to_, ]
    if isinstance(to_, list):
        to_ = set(to_)
    if not isinstance(to_, set):
        return False

    try:
        server.login(sender, password)
        for to in to_:
            try:
                # BUGFIX: email.Message.__setitem__ *appends* headers, so the
                # old loop accumulated To: headers across recipients. Delete
                # any previous To header (no-op when absent) before setting.
                del message['To']
                message['To'] = Header(to, 'utf-8')
                server.sendmail(sender, to, message.as_string())
                logger.success("发送成功->{}".format(to))
            except smtplib.SMTPRecipientsRefused:
                logger.warning('邮箱填写错误或不存在->{}'.format(to))
            except Exception as e:
                logger.error('>>> 发送失败 || {}'.format(e))
    finally:
        server.quit()
def refresh(self, key_name: str, cross_threshold: int = None) -> None:
    """
    Atomic link-pool refresh: delete every expired subscribe under key_name.

    @param key_name: secret_key hash to scan.
    @param cross_threshold: expiry threshold forwarded to is_stale().
    @return: None
    """
    pool: dict = self.db.hgetall(key_name)

    # Nothing to refresh: log and bail out early.
    if self.__len__(key_name) == 0:
        logger.warning('<{}> EMPTY - {}({})'.format(
            self.__class__.__name__, key_name, self.__len__(key_name)))
        return

    # Drop each subscribe whose lifetime crossed the threshold.
    for subscribe, end_life in pool.items():
        if self.is_stale(end_life, cross_threshold):
            logger.debug(f'del-({key_name})--{subscribe}')
            self.db.hdel(key_name, subscribe)
    logger.success('<{}> UPDATE - {}({})'.format(
        self.__class__.__name__, key_name, self.__len__(key_name)))
def push_info(self, user: dict or List[dict]):
    """
    Insert one or more user records into the v2raycs table.

    @param user: a single user dict or a list of user dicts; each must carry
        domain/subs/class_/end_life/res_time/passable/username/password/email/uuid.
    @return: None. Commits and closes the connection when done.
    """
    if isinstance(user, dict):
        user = [user, ]
    elif not isinstance(user, list):
        logger.warning('MySQL add_user 调用格式有误')
        # BUGFIX: the old code logged the bad input but then iterated it
        # anyway; abort instead of operating on garbage.
        return
    try:
        for user_ in user:
            try:
                # Parameterized INSERT -- values are never interpolated
                # into the SQL string.
                sql = f'INSERT INTO v2raycs (' \
                      f'domain, subs, class_,end_life,res_time,passable,username,password,email,uuid) VALUES (' \
                      f'%s, %s, %s,%s, %s, %s,%s, %s, %s,%s)'
                val = (user_["domain"], user_["subs"], user_['class_'], user_['end_life'], user_["res_time"],
                       user_['passable'], user_['username'], user_["password"], user_['email'], user_['uuid'])
                self.cursor.execute(sql, val)
            except KeyError as e:
                logger.warning(
                    f"MySQL数据解析出错,user:dict必须同时包含username、password以及email的键值对{e}"
                )
            except pymysql.err.IntegrityError as e:
                logger.warning(
                    f'{user_["username"]} -- 用户已在库,若需修改用户信息,请使用更新指令{e}')
            else:
                logger.success(f'{user_["username"]} -- 用户添加成功')
    finally:
        # Commit whatever succeeded and release the connection.
        self.conn.commit()
        self.conn.close()
def _is_overflow(task_name: str, rc=None):
    """
    Decide whether this node has reached its single-machine collection limit.

    @param task_name: class_
    @param rc: RedisClient object driver API
    @return: 'stop'     -- stop syncing and end this collection round
             'offload'  -- stop syncing and start executing collected tasks
             'continue' -- keep syncing tasks
    """
    # TODO make the cache operations atomic
    cap: int = SINGLE_TASK_CAP
    # Entries already sitting in the remote store for this class.
    storage_remain: int = rc.__len__(REDIS_SECRET_KEY.format(f'{task_name}'))
    # Tasks already buffered in this machine's queue.
    cache_size: int = Middleware.poseidon.qsize()

    # Remote store is at/over capacity: end the round.
    if storage_remain >= cap:
        logger.warning(
            f'<TaskManager> OverFlow || 任务溢出<{task_name}>({storage_remain}/{cap})'
        )
        return 'stop'

    # Cap the single-node workload at ~80% of full load to avoid absolute
    # overflow (x = 1 for a lone collector, else x = 1 / process count).
    if storage_remain + cache_size >= round(cap * 0.8):
        logger.debug(
            f'<TaskManager> BeatPause || 节拍停顿<{task_name}>({storage_remain + cache_size}/{cap})'
        )
        return 'offload'

    # Otherwise it is safe to keep syncing tasks.
    return 'continue'
def __init__(self):
    """
    Initialize the data-disaster-tolerance (DDT) Redis backend.

    When no dedicated slave (REDIS_SLAVER_DDT) is configured, backups are
    redirected to the next db index on the master itself.
    """
    super(RedisDataDisasterTolerance, self).__init__()

    from BusinessCentralLayer.setting import REDIS_SLAVER_DDT
    if not REDIS_SLAVER_DDT.get('host'):
        logger.warning('未设置数据容灾服务器,该职能将由Master执行')
        # BUGFIX: copy the mapping. The old code aliased REDIS_MASTER and
        # mutated the shared config, so `db` crept up by 1 on every
        # instantiation and leaked into every other REDIS_MASTER user.
        redis_virtual = dict(REDIS_MASTER)
        redis_virtual.update({'db': redis_virtual['db'] + 1})
        logger.debug("备份重定向 --> {}".format(redis_virtual))
    else:
        redis_virtual = REDIS_SLAVER_DDT

    # Snapshot container filled by run() before migration.
    self.docker = {}
    try:
        self.acm = RedisClient(host=redis_virtual['host'], port=redis_virtual['port'],
                               password=redis_virtual['password'])
        logger.info("DDT: Master({}) -> Slaver({})".format(REDIS_MASTER['host'], redis_virtual['host']))
    except redis.exceptions.ConnectionError as e:
        logger.exception(e)
    finally:
        # Record the resolved target even if the connection attempt failed.
        self.redis_virtual = redis_virtual
def control_driver(self, sub_info: List[str]):
    """
    Validate one subscribe link and decide whether to decouple (drop) it.

    @param sub_info: [subs, key_secret_class]
    @return: None; links to drop are appended to self.apollo as [class_, subs].
    """
    try:
        # Targeted decoupling: drop any link matching the kill keyword.
        if self.kill_ and self.kill_ in sub_info[0]:
            self.apollo.append([sub_info[-1], sub_info[0]])
        else:
            # Parse the subscription into node info.
            node_info: dict = subs2node(sub_info[0], False)
            if self.debug:
                print(
                    f"check -- {node_info['subs']} -- {node_info['node'].__len__()}"
                )
            # Decouple subscriptions that expose too few usable nodes.
            if node_info['node'].__len__() <= 3:
                self.apollo.append([sub_info[-1], sub_info[0]])
    # BUGFIX: `except UnicodeDecodeError or TypeError` evaluated the `or`
    # first and only ever caught UnicodeDecodeError; a tuple catches both.
    except (UnicodeDecodeError, TypeError) as e:
        logger.debug(
            f"Retry put the subscribe({sub_info}) to work queue -- {e}")
        # Retry each link up to 3 times, then mark it as timed out.
        if self.temp_cache.get(sub_info[0]):
            self.temp_cache[sub_info[0]] += 1
        else:
            self.temp_cache[sub_info[0]] = 1
        if self.temp_cache[sub_info[0]] <= 3:
            self.work_q.put_nowait(sub_info)
        else:
            self.apollo.append([sub_info[-1], sub_info[0]])
    except Exception as e:
        logger.warning(f"{sub_info} -- {e}")
def run(self, class_: str) -> None:
    """
    Run disaster-tolerance backup for one subscribe class.

    @param class_: subscribe type, e.g. `ssr`, `v2ray` or `trojan`.
    @return: None
    """
    key_name = REDIS_SECRET_KEY.format(class_)
    self.refresh(key_name, cross_threshold=6)

    # Snapshot master data: ... -> self.docker (bulk update replaces the
    # old per-item loop; same resulting mapping).
    self.docker.update(self.db.hgetall(key_name))

    # Migrate the snapshot: acm <- ...
    try:
        self.acm.get_driver().hset(key_name, mapping=self.docker)
    except redis.exceptions.DataError:
        logger.warning(f'({class_}):缓存可能被击穿或缓存为空,请系统管理员及时维护链接池!')
    except redis.exceptions.ConnectionError:
        logger.error(f"redis-slave {self.redis_virtual} 可能宕机")
    except Exception as e:
        logger.exception(e)
def _sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """
    Synchronize the task (message) queue for one subscribe class.

    @param class_: task type; one of CRAWLER_SEQUENCE.
    @param mode_sync: 'upload' -> generate tasks locally and push them onto
        the distributed queue; 'download' -> pull atomic tasks from the
        distributed queue into the local queue; 'force_run' -> load local
        preset tasks directly (single-node deployment / step debugging).
    @param only_sync: beat-sync thread lock -- handle exactly one atomic task.
    @param beat_sync: forwarded into the generated task statements.
    @return: 'stop' / 'offload' on early exit, otherwise None.
    """
    # BUGFIX: guard against the default mode_sync=None -- the old
    # `mode_sync.title()` raised AttributeError before anything ran.
    logger.info(
        f"<TaskManager> Sync{str(mode_sync).title()} || 正在同步<{class_}>任务队列...")

    # TODO make the sync behavior atomic
    rc = RedisClient()

    # Copy before shuffling/popping so actions.__all__ itself is untouched.
    task_list: list = actions.__all__.copy()
    random.shuffle(task_list)

    # Generate tasks on this node and push them onto the message queue.
    if mode_sync == 'upload':
        while True:
            if task_list.__len__() == 0:
                logger.success("<TaskManager> EmptyList -- 本机任务为空或已完全生成")
                break
            else:
                slave_ = task_list.pop()
                # Wrap the task as an exec-able statement.
                expr = f'from BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                       f'{slave_}(beat_sync={beat_sync}).run()'
                # Push the statement onto the distributed message queue.
                rc.sync_message_queue(mode='upload', message=expr)
                # Beat-sync thread lock: upload exactly one atomic task.
                if only_sync:
                    logger.warning(
                        "<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                    break
        logger.info(
            f"<TaskManager> 本节点任务({actions.__all__.__len__()})已同步至消息队列,"
            f"待集群接收订阅后既可完成后续任务")

    # Pull tasks from the distributed message queue.
    elif mode_sync == 'download':
        while True:
            # Overload guard: stop syncing when the local buffer nears its
            # capacity. _state is one of continue/offload/stop.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state
            # Fetch one atomic task (already wrapped as exec syntax).
            # todo move enqueueing into redis for a clean loop-exit condition
            atomic = rc.sync_message_queue(mode='download')
            if atomic:
                # Forward the statement to this node's Poseidon queue.
                Middleware.poseidon.put_nowait(atomic)
                logger.info(f'<TaskManager> offload atomic<{class_}>')
                # Beat-sync thread lock: download exactly one atomic task.
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务"
                    )
                    return 'offload'
            else:
                logger.warning(f"<TaskManager> SyncFinish -- <{class_}>无可同步任务")
                break

    elif mode_sync == 'force_run':
        for slave_ in task_list:
            # force_run: single-node deployment or step debugging. Even in
            # this mode the executed-task count must not exceed capacity.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state == 'stop':
                return 'stop'
            # Wrap the task as an exec-able statement.
            expr = f'from BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                   f'{slave_}(beat_sync={beat_sync}).run()'
            # Push the statement onto this node's Poseidon queue.
            Middleware.poseidon.put_nowait(expr)
            # The beat-sync lock still applies under force_run; it serves the
            # master's subscribe-replenish flow and overrides capacity checks.
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'
        else:
            logger.success(f"<TaskManager> ForceCollect"
                           f" -- 已将本地预设任务({actions.__all__.__len__()})录入待执行队列")
            return 'offload'
def manage_task(class_: str = 'v2ray', speedup: bool = True, only_sync=False, startup=None, beat_sync=True,
                force_run=None) -> bool:
    """
    Load and dispatch collection tasks for one subscribe class.

    @param force_run: debug-mode forced run; escapes the queue-full check.
    @param startup: grants this node collector rights and creates the
        coroutine workspace to execute queued tasks concurrently.
    @param only_sync: beat-sync thread lock; pushes exactly one atomic task
        into the Poseidon coroutine space when local tasks exist.
    @param class_: task type, must be in CRAWLER_SEQUENCE (ssr/v2ray/trojan).
    @param speedup: use the speedup plugin (coroutine-speedup by default).
    @param beat_sync: forwarded to _sync_actions / generated tasks.
    @return: True when a collection round was started (or force-run
        completed), False on bad input or when this node lacks rights.
    """
    # ----------------------------------------------------
    # Parameter review and translation
    # ----------------------------------------------------
    # Validate input. NOTE(review): the membership test runs before the
    # isinstance check; harmless for hashable inputs, but worth confirming.
    if class_ not in CRAWLER_SEQUENCE or not isinstance(class_, str):
        return False
    # Collector rights: explicit `startup` overrides the deployed config.
    local_work: bool = startup if startup else ENABLE_DEPLOY.get('tasks').get(
        'collector')
    # Forced run: an explicit argument wins; otherwise single-node deployment
    # decides. Defaults on under single-node mode; off when deployment form
    # is undefined (null).
    force_run = force_run if force_run else SINGLE_DEPLOYMENT
    # ----------------------------------------------------
    # Resolve the sync mode
    # ----------------------------------------------------
    # Collector rights split the modes: download (can collect) vs upload.
    mode_sync = "download" if local_work else "upload"
    # `force_run` has higher priority and replaces the normal sync mode.
    mode_sync = "force_run" if force_run else mode_sync
    # ----------------------------------------------------
    # Synchronize the message (task) queue
    # ----------------------------------------------------
    # Collectable node: pull tasks to run locally; otherwise generate tasks
    # and push them onto the message queue.
    response: str or bool = _sync_actions(
        class_=class_,
        only_sync=only_sync,
        beat_sync=beat_sync,
        mode_sync=mode_sync,
    )
    # ----------------------------------------------------
    # Initialize the coroutine space (execute tasks)
    # ----------------------------------------------------
    # A node entered via control-deploy necessarily has the right to create
    # the coroutine workspace.
    if force_run:
        if response == 'offload':
            logger.info(f'<TaskManager> ForceRun || <{class_}>采集任务启动')
            vsu(core=PuppetCore(), docker=Middleware.poseidon).run(speedup)
        logger.success(f'<TaskManager> ForceWorkFinish || <{class_}>采集任务结束')
        return True
    # if 'force_run' is False and the node has the permissions of collector
    if local_work:
        # if task queue can be work
        if response == 'offload':
            logger.info(f'<TaskManager> Run || <{class_}>采集任务启动')
            vsu(core=PuppetCore(), docker=Middleware.poseidon).run(speedup)
        logger.success(f'<TaskManager> Finish || <{class_}>采集任务结束')
        return True
    else:
        logger.warning(f"<TaskManager> Hijack<{class_}> || 当前节点不具备采集权限")
        return False