def select_subs_to_admin(select_netloc: str = None, _debug=False) -> dict:
    """Report pool status, or hand out one live subscribe for a given netloc.

    :param select_netloc: netloc filter; when falsy, only the pool status is returned.
    :param _debug: when True the returned subscribe is NOT detached from the pool.
    :return: {'msg': 'success'/'failed', ...} payload dict.
    """
    # All subscribes of every type currently in the pool.
    remain_subs = []
    # Pool status map: {crawler_type: {netloc: count}}.
    mapping_subs_status = {}
    # Link -> crawler-type map.
    mapping_subs_type = {}
    # Collect and normalise the pool data.
    for filed in CRAWLER_SEQUENCE:
        # Pull every subscribe link of this type from the pool.
        filed_sbus: list = RedisClient().sync_remain_subs(
            REDIS_SECRET_KEY.format(filed))
        # Extend the aggregate queue.
        remain_subs += filed_sbus
        # Extract the netloc range of these subs.
        urls = [urlparse(i[0]).netloc for i in filed_sbus]
        # Update the mapping tables.
        mapping_subs_status.update({filed: dict(Counter(urls))})
        mapping_subs_type.update(
            zip([i[0] for i in filed_sbus], [
                filed,
            ] * len(filed_sbus)))
    # Initial state: return the pool status only.
    if not select_netloc:
        return {'msg': 'success', 'info': mapping_subs_status}
    # A netloc was selected: dispatch a matching subscribe.
    else:
        for tag in remain_subs:
            # Unpack (subscribe_url, end_of_life).
            subscribe, end_life = tag[0], tag[-1]
            # Link matches the requested netloc AND survives at least `beyond` hours.
            if select_netloc in urlparse(subscribe).netloc and not RedisClient(
            ).is_stale(end_life, beyond=6):
                logger.debug("<SuperAdmin> -- 获取订阅")
                try:
                    return {
                        'msg': "success",
                        'debug': _debug,
                        'info': {
                            "subscribe": subscribe,
                            "endLife": end_life,
                            'subsType': mapping_subs_type[subscribe],
                            "netloc": select_netloc
                        }
                    }
                finally:
                    # Outside debug mode, detach the link in the background
                    # right after the return value is produced.
                    if not _debug:
                        threading.Thread(target=detach,
                                         kwargs={
                                             "subscribe": subscribe,
                                             'beat_sync': True
                                         }).start()
    # Out of stock, or bad request.
    return {
        'msg': "failed",
        "netloc": select_netloc,
        "info": "指令错误或不存在该类型订阅",
        "status": mapping_subs_status
    }
def run_business(self):
    """Clean, collect, dedupe and convert the subscribe pool, then launch Clash.

    :return: True once the local Clash URL has been opened.
    """
    # 1. Purge expired subscribes first (optional, gated by self.decouple).
    if self.decouple:
        logger.info("<ClashTaskAsh> ash | 正在清洗订阅池...")
        SubscribesCleaner(debug=False).interface()
    # 2. Pull every subscribe of every crawler class from the pool.
    logger.info("<ClashTaskAsh> ash | 正在拉取订阅堆...")
    rc = RedisClient().get_driver()
    rss_pool = [subscribe for key_ in CRAWLER_SEQUENCE for subscribe, _ in
                rc.hgetall(REDIS_SECRET_KEY.format(key_)).items()]
    # 2.1 Dedupe by netloc@query so the same provider/account appears once.
    rss_dict = {}
    for url in rss_pool:
        rss_dict.update({f"{urlparse(url).netloc}@{urlparse(url).query}": url})
    rss_pool = [i[-1] for i in rss_dict.items()]
    # 2.2 Detach the selected subscribes (take them out of the pool);
    # debug mode keeps them in place.
    if not self.debug:
        for subscribe in rss_pool:
            detach(subscribe=subscribe)
    # 3. Subscribe conversion.
    logger.info("<ClashTaskAsh> ash | 正在转换订阅模式...")
    # 4. Run the conversion and cache the config file.
    clash_adapter.api.run(subscribe=rss_pool)
    # 5. Open the local connection and start Clash.
    webbrowser.open(clash_adapter.api.url_scheme_download()['info'].format("http://127.0.0.1:8847/V2Ray云彩姬"))
    time.sleep(5)
    return True
class SubscribesCleaner(CoroutineSpeedup):
    """Subscribe-pool cleaner: detaches dead or undersized subscribes.

    (Original note: calls from mainland-China IPs may suffer a serious
    performance drop.)
    """

    def __init__(self, debug=False, kill_target: str = None):
        """
        :param debug: when True, print a trace line for every checked subscribe.
        :param kill_target: substring; any subscribe containing it is removed.
        """
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        # Redis hash keys of every crawler-class pool.
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target

    def offload_task(self):
        """Queue every (subscribe, pool-key) pair in the pool for checking."""
        for key_ in self.keys:
            for sub, _ in self.rc.hgetall(key_).items():
                self.work_q.put_nowait([sub, key_])

    def _del_subs(self, key_: str, subs: str, err_: str = '') -> None:
        """Remove *subs* from the *key_* pool and log the reason."""
        self.rc.hdel(key_, subs)
        logger.debug(f'>> Detach -> {subs} -- {err_}')

    def control_driver(self, sub_info: List[str]):
        """
        @param sub_info: [subs,key_secret_class]
        @return:
        """
        try:
            # Forced removal of the targeted cluster.
            if self.kill_ and self.kill_ in sub_info[0]:
                self._del_subs(sub_info[-1], sub_info[0], "target")
            else:
                # Resolve the subscribe into its node list.
                node_info: dict = subs2node(sub_info[0], False)
                if self.debug:
                    print(
                        f"check -- {node_info['subs']} -- {node_info['node'].__len__()}"
                    )
                # Too few nodes -> treat the subscribe as dead and detach it.
                if node_info['node'].__len__() <= 4:
                    self._del_subs(sub_info[-1], sub_info[0], "decouple")
        # BUGFIX: was `except UnicodeDecodeError or TypeError`, which evaluates
        # to UnicodeDecodeError only and silently let TypeError escape.
        except (UnicodeDecodeError, TypeError) as e:
            logger.debug(
                f"Retry put the subscribe({sub_info}) to work queue -- {e}")
            # Retry each link up to 3 times, tracking attempts per subscribe.
            if self.temp_cache.get(sub_info[0]):
                self.temp_cache[sub_info[0]] += 1
            else:
                self.temp_cache[sub_info[0]] = 1
            if self.temp_cache[sub_info[0]] <= 3:
                self.work_q.put_nowait(sub_info)
            else:
                self._del_subs(sub_info[-1], sub_info[0], e)
        except SystemExit:
            logger.critical("请关闭系统代理后再执行订阅清洗操作")
        except Exception as e:
            logger.warning(f"{sub_info} -- {e}")
            self._del_subs(sub_info[-1], sub_info[0])
def reset_task() -> list:
    """Build the per-run task queue from the registered crawler actions.

    Shuffles the global action registry, assigns each action to the crawler
    classes it declares (via its hyper_params), and sheds work for classes
    whose Redis pool is already full or near the risk threshold.

    :return: flat list of runnable action entities; [] on Redis connection error.
    """
    import random
    from src.BusinessCentralLayer.middleware.redis_io import RedisClient
    from src.BusinessCentralLayer.setting import SINGLE_TASK_CAP, REDIS_SECRET_KEY
    rc = RedisClient()
    # {task_name: [action, ...]} bucket per crawler class.
    running_state = dict(zip(CRAWLER_SEQUENCE, [[] for _ in range(len(CRAWLER_SEQUENCE))]))
    action_list = __entropy__.copy()
    # Total pending action count, decremented as work is shed below.
    qsize = len(action_list)
    random.shuffle(action_list)
    try:
        # Classify the concrete action entities per crawler class.
        for task_name in CRAWLER_SEQUENCE:
            # Remaining stock of this class in the pool.
            storage_remain: int = rc.get_len(REDIS_SECRET_KEY.format(f'{task_name}'))
            # Assign every action that enables this class in its hyper_params.
            for atomic in action_list:
                permission = {} if atomic.get('hyper_params') is None else atomic.get('hyper_params')
                if permission.get(task_name) is True:
                    running_state[task_name].append(atomic)
            # Pool overflow: return an empty execution queue for this class.
            if storage_remain >= SINGLE_TASK_CAP:
                running_state[task_name] = []
            # Stock plus pending work above the risk threshold (80% of cap):
            # shed actions until under threshold or the bucket is empty.
            while storage_remain + qsize > int(SINGLE_TASK_CAP * 0.8):
                if len(running_state[task_name]) < 1:
                    break
                running_state[task_name].pop()
                qsize -= 1
        # Flatten the non-empty buckets into one instance list.
        instances = [atomic for i in list(running_state.values()) if i for atomic in i]
        return instances
    # Network failure: swallow RedisClient connection errors and return no work.
    except ConnectionError:
        return []
def startup_ddt_overdue(self, task_name: str = None):
    """Refresh overdue pool entries for one task type, or for every deployed type.

    :param task_name: crawler class to refresh; None refreshes the whole cluster.
    """
    # Normalise to a list of task names, then refresh each one.
    targets = self.deploy_cluster if task_name is None else [task_name]
    for name in targets:
        RedisClient().refresh(key_name=REDIS_SECRET_KEY.format(name),
                              cross_threshold=3)
def to_redis():
    """Flush the in-memory per-class cache queues into Redis hashes, then reset them."""
    driver = RedisClient().get_driver()
    # Push each non-empty bucket as a whole mapping in a single HSET.
    for class_, payload in Middleware.cache_redis_queue.items():
        if payload:
            driver.hset(REDIS_SECRET_KEY.format(class_), mapping=payload)
            # logger.success(f">> PUSH -> Redis")
    # Clear every bucket after the flush.
    for class_ in Middleware.cache_redis_queue.keys():
        Middleware.cache_redis_queue[class_] = {}
def load_subs_set(self, sub_type):
    """Collect the pooled subscribes of *sub_type*, deduped by netloc.

    :param sub_type: crawler class name used to build the Redis hash key.
    :return: list of subscribe links, one per netloc (last one wins).
    """
    subs_mapping = {}
    rc = RedisClient()
    key_name = REDIS_SECRET_KEY.format(sub_type)
    for sub, _ in rc.sync_remain_subs(key_name):
        # Last link per netloc wins -> dedupes same-provider subscribes.
        subs_mapping.update({urlparse(sub).netloc: sub})
        # Outside debug mode, pop the subscribe from the pool.
        if not self.debug:
            # BUGFIX: the hash key and the field were fused into a single
            # argument (REDIS_SECRET_KEY.format(sub_type, sub) -- str.format
            # silently ignores the extra arg), so HDEL was issued without a
            # field and nothing was ever removed. hdel(name, field) is correct.
            rc.get_driver().hdel(key_name, sub)
    subs = list(subs_mapping.values())
    return subs
def _check_permission(self, sckey):
    """Populate self.permission CRUD flags from the auth hash of *sckey*.

    A registered key whose SURVIVE field is "True" gets read/update/
    survive/delete rights; anything else only has 'delete' revoked.
    """
    sckey_path = self.SCKEY_PATH_ROOT.format(sckey)
    rc = RedisClient().get_driver()
    if rc.exists(sckey_path):
        # BUGFIX: SURVIVE is stored under sckey_path (see register_auth),
        # but the lookup used the bare sckey and therefore always missed.
        if rc.hget(sckey_path, key="SURVIVE") == "True":
            # NOTE(review): 'crate' looks like a typo for 'create'; kept
            # as-is because other code may read this exact key.
            self.permission['crate'] = False
            self.permission['read'] = True
            self.permission['update'] = True
            self.permission['survive'] = True
            self.permission['delete'] = True
        else:
            self.permission['delete'] = False
    else:
        self.permission['delete'] = False
def register_auth(self, sckey):
    """
    :param sckey: db_token
    :return:
    """
    sckey_path = self.SCKEY_PATH_ROOT.format(sckey)
    driver = RedisClient().get_driver()
    # Initial auth record: creation timestamp plus default CRUD markers,
    # written field by field in declaration order.
    initial_fields = {
        "CREATE": str(datetime.now(TIME_ZONE_CN)),
        "READ": "None",
        "UPDATE": "None",
        "DELETE": "True",
        "SURVIVE": "True",
    }
    for field_name, field_value in initial_fields.items():
        driver.hset(sckey_path, key=field_name, value=field_value)
def pop_subs_to_admin(class_: str):
    """Pop the newest subscribe of *class_* from the pool for an admin.

    @param class_: crawler class name (pool selector).
    @return: {'msg': 'success', 'subscribe': ..., 'subsType': ...} or a
        failure dict when the pool is empty / an error occurs.
    """
    logger.debug("<SuperAdmin> -- 获取订阅")
    from src.BusinessLogicLayer.cluster.sailor import manage_task
    try:
        # Remaining links of this class in the pool.
        remain_subs: list = RedisClient().sync_remain_subs(
            REDIS_SECRET_KEY.format(class_))
        while True:
            # No link available: report the shortage.
            if remain_subs.__len__() == 0:
                logger.error(f'<SuperAdmin> -- 无可用<{class_}>订阅')
                return {'msg': 'failed', 'info': f"无可用<{class_}>订阅"}
            else:
                # Pop the most recently added (subscribe, end_life) pair.
                subs, end_life = remain_subs.pop()
                # The popped link is NOT removed from Redis straight away.
                # It is marked and pushed into the "apollo" buffer queue;
                # the ddt refresh workflow later deletes it together with
                # the expired links. The buffer keeps beat-synchronous
                # semantics and protects Redis from hot/accidental writes.
                # Coarse quality check of the subscribe (disabled):
                # if subs2node(subs=subs, cache_path=False, timeout=2)['node'].__len__() <= 3:
                #     logger.debug(f"<check> BadLink -- {subs}")
                #     continue
                # Beat-synchronous rollback: generate/sync exactly one
                # atomic task to refill the pool.
                threading.Thread(target=manage_task,
                                 kwargs={
                                     "class_": class_,
                                     "only_sync": True
                                 }).start()
                logger.success('管理员模式--链接分发成功')
                # Detach immediately: drop every subscribe of the same
                # account. beat_sync=True -> flush now; False -> deferred
                # (beat-synchronous) removal.
                threading.Thread(target=detach,
                                 kwargs={
                                     "subscribe": subs,
                                     'beat_sync': True
                                 }).start()
                return {
                    'msg': 'success',
                    'subscribe': subs,
                    'subsType': class_
                }
    except Exception as e:
        logger.exception(e)
        return {'msg': 'failed', 'info': str(e)}
def detach(subscribe, beat_sync=False):
    """
    Remove (or mark for removal) every pooled subscribe of the same account.

    @param subscribe: subscribe URL; its path component is the account token.
    @param beat_sync: True -> delete immediately; False -> back-date the
        expiry so the next ddt run sweeps it (beat-synchronous removal).
    @return:
    """
    from faker import Faker
    from urllib.parse import urlparse
    # The URL path is the account token shared by all of an account's links.
    token = urlparse(subscribe).path
    r = RedisClient().get_driver()
    # Walk every task-type pool.
    for task in CRAWLER_SEQUENCE:
        # Walk the links of this pool.
        for sub in r.hgetall(REDIS_SECRET_KEY.format(task)).items():
            # Match the account token.
            if token == urlparse(sub[0]).path:
                # Beat sync: remove the subscribe right now.
                if beat_sync:
                    r.hdel(REDIS_SECRET_KEY.format(task), sub[0])
                    logger.debug(f'>> Detach -> {sub[0]}')
                # Otherwise back-date the expiry; the link is deleted by the
                # next ddt task on any node.
                else:
                    r.hset(REDIS_SECRET_KEY.format(task), sub[0], str(Faker().past_datetime()))
                # First hit per pool ends the scan of that pool.
                break
def _update_entropy(rc=None, entropy=None):
    """Publish a human-readable digest of registered crawler actions to Redis.

    :param rc: optional RedisClient instance; one is created when omitted.
    :param entropy: action list to publish; defaults to the global __entropy__.
        (Added for compatibility with callers that pass entropy=...; the old
        single-argument call keeps working.)
    """
    try:
        # Fall back to the global registry when no snapshot is supplied.
        entropy = __entropy__ if entropy is None else entropy
        atomic_queue = []
        for i in entropy:
            # Collapse the enabled hyper-params into an "A&B" work-field tag.
            work_filed = [
                f"{j[0].upper()}" for j in i['hyper_params'].items() if j[-1]
            ]
            work_filed = "&".join(work_filed).strip()
            atomic_item = f"|{work_filed}| {i['name']}"
            atomic_queue.append(atomic_item)
        # Overwrite the published digest.
        if rc is None:
            rc = RedisClient()
        rc.get_driver().set(name=REDIS_SECRET_KEY.format("__entropy__"),
                            value="$".join(atomic_queue))
    except Exception as e:
        logger.exception(e)
def apis_get_subs_num() -> dict:
    """Return the pool's per-class subscribe counts (thin RedisClient wrapper)."""
    return RedisClient().subs_info()
def apis_admin_get_entropy() -> list:
    """Return the published crawler-action digest as a list of entries.

    :return: digest entries split on '$'; [] when the digest key has never
        been published (previously this raised AttributeError on None).
    """
    raw = RedisClient().get_driver().get(REDIS_SECRET_KEY.format("__entropy__"))
    # GET returns None for a missing key -- guard before splitting.
    return raw.split("$") if raw else []
def __init__(self, debug=False, kill_target: str = None):
    """
    :param debug: when True, emit per-subscribe check traces.
    :param kill_target: substring; matching subscribes are force-removed.
    """
    super(SubscribesCleaner, self).__init__()
    # Verbose-output switch.
    self.debug = debug
    # Redis hash keys of every crawler-class pool.
    self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
    # Raw redis driver.
    self.rc = RedisClient().get_driver()
    # Target substring for forced removal.
    self.kill_ = kill_target
def remove_auth(sckey):
    """Mark the auth record dead by flipping its SURVIVE flag to "False".

    NOTE(review): writes under the bare sckey, while register_auth writes
    under SCKEY_PATH_ROOT.format(sckey) -- confirm the two key shapes match.
    """
    RedisClient().get_driver().hset(sckey, key="SURVIVE", value="False")
class SubscribesCleaner(lsu):
    """Subscribe-pool cleaner (buffered variant): dead links are collected
    into the apollo buffer and removed in one pass by killer().

    (Original note: calls from mainland-China IPs may suffer a serious
    performance drop.)
    """

    def __init__(self, debug=False, kill_target: str = None):
        """
        :param debug: when True, print a trace line for every checked subscribe.
        :param kill_target: substring; any subscribe containing it is removed.
        """
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        # Redis hash keys of every crawler-class pool.
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target

    def offload_task(self):
        """Queue every (subscribe, pool-key) pair in the pool for checking."""
        for key_ in self.keys:
            for sub, _ in self.rc.hgetall(key_).items():
                self.work_q.put_nowait([sub, key_])

    def killer(self):
        """
        Flush the apollo removal buffer in one pass.

        @todo batch-remove or move the hashes in Redis
        @return:
        """
        if self.apollo:
            for kill_ in self.apollo:
                self.rc.hdel(kill_[0], kill_[-1])
                logger.debug(f'>> Detach -> {kill_[-1]}')

    def control_driver(self, sub_info: List[str]):
        """
        @param sub_info: [subs,key_secret_class]
        @return:
        """
        try:
            # Forced removal of the targeted cluster.
            if self.kill_ and self.kill_ in sub_info[0]:
                self.apollo.append([sub_info[-1], sub_info[0]])
            else:
                # Resolve the subscribe into its node list.
                node_info: dict = subs2node(sub_info[0], False)
                if self.debug:
                    print(
                        f"check -- {node_info['subs']} -- {node_info['node'].__len__()}"
                    )
                # Too few nodes -> buffer the subscribe for removal.
                if node_info['node'].__len__() <= 3:
                    self.apollo.append([sub_info[-1], sub_info[0]])
        # BUGFIX: was `except UnicodeDecodeError or TypeError`, which evaluates
        # to UnicodeDecodeError only and silently let TypeError escape.
        except (UnicodeDecodeError, TypeError) as e:
            logger.debug(
                f"Retry put the subscribe({sub_info}) to work queue -- {e}")
            # Retry each link up to 3 times, tracking attempts per subscribe.
            if self.temp_cache.get(sub_info[0]):
                self.temp_cache[sub_info[0]] += 1
            else:
                self.temp_cache[sub_info[0]] = 1
            if self.temp_cache[sub_info[0]] <= 3:
                self.work_q.put_nowait(sub_info)
            else:
                self.apollo.append([sub_info[-1], sub_info[0]])
        except Exception as e:
            logger.warning(f"{sub_info} -- {e}")
            self.apollo.append([sub_info[-1], sub_info[0]])
class SubscribesCleaner(CoroutineSpeedup):
    """Subscribe-pool cleaner with colored terminal output.

    (Original note: calls from mainland-China IPs may suffer a serious
    performance drop.)
    """

    def __init__(self, debug=False, kill_target: str = None):
        """
        :param debug: when True, echo a line for every valid subscribe.
        :param kill_target: substring; matching subscribes are force-removed.
        """
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        # Redis hash keys of every crawler-class pool.
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target

    def offload_task(self):
        """Queue every (subscribe, pool-key) pair in the pool for checking."""
        for key_ in self.keys:
            for sub, _ in self.rc.hgetall(key_).items():
                self.work_q.put_nowait([sub, key_])

    def _del_subs(self, key_: str, subs: str, err_) -> None:
        """Remove *subs* from the *key_* pool and echo the reason."""
        self.rc.hdel(key_, subs)
        # logger.debug(f'>> Detach -> {subs} -- {err_}')
        print(Fore.BLUE, f"[{datetime.now()}] detach -> {subs} {err_}")

    def control_driver(self, sub_info: List[str], threshold: int = 4):
        """
        :param sub_info: [subs,key_secret_class]
        :param threshold: confidence threshold -- subscribes resolving to this
            many nodes or fewer are removed.
        :return:
        """
        try:
            # Targeted cleanup of a designated subscribe source.
            if self.kill_ and self.kill_ in sub_info[0]:
                self._del_subs(sub_info[-1], sub_info[0], "target active removal")
            else:
                # Resolve the subscribe into its node list.
                node_info: dict = subs2node(sub_info[0])
                # Decouple: too few nodes -> remove.
                if node_info['node'].__len__() <= threshold:
                    self._del_subs(sub_info[-1], sub_info[0], "decouple active removal")
                elif self.debug:
                    print(
                        Fore.WHITE,
                        f"[{datetime.now()}] valid -- {node_info['subs']} -- {len(node_info['node'])}"
                    )
        except (UnicodeDecodeError, TypeError) as e:
            # Subscribe already marked "parse error": bump its request counter.
            if self.temp_cache.get(sub_info[0]):
                self.temp_cache[sub_info[0]] += 1
            # Otherwise mark it as a "parse error" subscribe.
            else:
                print(Fore.YELLOW, f"[{datetime.now()}] recheck -- {sub_info[0]}")
                self.temp_cache[sub_info[0]] = 1
            # Up to 3 retries: re-append to the tail of the work queue.
            if self.temp_cache[sub_info[0]] <= 3:
                self.work_q.put_nowait(sub_info)
            # More than 3 retries: remove.
            else:
                self._del_subs(sub_info[-1], sub_info[0], e)
        except SystemExit:
            warnings.warn("请关闭系统代理后部署订阅清洗任务")
        except Exception as e:
            logger.warning(f"{sub_info} -- {e}")
            self._del_subs(sub_info[-1], sub_info[0], e)
if "[1]V2Ray订阅链接" in usr_c: resp = sp.run(mode="v2ray") elif "[2]SSR订阅链接" in usr_c: resp = sp.run(mode="ssr") elif "[3]Trojan订阅连接" in usr_c: resp = sp.run(mode="trojan") elif "[4]查询可用链接" in usr_c: resp = sp.find_available_subscribe() elif "[5]返回" in usr_c: resp = True else: resp = False except TypeError: resp = True finally: return resp # -------------------------------- # API接口初始化 # -------------------------------- if ThreadPoolExecutor(max_workers=1).submit(NetChainReview().run).result(): rc = RedisClient() else: logger_local.warning("网络异常") easygui.msgbox("网络异常", title=TITLE) exit() if __name__ == '__main__': V2RaycSpiderMasterPanel().home_menu()
def sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """
    @param class_: crawler class whose task queue is synchronised.
    @param mode_sync: 'upload' / 'download' / 'force_run' -- message-queue
        sync mode. (Original note: False -> sync local task queue, True ->
        sync Redis subscription tasks.)
    @param only_sync: beat-sync thread lock -- handle exactly one atomic task.
    @param beat_sync: forwarded to the generated collector entities.
    @return: scheduler state ('stop'/'offload'/None) depending on the branch.
    """
    logger.info(
        f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")
    # ================================================
    # Beat pause -- atomic sync
    # ================================================
    rc = RedisClient()
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state
    # ================================================
    # Refresh task info
    # ================================================
    # Publish the data of the collection tasks about to launch.
    _update_entropy(rc=rc, entropy=__entropy__)
    # Batch-produce collector entities from the factory mapping table.
    sync_queue: list = ActionShunt(class_, silence=True, beat_sync=beat_sync).shunt()
    # Shuffle the task sequence.
    random.shuffle(sync_queue)
    # ================================================
    # $ Core business
    # ================================================
    if mode_sync == 'upload':
        # fixme: interim fix for the link-overflow problem.
        if round(rc.get_len(REDIS_SECRET_KEY.format(class_)) * 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务即将溢出,上传任务被劫持")
            return None
        # Keep instantiating collection tasks.
        for _ in range(sync_queue.__len__()):
            rc.sync_message_queue(mode='upload', message=class_)
            # Beat-sync thread lock: upload exactly one atomic task.
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                break
        logger.success("<TaskManager> UploadTasks -- 任务上传完毕")
    elif mode_sync == 'download':
        async_queue: list = []
        while True:
            # Fetch an atomic task.
            atomic = rc.sync_message_queue(mode='download')
            # Sync data only when the atom is valid.
            if atomic and atomic in CRAWLER_SEQUENCE:
                # Check the sync state to avoid overload: stop syncing when
                # the local buffer approaches its capacity limit.
                # _state is one of continue/offload/stop.
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state
                if async_queue.__len__() == 0:
                    async_queue = ActionShunt(atomic, silence=True, beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)
                # Push the collector entity into the local Poseidon queue.
                Middleware.poseidon.put_nowait(async_queue.pop())
                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )
                # Beat-sync thread lock: download exactly one atomic task.
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}>触发节拍同步线程锁,仅下载一枚原子任务"
                    )
                    return 'offload'
            else:
                return 'offload'
    elif mode_sync == 'force_run':
        for slave_ in sync_queue:
            # ================================================================================================
            # TODO v5.4.r new feature: scaffold spawn
            # 1. Earlier, neither `run` nor `force-run` via scaffold could start collection tasks while
            #    the queue was fully loaded, because the lines below take a lock.
            # 2. The new `spawn` command bypasses this module: SpawnBooster compiles the underlying
            #    code directly and starts the collector.
            # ================================================================================================
            # force_run: suited to single-node deployment or single-step debugging.
            # Overflow must still be ruled out -- even force_run may not exceed the task capacity.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state
            # Push the collector entity into the local Poseidon queue.
            Middleware.poseidon.put_nowait(slave_)
            # Beat-sync thread lock: download exactly one atomic task.
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'
    return 'offload'
class SubscribesCleaner(CoroutineSpeedup):
    """Subscribe-pool cleaner hardened against Redis outages.

    (Original note: calls from mainland-China IPs may suffer a serious
    performance drop.)
    """

    def __init__(self, debug=False, kill_target: str = None):
        """
        :param debug: when True, echo a line for every valid subscribe.
        :param kill_target: substring; matching subscribes are force-removed.
        """
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        # Redis hash keys of every crawler-class pool.
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target

    def offload_task(self):
        """Queue every (subscribe, pool-key) pair, tolerating a broken pool."""
        for key_ in self.keys:
            try:
                for sub, _ in self.rc.hgetall(key_).items():
                    self.work_q.put_nowait([sub, key_])
            except redis_error.ResponseError:
                logger.critical("Link pool is broken down.")

    def _del_subs(self, key_: str, subs: str, err_) -> None:
        """Remove *subs* from the *key_* pool, tolerating local network loss."""
        try:
            self.rc.hdel(key_, subs)
            terminal_echo(f"detach -> {subs} {err_}", 3)
        except redis_error.ConnectionError:
            logger.critical(
                "<SubscribeCleaner> The local network communication is abnormal."
            )

    def control_driver(self, sub_info: List[str], threshold: int = 4):
        """
        :param sub_info: [subs,key_secret_class]
        :param threshold: confidence threshold -- subscribes resolving to this
            many nodes or fewer are removed.
        :return:
        """
        # NOTE(review): invokes the parent hook first -- presumably progress
        # bookkeeping; confirm against CoroutineSpeedup.control_driver.
        super(SubscribesCleaner, self).control_driver(task=sub_info)
        try:
            # Targeted cleanup of a designated subscribe source.
            if self.kill_ and self.kill_ in sub_info[0]:
                self._del_subs(sub_info[-1], sub_info[0], "target active removal")
            else:
                # Resolve the subscribe into its node list.
                node_info: dict = subs2node(sub_info[0])
                # Decouple: too few nodes -> remove.
                if node_info['node'].__len__() <= threshold:
                    self._del_subs(sub_info[-1], sub_info[0], "decouple active removal")
                elif self.debug:
                    terminal_echo(
                        f"valid -- {node_info['subs']} -- {len(node_info['node'])}", 1)
        except (UnicodeDecodeError, TypeError) as e:
            # Subscribe already marked "parse error": bump its request counter.
            if self.temp_cache.get(sub_info[0]):
                self.temp_cache[sub_info[0]] += 1
            # Otherwise mark it as a "parse error" subscribe.
            else:
                terminal_echo(f"recheck -- {sub_info[0]}", 2)
                self.temp_cache[sub_info[0]] = 1
            # Up to 3 retries: re-append to the tail of the work queue.
            if self.temp_cache[sub_info[0]] <= 3:
                self.work_q.put_nowait(sub_info)
            # More than 3 retries: remove.
            else:
                self._del_subs(sub_info[-1], sub_info[0], e)
        except SystemExit:
            warnings.warn("请关闭系统代理后部署订阅清洗任务")
        except Exception as e:
            logger.warning(f"{sub_info} -- {e}")
            self._del_subs(sub_info[-1], sub_info[0], e)

    def killer(self):
        """Completion hook: announce the cleanup result outside debug mode."""
        if not self.debug:
            logger.success("<SubscribesCleaner> --> decouple compete.")
def _sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """
    @param class_: crawler class whose task queue is synchronised.
    @param mode_sync: 'upload' / 'download' / 'force_run' -- message-queue
        sync mode. (Original note: False -> sync local task queue, True ->
        sync Redis subscription tasks.)
    @param only_sync: beat-sync thread lock -- handle exactly one atomic task.
    @param beat_sync: forwarded into the generated exec expressions.
    @return:
    """
    logger.info(f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")
    # TODO atomic sync behaviour
    rc = RedisClient()
    # Copy the action list with copy(); pop() below must not mutate the
    # actions.__all__ registry itself.
    # [A-Cloud,B-Cloud, ...]
    task_list: list = actions.__all__.copy()
    random.shuffle(task_list)
    # Generate tasks locally and push them into the message queue.
    if mode_sync == 'upload':
        # Interim fix for the link-overflow problem.
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) * 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务已溢出,上传任务被劫持")
            return None
        # Keep instantiating collection tasks.
        while True:
            if task_list.__len__() == 0:
                logger.success("<TaskManager> EmptyList -- 本机任务为空或已完全生成")
                break
            else:
                slave_ = task_list.pop()
                # Wrap the task launch into an exec expression.
                expr = f'from src.BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                       f'{slave_}(beat_sync={beat_sync}).run()'
                # Push the expression into the message queue.
                rc.sync_message_queue(mode='upload', message=expr)
                # Beat-sync thread lock: upload exactly one atomic task.
                if only_sync:
                    logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                    break
        logger.info(f"<TaskManager> 本节点任务({actions.__all__.__len__()})已同步至消息队列,"
                    f"待集群接收订阅后既可完成后续任务")
    # Sync tasks from the distributed message queue.
    elif mode_sync == 'download':
        while True:
            # Check the sync state to avoid overload: stop syncing when the
            # local buffer approaches its capacity limit.
            # _state is one of continue/offload/stop.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state
            # Fetch an atomic task, already wrapped as an exec expression.
            # todo move the enqueue into redis to obtain a sane loop-exit condition
            atomic = rc.sync_message_queue(mode='download')
            # Sync data only when the atom is valid.
            if atomic:
                # Push the expression into the local Poseidon queue.
                Middleware.poseidon.put_nowait(atomic)
                logger.info(f'<TaskManager> offload atomic<{class_}>')
                # Beat-sync thread lock: download exactly one atomic task.
                if only_sync:
                    logger.warning(f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                    return 'offload'
            # Otherwise warn and quit the sync early.
            else:
                logger.warning(f"<TaskManager> SyncFinish -- <{class_}>无可同步任务")
                break
    elif mode_sync == 'force_run':
        for slave_ in task_list:
            # force_run: suited to single-node deployment or single-step debugging.
            _state = _is_overflow(task_name=class_, rc=rc)
            # Overflow must be ruled out -- even force_run may not exceed the capacity.
            if _state == 'stop':
                return 'stop'
            # Wrap the task launch into an exec expression.
            expr = f'from src.BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                   f'{slave_}(beat_sync={beat_sync}).run()'
            # Push the expression into the local Poseidon queue.
            Middleware.poseidon.put_nowait(expr)
            # The beat-sync thread lock still constrains force_run.
            # It serves the host's subscribe replenishment, has higher
            # priority, and interrupts the sync regardless of queue capacity.
            if only_sync:
                logger.warning(f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'
        else:
            # for-else: the loop completed without an early return.
            logger.success(f"<TaskManager> ForceCollect"
                           f" -- 已将本地预设任务({actions.__all__.__len__()})录入待执行队列")
            return 'offload'
def _sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """
    @param class_: crawler class whose task queue is synchronised.
    @param mode_sync: 'upload' / 'download' / 'force_run' -- message-queue
        sync mode. (Original note: False -> sync local task queue, True ->
        sync Redis subscription tasks.)
    @param only_sync: beat-sync thread lock -- handle exactly one atomic task.
    @param beat_sync: forwarded to the generated collector entities.
    @return:
    """
    logger.info(
        f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")
    # TODO atomic sync behaviour
    rc = RedisClient()
    # Beat pause: bail out before generating work when the queue is saturated.
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state
    sync_queue: list = ActionShunt(class_, silence=True, beat_sync=beat_sync).shunt()
    random.shuffle(sync_queue)
    # Generate tasks locally and push them into the message queue.
    if mode_sync == 'upload':
        # fixme: interim fix for the link-overflow problem.
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) * 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务即将溢出,上传任务被劫持")
            return None
        # Keep instantiating collection tasks.
        for _ in range(sync_queue.__len__()):
            rc.sync_message_queue(mode='upload', message=class_)
            # Beat-sync thread lock: upload exactly one atomic task.
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                break
        logger.success("<TaskManager> UploadTasks -- 任务上传完毕")
    # Sync tasks from the distributed message queue.
    elif mode_sync == 'download':
        async_queue: list = []
        while True:
            # Fetch an atomic task.
            atomic = rc.sync_message_queue(mode='download')
            # Sync data only when the atom is valid.
            if atomic and atomic in CRAWLER_SEQUENCE:
                # Check the sync state to avoid overload: stop syncing when
                # the local buffer approaches its capacity limit.
                # _state is one of continue/offload/stop.
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state
                if async_queue.__len__() == 0:
                    async_queue = ActionShunt(atomic, silence=True, beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)
                # Push the collector entity into the local Poseidon queue.
                Middleware.poseidon.put_nowait(async_queue.pop())
                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )
                # Beat-sync thread lock: download exactly one atomic task.
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}>触发节拍同步线程锁,仅下载一枚原子任务"
                    )
                    return 'offload'
            # Otherwise quit the sync early.
            else:
                # logger.warning(f"<TaskManager> SyncFinish -- <{atomic}>无可同步任务")
                return 'offload'
    elif mode_sync == 'force_run':
        for slave_ in sync_queue:
            # force_run: suited to single-node deployment or single-step debugging.
            # Overflow must still be ruled out -- even force_run may not
            # exceed the task capacity.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state
            # Push the collector entity into the local Poseidon queue.
            Middleware.poseidon.put_nowait(slave_)
            # The beat-sync thread lock still constrains force_run.
            # It serves the host's subscribe replenishment, has higher
            # priority, and interrupts the sync regardless of queue capacity.
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'
    return 'offload'