def select_subs_to_admin(select_netloc: str = None, _debug=False) -> dict:
    # All subscriptions of every type currently held in the pool
    remain_subs = []
    # Pool status mapping: {task type: {netloc: count}}
    mapping_subs_status = {}
    # Link-to-type mapping
    mapping_subs_type = {}
    # Clean the data
    for field in CRAWLER_SEQUENCE:
        # Fetch every subscription link of this type from the pool
        field_subs: list = RedisClient().sync_remain_subs(
            REDIS_SECRET_KEY.format(field))
        # Extend the aggregate queue
        remain_subs += field_subs
        # Extract the netloc of each subscription
        urls = [urlparse(i[0]).netloc for i in field_subs]
        # Update the mapping tables
        mapping_subs_status.update({field: dict(Counter(urls))})
        mapping_subs_type.update(
            zip([i[0] for i in field_subs], [field] * len(field_subs)))

    # Initial state: return the pool status overview
    if not select_netloc:
        return {'msg': 'success', 'info': mapping_subs_status}
    # A netloc was specified: dispatch a subscription for that netloc
    else:
        for tag in remain_subs:
            # Unpack the stored entry
            subscribe, end_life = tag[0], tag[-1]
            # The link matches the requested netloc and survives at least `beyond` hours
            if select_netloc in urlparse(subscribe).netloc \
                    and not RedisClient().is_stale(end_life, beyond=6):
                logger.debug("<SuperAdmin> -- fetching subscription")
                try:
                    return {
                        'msg': "success",
                        'debug': _debug,
                        'info': {
                            "subscribe": subscribe,
                            "endLife": end_life,
                            'subsType': mapping_subs_type[subscribe],
                            "netloc": select_netloc
                        }
                    }
                finally:
                    if not _debug:
                        threading.Thread(target=detach,
                                         kwargs={
                                             "subscribe": subscribe,
                                             'beat_sync': True
                                         }).start()
        # Out of stock or bad request
        return {
            'msg': "failed",
            "netloc": select_netloc,
            "info": "Bad request, or no subscription of this type is available",
            "status": mapping_subs_status
        }
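A hedged usage sketch of the two call modes above. It assumes the module-level imports and a reachable Redis pool; "example.com" and the printed shapes are illustrative, not taken from the project.

# Hedged usage sketch -- "example.com" is a placeholder netloc.
overview = select_subs_to_admin()
print(overview['info'])        # pool overview, e.g. {'v2ray': {'example.com': 3}, ...}

picked = select_subs_to_admin(select_netloc="example.com", _debug=True)
if picked['msg'] == 'success':
    print(picked['info']['subscribe'], picked['info']['subsType'])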
    def run_business(self):
        # 1. Clean out expired subscriptions
        if self.decouple:
            logger.info("<ClashTaskAsh> ash | Cleaning the subscription pool...")
            SubscribesCleaner(debug=False).interface()
        # 2. Pull the subscription pool
        logger.info("<ClashTaskAsh> ash | Pulling the subscription heap...")
        rc = RedisClient().get_driver()
        rss_pool = [subscribe for key_ in CRAWLER_SEQUENCE for subscribe, _ in
                    rc.hgetall(REDIS_SECRET_KEY.format(key_)).items()]
        # 2.1 Deduplicate subscriptions by "netloc@query" to avoid name collisions
        rss_dict = {}
        for url in rss_pool:
            rss_dict.update({f"{urlparse(url).netloc}@{urlparse(url).query}": url})
        rss_pool = list(rss_dict.values())

        # 2.2 Remove the selected subscriptions from the pool (skipped in debug mode)
        if not self.debug:
            for subscribe in rss_pool:
                detach(subscribe=subscribe)
        # 3. Convert the subscriptions
        logger.info("<ClashTaskAsh> ash | Converting subscriptions...")
        # 4. Run the conversion and cache the config file
        clash_adapter.api.run(subscribe=rss_pool)
        # 5. Open the local link and launch Clash
        webbrowser.open(clash_adapter.api.url_scheme_download()['info'].format("http://127.0.0.1:8847/V2Ray云彩姬"))
        time.sleep(5)
        return True
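The "netloc@query" key in step 2.1 is what keeps duplicate subscriptions out of the conversion pool. A small, self-contained illustration with hypothetical URLs:

# Illustration of the dedup key used in step 2.1; the URLs are placeholders.
from urllib.parse import urlparse

pool = [
    "https://a.example/sub?token=1",
    "https://a.example/sub?token=1",   # same netloc and query -> dropped
    "https://b.example/sub?token=2",
]
unique = {f"{urlparse(u).netloc}@{urlparse(u).query}": u for u in pool}
print(list(unique.values()))           # only two URLs remain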
class SubscribesCleaner(CoroutineSpeedup):
    """Decoupling/cleaning plugin: calls from mainland-China IPs are likely to suffer a performance drop."""
    def __init__(self, debug=False, kill_target: str = None):
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target

    def offload_task(self):
        for key_ in self.keys:
            for sub, _ in self.rc.hgetall(key_).items():
                self.work_q.put_nowait([sub, key_])

    def _del_subs(self, key_: str, subs: str, err_: str = '') -> None:
        self.rc.hdel(key_, subs)
        logger.debug(f'>> Detach -> {subs} -- {err_}')

    def control_driver(self, sub_info: List[str]):
        """

        @param sub_info: [subs,key_secret_class]
        @return:
        """
        try:
            # Decouple the specified cluster
            if self.kill_ and self.kill_ in sub_info[0]:
                self._del_subs(sub_info[-1], sub_info[0], "target")

            else:
                # Parse the subscription
                node_info: dict = subs2node(sub_info[0], False)
                # Print debug information
                if self.debug:
                    print(
                        f"check -- {node_info['subs']} -- {len(node_info['node'])}"
                    )
                # Decouple the subscription
                if len(node_info['node']) <= 4:
                    self._del_subs(sub_info[-1], sub_info[0], "decouple")

        except (UnicodeDecodeError, TypeError) as e:
            logger.debug(
                f"Retry put the subscribe({sub_info}) to work queue -- {e}")

            # Retry each link up to 3 times and mark links that keep timing out
            if self.temp_cache.get(sub_info[0]):
                self.temp_cache[sub_info[0]] += 1
            else:
                self.temp_cache[sub_info[0]] = 1
            if self.temp_cache[sub_info[0]] <= 3:
                self.work_q.put_nowait(sub_info)
            else:
                self._del_subs(sub_info[-1], sub_info[0], e)

        except SystemExit:
            logger.critical("Please disable the system proxy before running the subscription cleanup.")
        except Exception as e:
            logger.warning(f"{sub_info} -- {e}")
            self._del_subs(sub_info[-1], sub_info[0])
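run_business above drives this cleaner through interface(), which is inherited from CoroutineSpeedup and not shown in these examples, so its exact behavior is assumed here. A hedged sketch of the two typical invocations; the host fragment is hypothetical:

# Hedged sketch -- interface() comes from CoroutineSpeedup and is assumed to drive
# offload_task()/control_driver(); debug=True only prints per-subscription check results.
SubscribesCleaner(debug=True).interface()

# kill_target drops every subscription whose URL contains the fragment
# ("badhost.example" is a placeholder).
SubscribesCleaner(kill_target="badhost.example").interface()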
Example #4
def reset_task() -> list:
    import random
    from src.BusinessCentralLayer.middleware.redis_io import RedisClient
    from src.BusinessCentralLayer.setting import SINGLE_TASK_CAP, REDIS_SECRET_KEY

    rc = RedisClient()
    running_state = dict(zip(CRAWLER_SEQUENCE, [[] for _ in range(len(CRAWLER_SEQUENCE))]))
    action_list = __entropy__.copy()
    qsize = len(action_list)
    random.shuffle(action_list)
    try:
        # Classify the concrete tasks for each task type
        for task_name in CRAWLER_SEQUENCE:
            # Remaining amount of data of this type in the pool
            storage_remain: int = rc.get_len(REDIS_SECRET_KEY.format(task_name))
            # Collect the atomic tasks permitted for this type
            for atomic in action_list:
                permission = {} if atomic.get('hyper_params') is None else atomic.get('hyper_params')
                if permission.get(task_name) is True:
                    running_state[task_name].append(atomic)
            # The stored data already overflows: return an empty execution queue
            if storage_remain >= SINGLE_TASK_CAP:
                running_state[task_name] = []
            # Cached + stored data exceeds the risk threshold
            while storage_remain + qsize > int(SINGLE_TASK_CAP * 0.8):
                if len(running_state[task_name]) < 1:
                    break
                running_state[task_name].pop()
                qsize -= 1

        instances = [atomic for i in list(running_state.values()) if i for atomic in i]
        return instances
    # Network failure: explicitly catch RedisClient() connection errors
    except ConnectionError:
        return []
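reset_task filters the __entropy__ entries by their hyper_params flags. The entry shape this function (and _update_entropy further below) expects looks roughly like the following; the name and flag values are illustrative:

# Illustrative shape of a single __entropy__ entry as consumed by reset_task / _update_entropy.
example_atomic = {
    "name": "SomeCloudAction",                      # hypothetical action name
    "hyper_params": {"v2ray": True, "ssr": False},  # per-task-type permission flags
}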
Example #5
    def startup_ddt_overdue(self, task_name: str = None):
        if task_name is None:
            for task_name in self.deploy_cluster:
                RedisClient().refresh(
                    key_name=REDIS_SECRET_KEY.format(task_name),
                    cross_threshold=3)
        else:
            RedisClient().refresh(key_name=REDIS_SECRET_KEY.format(task_name),
                                  cross_threshold=3)
    def to_redis():
        r = RedisClient().get_driver()
        for docker in Middleware.cache_redis_queue.items():
            key_name = REDIS_SECRET_KEY.format(docker[0])
            if docker[-1]:
                r.hset(key_name, mapping=docker[-1])
        # logger.success(f">> PUSH -> Redis")

        for k in Middleware.cache_redis_queue.keys():
            Middleware.cache_redis_queue[k] = {}
Example #7
    def load_subs_set(self, sub_type):
        subs_mapping = {}
        for sub, _ in RedisClient().sync_remain_subs(
                REDIS_SECRET_KEY.format(sub_type)):
            subs_mapping.update({urlparse(sub).netloc: sub})
            # When not in debug mode, remove the subscription from the pool
            if not self.debug:
                RedisClient().get_driver().hdel(
                    REDIS_SECRET_KEY.format(sub_type), sub)
        subs = list(subs_mapping.values())
        return subs
Example #8
    def _check_permission(self, sckey):
        sckey_path = self.SCKEY_PATH_ROOT.format(sckey)
        rc = RedisClient().get_driver()
        if rc.exists(sckey_path):
            if rc.hget(sckey, key="SURVIVE") == "True":
                self.permission['create'] = False
                self.permission['read'] = True
                self.permission['update'] = True
                self.permission['survive'] = True
                self.permission['delete'] = True
            else:
                self.permission['delete'] = False
        else:
            self.permission['delete'] = False
Example #9
    def register_auth(self, sckey):
        """

        :param sckey: db_token
        :return:
        """
        sckey_path = self.SCKEY_PATH_ROOT.format(sckey)
        driver = RedisClient().get_driver()
        driver.hset(sckey_path,
                    key="CREATE",
                    value=str(datetime.now(TIME_ZONE_CN)))
        driver.hset(sckey_path, key="READ", value="None")
        driver.hset(sckey_path, key="UPDATE", value="None")
        driver.hset(sckey_path, key="DELETE", value="True")
        driver.hset(sckey_path, key="SURVIVE", value="True")
def pop_subs_to_admin(class_: str):
    """

    @param class_:
    @return:
    """
    logger.debug("<SuperAdmin> -- 获取订阅")
    from src.BusinessLogicLayer.cluster.sailor import manage_task

    try:
        # 获取该类型订阅剩余链接
        remain_subs: list = RedisClient().sync_remain_subs(
            REDIS_SECRET_KEY.format(class_))
        while True:
            # 若无可用链接则返回错误信息
            if remain_subs.__len__() == 0:
                logger.error(f'<SuperAdmin> --  无可用<{class_}>订阅')
                return {'msg': 'failed', 'info': f"无可用<{class_}>订阅"}
            else:
                # 从池中获取(最新加入的)订阅s-e
                subs, end_life = remain_subs.pop()

                # 将s-e加入缓冲队列,该队列将被ddt的refresh工作流同过期链接一同删除
                # 使用缓冲队列的方案保证节拍同步,防止过热操作/失误操作贯穿Redis

                # 既当管理员通过此接口获取链接时,被返回的链接不会直接从池中删去
                # 而是触发缓冲机制,既将该链接标记后加入apollo缓冲队列
                # apollo队列内的元素都是欲删除缓存,当ddt发动后会一次性情况当前所有的缓存

                # 对订阅进行质量粗检
                # if subs2node(subs=subs, cache_path=False, timeout=2)['node'].__len__() <= 3:
                #     logger.debug(f"<check> BadLink -- {subs}")
                #     continue

                # 使用节拍同步线程锁发起连接池回滚指令,仅生成/同步一枚原子任务
                threading.Thread(target=manage_task,
                                 kwargs={
                                     "class_": class_,
                                     "only_sync": True
                                 }).start()
                logger.success('管理员模式--链接分发成功')

                # 立即执行链接解耦,将同一账号的所有订阅移除
                # beat_sync =True立即刷新,False延迟刷新(节拍同步)
                threading.Thread(target=detach,
                                 kwargs={
                                     "subscribe": subs,
                                     'beat_sync': True
                                 }).start()

                return {
                    'msg': 'success',
                    'subscribe': subs,
                    'subsType': class_
                }
    except Exception as e:
        logger.exception(e)
        return {'msg': 'failed', 'info': str(e)}
def detach(subscribe, beat_sync=False):
    """

    @param subscribe:
    @param beat_sync: 是否立即删除, True:立即删除,False:节拍同步,随ddt删除
    @return:
    """
    from faker import Faker
    from urllib.parse import urlparse

    # 清洗出订阅中的token
    token = urlparse(subscribe).path

    r = RedisClient().get_driver()

    # 遍历所有任务类型
    for task in CRAWLER_SEQUENCE:
        # 遍历某种类型的链接池
        for sub in r.hgetall(REDIS_SECRET_KEY.format(task)).items():
            # 匹配用户token
            if token == urlparse(sub[0]).path:
                # 若节拍同步,立即移除订阅
                if beat_sync:
                    r.hdel(REDIS_SECRET_KEY.format(task), sub[0])
                    logger.debug(f'>> Detach -> {sub[0]}')
                # 否则将订阅过期时间标记为过期,该链接将随下一波任一节点的ddt任务被删除
                else:
                    r.hset(REDIS_SECRET_KEY.format(task), sub[0],
                           str(Faker().past_datetime()))
                break
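A minimal usage sketch of the two removal modes described in the docstring; the subscription URL is a placeholder:

# beat_sync=True deletes the matching hash field right away;
# beat_sync=False only back-dates the expiry, so the next ddt refresh sweeps it out.
detach(subscribe="https://example.com/link/sub?token=abc", beat_sync=True)   # immediate removal
detach(subscribe="https://example.com/link/sub?token=abc", beat_sync=False)  # deferred to ddt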
def _update_entropy(rc=None):
    # Assemble the entropy annotation data
    try:
        atomic_queue = []
        for i in __entropy__:
            work_field = [
                f"{j[0].upper()}" for j in i['hyper_params'].items() if j[-1]
            ]
            work_field = "&".join(work_field).strip()
            atomic_item = f"|{work_field}| {i['name']}"
            atomic_queue.append(atomic_item)
        # Update the list stored in Redis
        if rc is None:
            rc = RedisClient()
        rc.get_driver().set(name=REDIS_SECRET_KEY.format("__entropy__"),
                            value="$".join(atomic_queue))
    except Exception as e:
        logger.exception(e)
Example #13
def apis_get_subs_num() -> dict:
    return RedisClient().subs_info()
Example #14
def apis_admin_get_entropy() -> list:
    return RedisClient().get_driver().get(REDIS_SECRET_KEY.format("__entropy__")).split("$")
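_update_entropy above stores items of the form |FLAGS| name joined by "$", and apis_admin_get_entropy splits them back out. A hedged round-trip sketch; the printed entry is illustrative and a string-decoding Redis driver is assumed:

# Writes "$".join(atomic_queue) under REDIS_SECRET_KEY.format("__entropy__") ...
_update_entropy()
# ... and reads it back as a list, e.g. ["|V2RAY&SSR| SomeCloudAction", ...]
for item in apis_admin_get_entropy():
    print(item)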
Example #15
    def __init__(self, debug=False, kill_target: str = None):
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target
Example #16
    def remove_auth(sckey):
        RedisClient().get_driver().hset(sckey, key="SURVIVE", value="False")
Example #17
class SubscribesCleaner(lsu):
    """Decoupling/cleaning plugin: calls from mainland-China IPs are likely to suffer a performance drop."""
    def __init__(self, debug=False, kill_target: str = None):
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target

    def offload_task(self):
        for key_ in self.keys:
            for sub, _ in self.rc.hgetall(key_).items():
                self.work_q.put_nowait([sub, key_])

    def killer(self):
        """
        @todo batch-remove or move Redis hash entries
        @return:
        """
        if self.apollo:
            for kill_ in self.apollo:
                self.rc.hdel(kill_[0], kill_[-1])
                logger.debug(f'>> Detach -> {kill_[-1]}')

    def control_driver(self, sub_info: List[str]):
        """

        @param sub_info: [subs,key_secret_class]
        @return:
        """
        try:
            # Decouple the specified cluster
            if self.kill_ and self.kill_ in sub_info[0]:
                self.apollo.append([sub_info[-1], sub_info[0]])
            else:
                # Parse the subscription
                node_info: dict = subs2node(sub_info[0], False)
                # Print debug information
                if self.debug:
                    print(
                        f"check -- {node_info['subs']} -- {len(node_info['node'])}"
                    )
                # Decouple the subscription
                if len(node_info['node']) <= 3:
                    self.apollo.append([sub_info[-1], sub_info[0]])
        except (UnicodeDecodeError, TypeError) as e:
            logger.debug(
                f"Retry put the subscribe({sub_info}) to work queue -- {e}")

            # Retry each link up to 3 times and mark links that keep timing out
            if self.temp_cache.get(sub_info[0]):
                self.temp_cache[sub_info[0]] += 1
            else:
                self.temp_cache[sub_info[0]] = 1
            if self.temp_cache[sub_info[0]] <= 3:
                self.work_q.put_nowait(sub_info)
            else:
                self.apollo.append([sub_info[-1], sub_info[0]])

        except Exception as e:
            logger.warning(f"{sub_info} -- {e}")
            self.apollo.append([sub_info[-1], sub_info[0]])
Example #18
class SubscribesCleaner(CoroutineSpeedup):
    """Decoupling/cleaning plugin: calls from mainland-China IPs are likely to suffer a performance drop."""
    def __init__(self, debug=False, kill_target: str = None):
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target

    def offload_task(self):
        for key_ in self.keys:
            for sub, _ in self.rc.hgetall(key_).items():
                self.work_q.put_nowait([sub, key_])

    def _del_subs(self, key_: str, subs: str, err_) -> None:
        self.rc.hdel(key_, subs)
        # logger.debug(f'>> Detach -> {subs} -- {err_}')
        print(Fore.BLUE, f"[{datetime.now()}] detach -> {subs} {err_}")

    def control_driver(self, sub_info: List[str], threshold: int = 4):
        """

        :param sub_info: [subs,key_secret_class]
        :param threshold: decoupling confidence threshold; subscriptions with this many nodes or fewer are removed
        :return:
        """
        try:
            # Clean out the specified subscription source
            if self.kill_ and self.kill_ in sub_info[0]:
                self._del_subs(sub_info[-1], sub_info[0],
                               "target active removal")
            else:
                # Parse the subscription
                node_info: dict = subs2node(sub_info[0])
                # Decouple the subscription
                if len(node_info['node']) <= threshold:
                    self._del_subs(sub_info[-1], sub_info[0],
                                   "decouple active removal")
                elif self.debug:
                    print(
                        Fore.WHITE,
                        f"[{datetime.now()}] valid -- {node_info['subs']} -- {len(node_info['node'])}"
                    )

        except (UnicodeDecodeError, TypeError) as e:
            # Subscriptions already marked "parse error": bump their request count
            if self.temp_cache.get(sub_info[0]):
                self.temp_cache[sub_info[0]] += 1
            # Otherwise mark the subscription as "parse error"
            else:
                print(Fore.YELLOW,
                      f"[{datetime.now()}] recheck -- {sub_info[0]}")
                self.temp_cache[sub_info[0]] = 1
            # Fewer than 3 retries so far: append the link back to the task queue
            if self.temp_cache[sub_info[0]] <= 3:
                self.work_q.put_nowait(sub_info)
            # More than 3 retries: remove it
            else:
                self._del_subs(sub_info[-1], sub_info[0], e)
        except SystemExit:
            warnings.warn("Please disable the system proxy before deploying the subscription cleanup task.")
        except Exception as e:
            logger.warning(f"{sub_info} -- {e}")
            self._del_subs(sub_info[-1], sub_info[0], e)
Example #19
            if "[1]V2Ray订阅链接" in usr_c:
                resp = sp.run(mode="v2ray")
            elif "[2]SSR订阅链接" in usr_c:
                resp = sp.run(mode="ssr")
            elif "[3]Trojan订阅连接" in usr_c:
                resp = sp.run(mode="trojan")
            elif "[4]查询可用链接" in usr_c:
                resp = sp.find_available_subscribe()
            elif "[5]返回" in usr_c:
                resp = True
            else:
                resp = False
        except TypeError:
            resp = True
        finally:
            return resp


# --------------------------------
# API initialization
# --------------------------------
if ThreadPoolExecutor(max_workers=1).submit(NetChainReview().run).result():
    rc = RedisClient()
else:
    logger_local.warning("Network error")
    easygui.msgbox("Network error", title=TITLE)
    exit()

if __name__ == '__main__':
    V2RaycSpiderMasterPanel().home_menu()
Example #20
def sync_actions(
    class_: str,
    mode_sync: str = None,
    only_sync=False,
    beat_sync=True,
):
    """

    @param class_:
    @param mode_sync:  是否同步消息队列。False:同步本机任务队列,True:同步Redis订阅任务
    @param only_sync:
    @param beat_sync:
    @return:
    """
    logger.info(
        f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")

    # ================================================
    # 节拍停顿 原子同步
    # ================================================
    rc = RedisClient()
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state

    # ================================================
    # 更新任务信息
    # ================================================
    # 公示即将发动的采集任务数据
    _update_entropy(rc=rc, entropy=__entropy__)
    # 通由工厂读取映射表批量生产采集器运行实体
    sync_queue: list = ActionShunt(class_, silence=True,
                                   beat_sync=beat_sync).shunt()
    # 打乱任务序列
    random.shuffle(sync_queue)

    # ================================================
    # $执行核心业务
    # ================================================
    if mode_sync == 'upload':
        # fixme:临时方案:解决链接溢出问题
        if round(rc.get_len(REDIS_SECRET_KEY.format(class_)) *
                 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务即将溢出,上传任务被劫持")
            return None
        # 持续实例化采集任务
        for _ in range(sync_queue.__len__()):
            rc.sync_message_queue(mode='upload', message=class_)
            # 节拍同步线程锁
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                break
        logger.success("<TaskManager> UploadTasks -- 任务上传完毕")
    elif mode_sync == 'download':
        async_queue: list = []
        while True:
            # 获取原子任务
            atomic = rc.sync_message_queue(mode='download')
            # 若原子有效则同步数据
            if atomic and atomic in CRAWLER_SEQUENCE:
                # 判断同步状态
                # 防止过载。当本地缓冲任务即将突破容载极限时停止同步
                # _state 状态有三,continue/offload/stop
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state
                if async_queue.__len__() == 0:
                    async_queue = ActionShunt(atomic,
                                              silence=True,
                                              beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)
                # 将采集器实体推送至Poseidon本机消息队列
                Middleware.poseidon.put_nowait(async_queue.pop())
                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )
                # 节拍同步线程锁
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}>触发节拍同步线程锁,仅下载一枚原子任务"
                    )
                    return 'offload'
            else:
                return 'offload'
    elif mode_sync == 'force_run':
        for slave_ in sync_queue:
            # ================================================================================================
            # TODO v5.4.r 版本新增特性 scaffold spawn
            # 1. 之前版本中通由scaffold 无论运行 run 还是 force-run 指令都无法在队列满载的情况下启动采集任务
            # 主要原因在于如下几行代码加了锁
            # 2. 通过新增的spawn指令可绕过此模块通由SpawnBooster直接编译底层代码启动采集器
            # ================================================================================================
            # force_run :适用于单机部署或单步调试下
            # 需要确保无溢出风险,故即使是force_run的启动模式,任务执行数也不应逾越任务容载数
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state

            # 将采集器实体推送至Poseidon本机消息队列
            Middleware.poseidon.put_nowait(slave_)

            # 节拍同步线程锁
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'

        return 'offload'
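_is_overflow is referenced throughout these examples but never shown. Based only on the comments above (three states: continue / offload / stop, bounded by the task capacity), a minimal sketch of that contract might look like this; the thresholds and the use of Middleware.poseidon.qsize() are assumptions, not the project's actual implementation:

def _is_overflow(task_name: str, rc=None) -> str:
    """Hedged sketch of the continue/offload/stop contract; thresholds are assumptions."""
    rc = rc or RedisClient()
    remote_remain = rc.get_len(REDIS_SECRET_KEY.format(task_name))
    if remote_remain >= SINGLE_TASK_CAP:                 # remote pool already at capacity -> hard stop
        return 'stop'
    if Middleware.poseidon.qsize() >= SINGLE_TASK_CAP:   # local buffer saturated -> hand off
        return 'offload'
    return 'continue'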
Example #21
class SubscribesCleaner(CoroutineSpeedup):
    """解耦清洗插件:国内IP调用很可能出现性能滑坡"""
    def __init__(self, debug=False, kill_target: str = None):
        super(SubscribesCleaner, self).__init__()
        self.debug = debug
        self.keys = [REDIS_SECRET_KEY.format(s) for s in CRAWLER_SEQUENCE]
        self.rc = RedisClient().get_driver()
        self.kill_ = kill_target

    def offload_task(self):
        for key_ in self.keys:
            try:
                for sub, _ in self.rc.hgetall(key_).items():
                    self.work_q.put_nowait([sub, key_])
            except redis_error.ResponseError:
                logger.critical("Link pool is broken down.")

    def _del_subs(self, key_: str, subs: str, err_) -> None:
        try:
            self.rc.hdel(key_, subs)
            terminal_echo(f"detach -> {subs} {err_}", 3)
        except redis_error.ConnectionError:
            logger.critical(
                "<SubscribeCleaner> The local network communication is abnormal."
            )

    def control_driver(self, sub_info: List[str], threshold: int = 4):
        """

        :param sub_info: [subs,key_secret_class]
        :param threshold: decoupling confidence threshold; subscriptions with this many nodes or fewer are removed
        :return:
        """
        super(SubscribesCleaner, self).control_driver(task=sub_info)
        try:
            # Clean out the specified subscription source
            if self.kill_ and self.kill_ in sub_info[0]:
                self._del_subs(sub_info[-1], sub_info[0],
                               "target active removal")
            else:
                # Parse the subscription
                node_info: dict = subs2node(sub_info[0])
                # Decouple the subscription
                if len(node_info['node']) <= threshold:
                    self._del_subs(sub_info[-1], sub_info[0],
                                   "decouple active removal")
                elif self.debug:
                    terminal_echo(
                        f"valid -- {node_info['subs']} -- {len(node_info['node'])}",
                        1)
        except (UnicodeDecodeError, TypeError) as e:
            # Subscriptions already marked "parse error": bump their request count
            if self.temp_cache.get(sub_info[0]):
                self.temp_cache[sub_info[0]] += 1
            # Otherwise mark the subscription as "parse error"
            else:
                terminal_echo(f"recheck -- {sub_info[0]}", 2)
                self.temp_cache[sub_info[0]] = 1
            # Fewer than 3 retries so far: append the link back to the task queue
            if self.temp_cache[sub_info[0]] <= 3:
                self.work_q.put_nowait(sub_info)
            # More than 3 retries: remove it
            else:
                self._del_subs(sub_info[-1], sub_info[0], e)
        except SystemExit:
            warnings.warn("Please disable the system proxy before deploying the subscription cleanup task.")
        except Exception as e:
            logger.warning(f"{sub_info} -- {e}")
            self._del_subs(sub_info[-1], sub_info[0], e)

    def killer(self):
        if not self.debug:
            logger.success("<SubscribesCleaner> --> decouple compete.")
Example #22
def _sync_actions(
        class_: str,
        mode_sync: str = None,
        only_sync=False,
        beat_sync=True,
):
    """

    @param class_:
    @param mode_sync:  是否同步消息队列。False:同步本机任务队列,True:同步Redis订阅任务
    @param only_sync:
    @param beat_sync:
    @return:
    """
    logger.info(f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")

    # TODO 原子化同步行为
    rc = RedisClient()

    # 拷贝生成队列,需使用copy()完成拷贝,否则pop()会影响actions-list本体
    # [A-Cloud,B-Cloud, ...]
    task_list: list = actions.__all__.copy()
    random.shuffle(task_list)

    # 在本机环境中生成任务并加入消息队列
    if mode_sync == 'upload':

        # 临时方案,解决链接溢出问题
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) * 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务已溢出,上传任务被劫持")
            return None

        # 持续实例化采集任务
        while True:
            if task_list.__len__() == 0:
                logger.success("<TaskManager> EmptyList -- 本机任务为空或已完全生成")
                break
            else:
                slave_ = task_list.pop()

                # 将相应的任务执行语句转换成exec语法
                expr = f'from src.BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                       f'{slave_}(beat_sync={beat_sync}).run()'

                # 将执行语句同步至消息队列
                rc.sync_message_queue(mode='upload', message=expr)

                # 节拍同步线程锁
                if only_sync:
                    logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                    break

        logger.info(f"<TaskManager> 本节点任务({actions.__all__.__len__()})已同步至消息队列,"
                    f"待集群接收订阅后既可完成后续任务")

    # 同步分布式消息队列的任务
    elif mode_sync == 'download':
        while True:

            # 判断同步状态
            # 防止过载。当本地缓冲任务即将突破容载极限时停止同步
            # _state 状态有三,continue/offload/stop
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state

            # 获取原子任务,该任务应已封装为exec语法
            # todo 将入队操作封装到redis里,以获得合理的循环退出条件
            atomic = rc.sync_message_queue(mode='download')

            # 若原子有效则同步数据
            if atomic:
                # 将执行语句推送至Poseidon本机消息队列
                Middleware.poseidon.put_nowait(atomic)
                logger.info(f'<TaskManager> offload atomic<{class_}>')

                # 节拍同步线程锁
                if only_sync:
                    logger.warning(f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                    return 'offload'

            # 否则打印警告日志并提前退出同步
            else:
                logger.warning(f"<TaskManager> SyncFinish -- <{class_}>无可同步任务")
                break

    elif mode_sync == 'force_run':
        for slave_ in task_list:

            # force_run :适用于单机部署或单步调试下
            _state = _is_overflow(task_name=class_, rc=rc)

            # 需要确保无溢出风险,故即使是force_run的启动模式,任务执行数也不应逾越任务容载数
            if _state == 'stop':
                return 'stop'

            # 将相应的任务执行语句转换成exec语法
            expr = f'from src.BusinessLogicLayer.cluster.slavers.actions import {slave_}\n' \
                   f'{slave_}(beat_sync={beat_sync}).run()'

            # 将执行语句推送至Poseidon本机消息队列
            Middleware.poseidon.put_nowait(expr)

            # 在force_run模式下仍制约于节拍同步线程锁
            # 此举服务于主机的订阅补充操作
            # 优先级更高,不受队列可用容载影响强制中断同步操作
            if only_sync:
                logger.warning(f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'
        else:
            logger.success(f"<TaskManager> ForceCollect"
                           f" -- 已将本地预设任务({actions.__all__.__len__()})录入待执行队列")
            return 'offload'
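The exec-style expressions are only produced in these examples; the consumer side is not shown. A hedged sketch of how the local Poseidon queue is presumably drained (the loop shape is an assumption; only the expression format comes from _sync_actions above):

# Drain the local Poseidon queue and execute each expression.
# Each item looks like "from ...actions import SomeCloud\nSomeCloud(beat_sync=True).run()".
while not Middleware.poseidon.empty():
    expr = Middleware.poseidon.get_nowait()
    exec(expr)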
Example #23
def _sync_actions(
    class_: str,
    mode_sync: str = None,
    only_sync=False,
    beat_sync=True,
):
    """

    @param class_:
    @param mode_sync:  是否同步消息队列。False:同步本机任务队列,True:同步Redis订阅任务
    @param only_sync:
    @param beat_sync:
    @return:
    """
    logger.info(
        f"<TaskManager> Sync{mode_sync.title()} || 正在同步<{class_}>任务队列...")

    # TODO 原子化同步行为
    rc = RedisClient()

    # 节拍停顿
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state

    sync_queue: list = ActionShunt(class_, silence=True,
                                   beat_sync=beat_sync).shunt()
    random.shuffle(sync_queue)

    # 在本机环境中生成任务并加入消息队列
    if mode_sync == 'upload':

        # fixme:临时方案:解决链接溢出问题
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) *
                 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务即将溢出,上传任务被劫持")
            return None

        # 持续实例化采集任务
        for _ in range(sync_queue.__len__()):

            rc.sync_message_queue(mode='upload', message=class_)

            # 节拍同步线程锁
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                break
        logger.success("<TaskManager> UploadTasks -- 任务上传完毕")

    # 同步分布式消息队列的任务
    elif mode_sync == 'download':
        async_queue: list = []

        while True:

            # 获取原子任务
            atomic = rc.sync_message_queue(mode='download')

            # 若原子有效则同步数据
            if atomic and atomic in CRAWLER_SEQUENCE:

                # 判断同步状态
                # 防止过载。当本地缓冲任务即将突破容载极限时停止同步
                # _state 状态有三,continue/offload/stop
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state

                if async_queue.__len__() == 0:
                    async_queue = ActionShunt(atomic,
                                              silence=True,
                                              beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)

                # 将执行语句推送至Poseidon本机消息队列
                Middleware.poseidon.put_nowait(async_queue.pop())

                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )

                # 节拍同步线程锁
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}>触发节拍同步线程锁,仅下载一枚原子任务"
                    )
                    return 'offload'

            # 否则打印警告日志并提前退出同步
            else:
                # logger.warning(f"<TaskManager> SyncFinish -- <{atomic}>无可同步任务")
                return 'offload'

    elif mode_sync == 'force_run':
        for slave_ in sync_queue:

            # force_run :适用于单机部署或单步调试下
            # 需要确保无溢出风险,故即使是force_run的启动模式,任务执行数也不应逾越任务容载数
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state

            # 将执行语句推送至Poseidon本机消息队列
            Middleware.poseidon.put_nowait(slave_)

            # 在force_run模式下仍制约于节拍同步线程锁
            # 此举服务于主机的订阅补充操作
            # 优先级更高,不受队列可用容载影响强制中断同步操作
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'

        return 'offload'