Example No. 1
0
def _sync_actions(
    class_: str,
    mode_sync: str = None,
    only_sync=False,
    beat_sync=True,
):
    """
    Synchronize crawler tasks for one task class.

    @param class_: task-class name; used as the queue key everywhere below.
    @param mode_sync: sync strategy — 'upload' pushes atomic tasks onto the
        Redis message queue, 'download' drains the distributed queue into the
        local Poseidon queue, 'force_run' feeds the local queue directly
        (single-machine / debug mode). May be None (no-op fall-through).
    @param only_sync: beat-sync thread lock — when True, handle exactly one
        atomic task and stop.
    @param beat_sync: forwarded to ActionShunt when building task queues.
    @return: a scheduler state string ('stop'/'offload'/'continue'/...) or
        None when the upload guard trips; None implicitly otherwise.
    """
    # BUGFIX: mode_sync defaults to None; the original called
    # mode_sync.title() unconditionally and raised AttributeError on entry.
    _mode_label = mode_sync.title() if mode_sync else ""
    logger.info(
        f"<TaskManager> Sync{_mode_label} || 正在同步<{class_}>任务队列...")

    # TODO make the sync behaviour atomic
    rc = RedisClient()

    # Beat pause: bail out before doing any work if the queue is saturated.
    _state = _is_overflow(task_name=class_, rc=rc)
    if _state == 'stop':
        return _state

    sync_queue: list = ActionShunt(class_, silence=True,
                                   beat_sync=beat_sync).shunt()
    random.shuffle(sync_queue)

    # Generate tasks locally and push them onto the message queue.
    if mode_sync == 'upload':

        # FIXME temporary guard: avoid overflowing the connection pool
        if round(rc.__len__(REDIS_SECRET_KEY.format(class_)) *
                 1.25) > SINGLE_TASK_CAP:
            logger.warning("<TaskManager> UploadHijack -- 连接池任务即将溢出,上传任务被劫持")
            return None

        # Keep instantiating collection tasks, one message per queue entry.
        for _ in range(len(sync_queue)):

            rc.sync_message_queue(mode='upload', message=class_)

            # Beat-sync thread lock: upload a single atomic task only.
            if only_sync:
                logger.warning("<TaskManager> OnlySync -- 触发节拍同步线程锁,仅上传一枚原子任务")
                break
        logger.success("<TaskManager> UploadTasks -- 任务上传完毕")

    # Drain tasks from the distributed message queue.
    elif mode_sync == 'download':
        async_queue: list = []

        while True:

            # Fetch one atomic task.
            atomic = rc.sync_message_queue(mode='download')

            # Only sync data for valid atomic tasks.
            if atomic and atomic in CRAWLER_SEQUENCE:

                # Overload protection: stop syncing when the local buffer
                # is about to hit its capacity limit.
                # _state is one of: continue / offload / stop
                _state = _is_overflow(task_name=atomic, rc=rc)
                if _state != 'continue':
                    return _state

                # Lazily (re)fill the local work queue when it runs dry.
                if not async_queue:
                    async_queue = ActionShunt(atomic,
                                              silence=True,
                                              beat_sync=beat_sync).shunt()
                    random.shuffle(async_queue)

                # Push the executable statement onto the local Poseidon queue.
                Middleware.poseidon.put_nowait(async_queue.pop())

                logger.info(
                    f'<TaskManager> offload atomic<{atomic}>({Middleware.poseidon.qsize()})'
                )

                # Beat-sync thread lock: download a single atomic task only.
                if only_sync:
                    logger.warning(
                        f"<TaskManager> OnlySync -- <{atomic}>触发节拍同步线程锁,仅下载一枚原子任务"
                    )
                    return 'offload'

            # Otherwise warn and leave the sync loop early.
            else:
                # logger.warning(f"<TaskManager> SyncFinish -- <{atomic}>无可同步任务")
                return 'offload'

    elif mode_sync == 'force_run':
        for slave_ in sync_queue:

            # force_run: meant for single-machine deployment or step debugging.
            # Even in force_run mode the number of executed tasks must not
            # exceed the task capacity, so the overflow check still applies.
            _state = _is_overflow(task_name=class_, rc=rc)
            if _state != 'continue':
                return _state

            # Push the executable statement onto the local Poseidon queue.
            Middleware.poseidon.put_nowait(slave_)

            # force_run is still constrained by the beat-sync thread lock;
            # this serves the host's subscription top-up operation, which has
            # higher priority and force-interrupts the sync regardless of
            # remaining queue capacity.
            if only_sync:
                logger.warning(
                    f"<TaskManager> OnlySync -- <{class_}>触发节拍同步线程锁,仅下载一枚原子任务")
                return 'stop'

        return 'offload'
Example No. 2
0
def apis_admin_get_entropy() -> list:
    """Read the stored '__entropy__' record and split it on '$' separators."""
    driver = RedisClient().get_driver()
    raw = driver.get(REDIS_SECRET_KEY.format("__entropy__"))
    # NOTE(review): assumes the key exists — a missing key would make
    # driver.get() return None and .split() raise; confirm upstream seeds it.
    return raw.split("$")
Example No. 3
0
 def __init__(self, debug=False, kill_target: str = None):
     """Collect the pool keys to sweep and open a Redis driver.

     @param debug: verbose/diagnostic mode flag, stored as-is.
     @param kill_target: optional subscription link to be removed.
     """
     super(SubscribesCleaner, self).__init__()
     self.rc = RedisClient().get_driver()
     self.kill_ = kill_target
     self.debug = debug
     # One pool key per crawler task class.
     self.keys = [REDIS_SECRET_KEY.format(seq) for seq in CRAWLER_SEQUENCE]
Example No. 4
0
def select_subs_to_admin(select_netloc: str = None, _debug=False) -> dict:
    """
    Query the subscription pool, optionally handing out one live subscription.

    @param select_netloc: netloc filter; when falsy, only the pool status
        summary is returned.
    @param _debug: when False, a handed-out subscription is detached in a
        background thread.
    @return: dict with 'msg' plus either pool status or subscription info.
    """
    # All subscriptions currently in the pool, across every type.
    remain_subs = []
    # Pool status map: task class -> {netloc: count}.
    mapping_subs_status = {}
    # Link -> task-class map.
    mapping_subs_type = {}
    # FIX: reuse this single client everywhere below; the original created
    # fresh RedisClient() instances inside the loops, opening redundant
    # connections for no behavioural gain.
    rc = RedisClient()
    # Clean/aggregate the data.
    for field in CRAWLER_SEQUENCE:
        # Pull every subscription link of this type out of the pool.
        field_subs: list = rc.sync_remain_subs(
            REDIS_SECRET_KEY.format(field))
        # Extend the combined queue.
        remain_subs += field_subs
        # Project each entry onto its netloc.
        urls = [urlparse(i[0]).netloc for i in field_subs]
        # Update the status and type maps.
        mapping_subs_status.update({field: dict(Counter(urls))})
        mapping_subs_type.update(
            zip([i[0] for i in field_subs], [
                field,
            ] * len(field_subs)))
    # No netloc requested: return the pool status summary.
    if not select_netloc:
        rc.update_api_status(api_name="search",
                             date_format=str(datetime.now(TIME_ZONE_CN)))
        return {'msg': 'success', 'info': mapping_subs_status}
    for tag in remain_subs:
        # Extract the info keys.
        subscribe, end_life = tag[0], tag[-1]
        # Match the requested netloc and require >= 6 hours of remaining life.
        if select_netloc in urlparse(subscribe).netloc and not rc.is_stale(
                end_life, beyond=6):
            logger.debug("<SubscribeIO> -- GET SUBSCRIPTION")
            rc.update_api_status(api_name="get",
                                 date_format=str(datetime.now(TIME_ZONE_CN)))
            try:
                return {
                    'msg': "success",
                    'debug': _debug,
                    'info': {
                        "subscribe": subscribe,
                        "endLife": end_life,
                        'subsType': mapping_subs_type[subscribe],
                        "netloc": select_netloc
                    }
                }
            finally:
                # Detach the handed-out subscription asynchronously so the
                # response is not delayed (skipped in debug mode).
                if not _debug:
                    threading.Thread(target=detach,
                                     kwargs={
                                         "subscribe": subscribe,
                                         'beat_sync': True
                                     }).start()
    # Out of stock, or the instruction/netloc was invalid.
    return {
        'msg': "failed",
        "netloc": select_netloc,
        "info": "指令错误或不存在该类型订阅",
        "status": mapping_subs_status
    }