Example #1
0
File: base.py Project: howie6879/2c
    def save_backup(self, doc_source: str, doc_source_name: str, doc_name: str) -> bool:
        """Persist the backup status of one document.

        Upserts a status record, keyed by backup type, source, source name
        and document name, into ``self.bak_coll`` and stamps it with the
        current time in whole seconds.

        Args:
            doc_source (str): Origin the document was fetched from.
            doc_source_name (str): Name of the document source.
            doc_name (str): Document name.
        Returns:
            bool: True if the status record was written, False if writing
                it raised an exception (the exception is logged, not raised).
        """
        file_msg = f"{doc_source}/{doc_source_name}/{doc_name}"
        try:
            filter_dict = {
                "backup_type": self.backup_type,
                "doc_source": doc_source,
                "doc_source_name": doc_source_name,
                "doc_name": doc_name,
            }
            # The filter fields are written back as well so that an upsert
            # creates a complete record.
            update_data = {"$set": {**filter_dict, "ts": int(time.time())}}
            self.bak_coll.update_one(
                filter=filter_dict, update=update_data, upsert=True
            )
        except Exception as e:
            LOGGER.error(f"Backup({self.backup_type}): 文章 {file_msg} 状态保存失败!{e}")
            return False
        LOGGER.info(f"Backup({self.backup_type}): 文章 {file_msg} 状态保存成功!")
        return True
Example #2
0
 def send(self, send_data) -> bool:
     """
     Deliver a document notification to a Bark terminal.

     :param send_data: dict describing the document; must contain
         ``doc_name``, ``doc_link``, ``doc_cus_des``, ``doc_source_name``
         and ``doc_id``
     :return: True if the document was already sent or has just been
         delivered, False if the request failed or was rejected
     """
     doc_name = send_data["doc_name"]
     doc_link = send_data["doc_link"]
     doc_cus_des = send_data["doc_cus_des"]
     doc_source_name = send_data["doc_source_name"]
     doc_id = send_data["doc_id"]
     if self.is_send(doc_id=doc_id):
         # Already delivered earlier: nothing to do.
         return True

     notice_msg = f"{doc_cus_des}👉{doc_source_name}_{doc_name}:{doc_link} 分发到 {self.send_type}"
     url = self.compose(send_data)
     try:
         resp = requests.post(url)
     except requests.RequestException as e:
         # A network failure is a failed delivery, not a crash.
         LOGGER.error(f"{notice_msg} 失败:{e}")
         return False

     # The body is not guaranteed to be JSON (e.g. a gateway error page),
     # so parse it defensively instead of letting the parse error escape.
     try:
         resp_code = json.loads(resp.text)["code"]
     except (ValueError, KeyError, TypeError):
         resp_code = None

     if resp.status_code == 200 and resp_code == 200:
         # Persist the delivery state to the database
         self.sl_coll.insert_one({
             "send_type": self.send_type,
             "doc_id": doc_id,
             "ts": int(time.time()),
         })
         LOGGER.info(f"{notice_msg} 成功!")
         return True

     # Fall back to the HTTP status when the body carried no usable code.
     errmsg = resp_code if resp_code is not None else resp.status_code
     LOGGER.error(f"{notice_msg} 失败:{errmsg}")
     return False
Example #3
0
File: base.py Project: howie6879/2c
    def delete_backup(
        self, doc_source: str, doc_source_name: str, doc_name: str
    ) -> bool:
        """Remove the backup status record of one document.

        Deletes at most one record from ``self.bak_coll`` that matches the
        backup type, source, source name and document name.

        Args:
            doc_source (str): Origin the document was fetched from.
            doc_source_name (str): Name of the document source.
            doc_name (str): Document name.
        Returns:
            bool: True if the delete call completed, False if it raised an
                exception (the exception is logged, not raised).
        """
        file_msg = f"{doc_source}/{doc_source_name}/{doc_name}"
        try:
            self.bak_coll.delete_one(
                {
                    "backup_type": self.backup_type,
                    "doc_source": doc_source,
                    "doc_source_name": doc_source_name,
                    "doc_name": doc_name,
                }
            )
        except Exception as e:
            LOGGER.error(f"Backup({self.backup_type}): 文章 {file_msg} 状态删除失败!{e}")
            return False
        LOGGER.info(f"Backup({self.backup_type}): 文章 {file_msg} 状态删除成功!")
        return True
Example #4
0
def create_app():
    """
    Build and configure the Flask web application.

    Reference: http://flask.pocoo.org/docs/1.0/quickstart/
    :return: the Flask application with the ``bp_api`` blueprint registered
    """
    app = Flask(__name__)

    with app.app_context():
        # Project-internal objects exposed to request handlers via app.config
        mongo = MongodbManager.get_mongo_base(mongodb_config=Config.MONGODB_CONFIG)
        loop = asyncio.get_event_loop()
        app.config.update(
            app_config=Config,
            app_logger=LOGGER,
            app_loop=loop,
            mongodb_base=mongo,
        )

        # NOTE: a start-up run of the WeChat-name spider
        # (run_wechat_name_spider(loop=...)) is currently commented out.
        LOGGER.info("API started successfully :)")

    app.register_blueprint(bp_api)
    return app
Example #5
0
    def delete(self, doc_source: str, doc_source_name: str, doc_name: str) -> bool:
        """Delete the stored copy of one document.

        Removes the matching records from ``self.liuli_backup_coll`` and, when
        that succeeds, also clears the document's backup status.

        Args:
            doc_source (str): Origin the document was fetched from.
            doc_source_name (str): Name of the document source.
            doc_name (str): Document name.
        Returns:
            bool: True if the delete helper reported success, else False.
        """
        file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
        query = {
            "doc_source": doc_source,
            "doc_source_name": doc_source_name,
            "doc_name": doc_name,
        }
        db_res = mongodb_delete_many_data(
            coll_conn=self.liuli_backup_coll, filter_dict=query
        )
        if not db_res["status"]:
            LOGGER.error(
                f"Backup({self.backup_type}): {file_path} 删除失败!{db_res['info']}"
            )
            return False

        LOGGER.info(f"Backup({self.backup_type}): {file_path} 删除成功!")
        # Drop the status record of this document as well
        self.delete_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        )
        return True
Example #6
0
def run_liuli_schedule(ll_config_name: str = "default"):
    """Load a task configuration and run it on a daily schedule, forever.

    The task is executed once immediately and afterwards at every time of
    day listed in the configuration. This function does not return.

    Args:
        ll_config_name (str): Base name (without ``.json``) of the config
            file inside ``Config.LL_CONFIG_DIR``. Defaults to "default".
    """
    config_file = os.path.join(Config.LL_CONFIG_DIR, f"{ll_config_name}.json")
    with open(config_file, "r", encoding="utf-8") as fp:
        ll_config = json.load(fp)

    # Times of day used when the config does not list any
    fallback_periods = ["00:10", "12:10", "21:10"]
    period_list = ll_config["schedule"].get("period_list", fallback_periods)
    for run_at in period_list:
        schedule.every().day.at(run_at).do(run_liuli_task, ll_config)

    name: str = ll_config["name"]
    author: str = ll_config["author"]
    LOGGER.info(
        f"Schedule({Config.SCHEDULE_VERSION}) task({name}@{author}) started successfully :)"
    )
    joined_periods = "\n ".join(period_list)
    LOGGER.info(f"Task({name}@{author}) schedule time:\n " + joined_periods)

    # Run once right away at start-up, then poll the scheduler every second
    run_liuli_task(ll_config)
    while True:
        schedule.run_pending()
        time.sleep(1)
Example #7
0
    def delete(self, doc_source: str, doc_source_name: str,
               doc_name: str) -> bool:
        """Delete one backed-up file from the repository.

        Looks the file up in ``self.repo``, deletes it, and then clears the
        document's backup status. Any exception is logged and reported as a
        failure rather than raised.

        Args:
            doc_source (str): Origin the document was fetched from.
            doc_source_name (str): Name of the document source.
            doc_name (str): Document name.
        Returns:
            bool: True if every step completed, False if any step raised.
        """
        file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
        try:
            remote_file = self.repo.get_contents(file_path)
            self.repo.delete_file(
                remote_file.path, f"Remove {file_path}", remote_file.sha
            )
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 删除成功!")
            # Drop the status record of this document as well
            self.delete_backup(
                doc_source=doc_source,
                doc_source_name=doc_source_name,
                doc_name=doc_name,
            )
        except Exception as e:
            LOGGER.error(f"Backup({self.backup_type}): {file_path} 删除失败!{e}")
            return False
        return True
Example #8
0
    def save(self, backup_data: dict) -> bool:
        """Back up one document into the backup collection.

        The document is written (upserted) only when it has not been backed
        up before or when ``self.force_backup`` is set. After a successful
        write the document's backup status is recorded via ``save_backup``.

        Args:
            backup_data (dict): Backup payload; must contain ``doc_source``,
                ``doc_source_name``, ``doc_name`` and ``doc_html``.

        Returns:
            bool: True if the document is already backed up or was stored
                now, False if the database helper reported a failure.
        """
        # The following fields are mandatory
        doc_source = backup_data["doc_source"]
        doc_source_name = backup_data["doc_source_name"]
        doc_name = backup_data["doc_name"]
        # Raw HTML of the document
        doc_html = backup_data["doc_html"]

        file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
        is_backup = self.is_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        )

        # Skip documents that are already backed up unless forced
        if is_backup and not self.force_backup:
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 已存在!")
            return True

        filter_dict = {
            "doc_source": doc_source,
            "doc_source_name": doc_source_name,
            "doc_name": doc_name,
        }
        update_data = {
            "$set": {
                **filter_dict,
                "ts": int(time.time()),
                "content": text_compress(doc_html),
            }
        }
        db_update_res = mongodb_update_data(
            coll_conn=self.liuli_backup_coll,
            filter_dict=filter_dict,
            update_data=update_data,
            upsert=True,
        )
        if not db_update_res["status"]:
            # A failed upload is an error, not an informational event
            LOGGER.error(
                f"Backup({self.backup_type}): {file_path} 上传失败!{db_update_res['info']}"
            )
            return False

        # Record the backup status of this document
        self.save_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        )
        LOGGER.info(f"Backup({self.backup_type}): {file_path} 上传成功!")
        return True
Example #9
0
def send_post_request(url, data: dict = None, timeout: int = 5, **kwargs) -> dict:
    """
    Send a POST request with ``data`` serialised as JSON text.

    :param url: target address
    :param data: request payload; serialised with ``json.dumps``
    :param timeout: seconds to wait before giving up, so a stalled server
        cannot block the caller indefinitely; defaults to 5
    :param kwargs: extra keyword arguments forwarded to ``requests.post``
    :return: the decoded JSON response, or an empty dict if the request or
        the decoding failed (the error is logged, not raised)
    """
    try:
        resp_dict = requests.post(
            url, data=json.dumps(data), timeout=timeout, **kwargs
        ).json()
    except Exception as e:
        resp_dict = {}
        LOGGER.error(f"请求出错:{e}")
    return resp_dict
Example #10
0
def send_factory(send_type: str, send_config: dict, send_data: dict) -> bool:
    """
    Dispatch a document to the sender module that matches ``send_type``.

    The module ``src.sender.<send_type>_sender`` is imported on demand and
    its ``send`` function is called.

    :param send_type: terminal type to deliver to
    :param send_config: configuration of that terminal
    :param send_data: document payload; fields are defined by the caller
    :return: whatever the sender's ``send`` returns, or False when the
        sender module cannot be found
    """
    module_path = f"src.sender.{send_type}_sender"
    try:
        return import_module(module_path).send(send_config, send_data)
    except ModuleNotFoundError:
        LOGGER.error(f"目标终端类型不存在 {send_type} - {send_config} - {send_data}")
        return False
Example #11
0
def collect_factory(collect_type: str, collect_config: dict) -> bool:
    """
    Run the collector module that matches ``collect_type``.

    The module ``src.collector.<collect_type>`` is imported on demand and its
    ``run`` function is called. Failures are logged instead of raised.

    :param collect_type: collector type
    :param collect_config: configuration passed to the collector
    :return: whatever the collector's ``run`` returns, or False when the
        module is missing or running it raised
    """
    module_path = f"src.collector.{collect_type}"
    try:
        return import_module(module_path).run(collect_config)
    except ModuleNotFoundError as e:
        LOGGER.error(f"采集器类型不存在 {collect_type} - {collect_config} -{e}")
    except Exception as e:
        LOGGER.error(f"采集器执行出错 {collect_type} - {collect_config} - {e}")
    return False
Example #12
0
def send_get_request(url: str, params: dict = None, timeout: int = 3, **kwargs):
    """Send a GET request and return the response object.

    Args:
        url (str): Target address.
        params (dict, optional): Query parameters. Defaults to None.
        timeout (int, optional): Timeout in seconds. Defaults to 3.
        **kwargs: Extra keyword arguments forwarded to ``requests.get``.

    Returns:
        The object returned by ``requests.get``, or None if the call raised
        (the exception is logged with its traceback).
    """
    try:
        return requests.get(url, params=params, timeout=timeout, **kwargs)
    except Exception as e:
        LOGGER.exception(f"请求出错 - {url} - {e!s}")
        return None
Example #13
0
def get_html_by_requests(url: str, params: dict = None, timeout: int = 3, **kwargs):
    """Send a GET request and return the response body as text.

    The body's character set is detected with ``cchardet``; when detection
    yields no encoding, UTF-8 is tried.

    Args:
        url (str): Target page.
        params (dict, optional): Query parameters. Defaults to None.
        timeout (int, optional): Timeout in seconds. Defaults to 3.
        **kwargs: Extra keyword arguments forwarded to ``send_get_request``.

    Returns:
        The decoded text, or None if the request produced no response or
        the body could not be decoded.
    """
    resp = send_get_request(url=url, params=params, timeout=timeout, **kwargs)
    if resp is None:
        # No response to read; avoid logging a misleading AttributeError.
        return None
    try:
        content = resp.content
        # Detection may report no encoding (e.g. for an empty body)
        encoding = cchardet.detect(content).get("encoding") or "utf-8"
        return content.decode(encoding)
    except Exception as e:
        LOGGER.exception(f"请求内容提取出错 - {url} - {str(e)}")
        return None
Example #14
0
    def send_text_card(self, send_data):
        """
        Send a "textcard" message describing one document.

        :param send_data: dict describing the document; must contain
            ``doc_name``, ``doc_source``, ``doc_link``, ``doc_content``,
            ``doc_cus_des``, ``doc_source_name``, ``doc_keywords`` and
            ``doc_ts`` (a timestamp accepted by ``time.localtime``)
        :return: the decoded JSON response, or an empty dict if the request
            or the decoding failed (the error is logged, not raised)
        """
        doc_name = send_data["doc_name"]
        doc_source = send_data["doc_source"]
        doc_link = send_data["doc_link"]
        doc_content = send_data["doc_content"]
        doc_cus_des = send_data["doc_cus_des"]
        doc_source_name = send_data["doc_source_name"]
        doc_keywords = send_data["doc_keywords"]
        doc_ts = send_data["doc_ts"]
        doc_date = time.strftime("%Y-%m-%d", time.localtime(doc_ts))

        doc_des_info = f"亲,来自 {doc_source} 源的 {doc_source_name} 有更新啦! \n\n{doc_content}\n\n文章关键字:{doc_keywords}"
        doc_des = f'<div class="black">{doc_date} | {doc_cus_des}</div>\n<div class="normal">{doc_des_info}</div>\n来自[2c]👉技术支持❤️'

        data = {
            # NOTE(review): recipient party id is hard-coded to 1 - confirm
            "toparty": 1,
            "msgtype": "textcard",
            "agentid": self.wecom_agent_id,
            "textcard": {
                "title": f"[{doc_source_name}]{doc_name}",
                "description": doc_des,
                "url": doc_link,
                "btntxt": "更多",
            },
            "safe": 0,
        }
        # Send explicit UTF-8 bytes. How a str body gets encoded is decided
        # by the HTTP stack, so the former encode("utf-8").decode("latin1")
        # round trip only worked when that stack encoded str as latin-1.
        payload = json.dumps(data, ensure_ascii=False).encode("utf-8")
        try:
            return requests.post(
                url=self.url,
                data=payload,
                headers={
                    "Content-Type": "application/json"
                },
            ).json()
        except Exception as e:
            LOGGER.error(f"请求出错:{e}")
            return {}
Example #15
0
def backup_factory(backup_type: str, init_config: dict) -> BackupBase:
    """
    Instantiate the backup class that matches ``backup_type``.

    The module ``src.backup.<backup_type>_backup`` is imported on demand; the
    class inside it is looked up under the name produced by
    ``string_camelcase("<backup_type>_backup")``.

    :param backup_type: backup type
    :param init_config: configuration passed to the backup class
    :return: the backup instance, or None when the module cannot be found
    """
    class_key = f"{backup_type}_backup"
    try:
        module = import_module(f"src.backup.{class_key}")
        backup_cls = getattr(module, string_camelcase(class_key))
        # Instantiate the backup class
        return backup_cls(init_config=init_config)
    except ModuleNotFoundError as e:
        LOGGER.error(f"目标备份类型不存在 {backup_type} - {init_config} - {e}")
    return None
Example #16
0
def send_post_request(url: str, data: dict = None, timeout: int = 5, **kwargs) -> dict:
    """Send a POST request with ``data`` serialised as JSON text.

    Args:
        url (str): Target address.
        data (dict, optional): Request payload. Defaults to None.
        timeout (int, optional): Timeout in seconds. Defaults to 5.
        **kwargs: Extra keyword arguments forwarded to ``requests.post``.

    Returns:
        dict: The decoded JSON response, or an empty dict if serialising,
            sending or decoding raised (the error is logged, not raised).
    """
    try:
        body = json.dumps(data)
        response = requests.post(url, data=body, timeout=timeout, **kwargs)
        return response.json()
    except Exception as e:
        LOGGER.error(f"请求出错:{e}")
        return {}
Example #17
0
    def send(self, send_data) -> bool:
        """
        Deliver a document to the terminal as a text-card message.

        :param send_data: dict describing the document; must contain
            ``doc_name``, ``doc_cus_des``, ``doc_id``, ``doc_link`` and
            ``doc_source_name`` (plus whatever ``send_text_card`` reads)
        :return: True if the document was already sent or has just been
            delivered, False if delivery failed
        """
        doc_name = send_data["doc_name"]
        doc_cus_des = send_data["doc_cus_des"]
        doc_id = send_data["doc_id"]
        doc_link = send_data["doc_link"]
        doc_source_name = send_data["doc_source_name"]
        if self.is_send(doc_id=doc_id):
            # Already delivered earlier: nothing to do.
            return True

        # Start the delivery
        resp_dict = self.send_text_card(send_data=send_data)
        notice_msg = f"{doc_cus_des}👉{doc_source_name}_{doc_name}:{doc_link} 分发到 {self.send_type}"
        if not resp_dict:
            # Empty response: the request itself failed
            LOGGER.error(f"{notice_msg} 失败!")
            return False
        if resp_dict.get("errcode") != 0:
            LOGGER.error(f"{notice_msg} 失败:{resp_dict.get('errmsg')}")
            return False

        # Persist the delivery state to the database; the timestamp is kept
        # in whole seconds like the other senders do.
        self.sl_coll.insert_one({
            "send_type": self.send_type,
            "doc_id": doc_id,
            "ts": int(time.time()),
        })
        LOGGER.info(f"{notice_msg} 成功!")
        return True
Example #18
0
    def send(self, send_data) -> bool:
        """
        Deliver a document to a DingTalk terminal as a markdown message.

        :param send_data: dict describing the document; must contain
            ``doc_id``, ``doc_name``, ``doc_source``, ``doc_link``,
            ``doc_cus_des``, ``doc_source_name``, ``doc_keywords`` and
            ``doc_date``
        :return: True if the document was already sent or has just been
            delivered, False if delivery failed
        """
        doc_id = send_data["doc_id"]
        doc_name = send_data["doc_name"]
        doc_source = send_data["doc_source"]
        doc_link = send_data["doc_link"]
        doc_cus_des = send_data["doc_cus_des"]
        doc_source_name = send_data["doc_source_name"]
        doc_keywords = send_data["doc_keywords"]
        already_sent = self.is_send(doc_id=doc_id)
        doc_date = send_data["doc_date"]
        if already_sent:
            return True

        # Build the markdown message and post it
        markdown_text = f"## [{doc_name}]({doc_link})\n\n**{doc_source_name}** | **{doc_date}** | **{doc_cus_des}** \n\n-----\n\n> 文章关键字:{doc_keywords}\n\n-----\n\n识别错误?点击[广告反馈](https://github.com/howie6879/liuli/issues/4)  👉来自[liuli](https://github.com/howie6879/liuli)技术支持❤️"
        payload = {
            "msgtype": "markdown",
            "markdown": {
                "text": markdown_text,
                "title": f"亲,{doc_source} 源有更新啦!👉{doc_name} ",
            },
        }
        resp_dict = send_post_request(
            url=self.url,
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        notice_msg = f"{doc_cus_des}👉{doc_source_name}_{doc_name}:{doc_link} 分发到 {self.send_type}"

        if not resp_dict:
            # Empty response: the request itself failed
            LOGGER.error(f"{notice_msg} 失败!")
            return False
        if resp_dict.get("errmsg") != "ok":
            LOGGER.error(f"{notice_msg} 失败:{resp_dict.get('errmsg')}")
            return False

        # Persist the delivery state to the database
        self.sl_coll.insert_one({
            "send_type": self.send_type,
            "doc_id": doc_id,
            "ts": int(time.time()),
        })
        LOGGER.info(f"{notice_msg} 成功!")
        return True
Example #19
0
    def save(self, backup_data: dict) -> bool:
        """Back up one document as an HTML file in the repository.

        The file is uploaded only when the document has no recorded backup or
        when ``self.force_backup`` is set. An existing remote file is
        updated; a missing one is created. After a successful upload the
        document's backup status is recorded via ``save_backup``.

        Args:
            backup_data (dict): Backup payload; must contain ``doc_source``,
                ``doc_source_name``, ``doc_name`` and ``doc_html``.

        Returns:
            bool: True if the document is already backed up or was uploaded
                now, False if the upload raised a ``GithubException``.
        """
        # The following fields are mandatory
        doc_source = backup_data["doc_source"]
        doc_source_name = backup_data["doc_source_name"]
        doc_name = backup_data["doc_name"]
        # Raw HTML of the document
        doc_html = backup_data["doc_html"]

        file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
        is_backup = self.is_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        )

        # A recorded backup is trusted to exist remotely as well; this holds
        # only as long as nobody edits the repository by hand.
        if is_backup and not self.force_backup:
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 已存在!")
            return True

        try:
            # Check whether the file already exists remotely. Only a 404 means
            # "missing"; any other API error is a real failure and must not
            # be papered over by a create attempt.
            try:
                contents = self.repo.get_contents(file_path)
            except GithubException as e:
                if e.status != 404:
                    raise
                contents = None

            if contents is None:
                # Missing remotely: upload a new file
                self.repo.create_file(file_path, f"Add {file_path}", doc_html)
            else:
                # Present remotely (state out of sync or forced): update it
                self.repo.update_file(
                    contents.path, f"Update {file_path}", doc_html, contents.sha
                )
        except GithubException as e:
            LOGGER.error(
                f"Backup({self.backup_type}): {file_path} 上传失败!{e}")
            return False

        LOGGER.info(f"Backup({self.backup_type}): {file_path} 上传成功!")
        # Record the backup status of this document
        self.save_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        )
        return True
Example #20
0
    def post(self):
        """Forward the posted message to Slack and relay Slack's answer.

        The request is parsed with ``SlackMessageSchema`` and the resulting
        attributes are passed to ``chat_postMessage``. The HTTP response
        carries Slack's status code and data.
        """
        from flask_restful import request

        LOGGER.info(f'Received request {request}')
        LOGGER.info(f'Request JSON data: {request.get_json()}')

        message_attrs = parse_request(SlackMessageSchema, request)

        client = self.setup_slack_client(self.get_token())
        slack_resp = client.chat_postMessage(**message_attrs)
        LOGGER.info(f'Slack client response: {slack_resp}')

        body = jsonify(code=slack_resp.status_code, data=slack_resp.data)
        return make_response(body, slack_resp.status_code)
Example #21
0
    def send(self, send_data) -> bool:
        """
        Deliver a document to Telegram.

        :param send_data: dict describing the document; must contain
            ``doc_id``, ``doc_name``, ``doc_link``, ``doc_cus_des`` and
            ``doc_source_name``, plus every field referenced by
            ``TG_BOT_MSG_TEMPLATE``
        :return: True if the document was already sent or has just been
            delivered, False if delivery failed
        """
        doc_id = send_data["doc_id"]
        doc_name = send_data["doc_name"]
        doc_link = send_data["doc_link"]
        doc_cus_des = send_data["doc_cus_des"]
        doc_source_name = send_data["doc_source_name"]
        if self.is_send(doc_id=doc_id):
            # Already delivered earlier: nothing to do.
            return True

        message = TG_BOT_MSG_TEMPLATE.format_map(send_data)
        data = {
            "chat_id": self.chat_id,
            "text": message,
            "parse_mode": "HTML",
            "disable_web_page_preview": "yes",
        }
        resp_dict = send_post_request(
            url=self.url,
            data=data,
            headers={"Content-Type": "application/json"},
            timeout=5,
        )
        notice_msg = f"{doc_cus_des}👉{doc_source_name}_{doc_name}:{doc_link} 分发到 {self.send_type}"
        if not resp_dict:
            # Empty response: the request itself failed
            LOGGER.error(f"{notice_msg} 失败!")
            return False
        if resp_dict.get("ok") is not True:
            # The Telegram Bot API explains failures in the "description"
            # field; it has no "errmsg" field.
            LOGGER.error(f"{notice_msg} 失败:{resp_dict.get('description')}")
            return False

        # Persist the delivery state to the database
        self.sl_coll.insert_one({
            "send_type": self.send_type,
            "doc_id": doc_id,
            "ts": int(time.time()),
        })
        LOGGER.info(f"{notice_msg} 成功!")
        return True
Example #22
0
def run_liuli_task(ll_config: dict):
    """Run one pass of the pipeline: collect, process, send, back up.

    Any exception raised along the way is logged and swallowed, so the
    function itself never raises.

    Args:
        ll_config (dict): Liuli task configuration. It is deep-copied first,
            so the stages cannot modify the caller's dict.
    """
    try:
        # Work on a private copy to guard against in-place modification
        task_conf = deepcopy(ll_config)
        # The document source doubles as the base query condition
        doc_source: str = task_conf["doc_source"]
        basic_filter = {"basic_filter": {"doc_source": doc_source}}
        # Per-stage configuration
        collector_conf: dict = task_conf["collector"]
        processor_conf: dict = task_conf["processor"]
        sender_conf: dict = task_conf["sender"]
        sender_conf.update(basic_filter)
        backup_conf: dict = task_conf["backup"]
        backup_conf.update(basic_filter)

        # Stage 1: collectors
        LOGGER.info("采集器开始执行!")
        for collect_type, collect_config in collector_conf.items():
            collect_factory(collect_type, collect_config)
        LOGGER.info("采集器执行完毕!")

        # Stage 2: processors that run after collection
        LOGGER.info("处理器(after_collect): 开始执行!")
        for proc_conf in processor_conf["after_collect"]:
            func_name = proc_conf.get("func")
            # Inject the base query condition into the processor arguments
            proc_conf.update(basic_filter)
            LOGGER.info(f"处理器(after_collect): {func_name} 正在执行...")
            processor_dict[func_name](**proc_conf)
        LOGGER.info("处理器(after_collect): 执行完毕!")

        # Stage 3: senders
        LOGGER.info("分发器开始执行!")
        send_doc(sender_conf)
        LOGGER.info("分发器执行完毕!")

        # Stage 4: backups
        LOGGER.info("备份器开始执行!")
        backup_doc(backup_conf)
        LOGGER.info("备份器执行完毕!")
    except Exception as e:
        LOGGER.error(f"执行失败!{e}")
Example #23
0
from src.utils import LOGGER


def schedule_task():
    """
    Run one refresh cycle for the subscribed WeChat official accounts.

    Calls, in order, the article update, the ad-tag update and the
    article distribution.
    :return: None
    """
    # Fetch the latest articles and persist them to the database
    update_wechat_doc()
    # Update the advertisement tags
    update_ads_tag()
    # Distribute the articles
    send_doc()


if __name__ == "__main__":
    # Only needs to run once, at first start-up
    run_wechat_name_spider()
    # Register the daily runs that fetch the latest articles and refresh
    # the ad tags
    for run_at in ("07:10", "11:10", "16:10", "20:10", "23:10"):
        schedule.every().day.at(run_at).do(schedule_task)
    LOGGER.info("Schedule started successfully :)")
    LOGGER.info("Schedule time:\n 07:10 \n 11:10 \n 16:10 \n 20:10 \n 23:10")
    # Poll the scheduler once per second, forever
    while True:
        schedule.run_pending()
        time.sleep(1)