def delete(self, doc_source: str, doc_source_name: str, doc_name: str) -> bool:
    """Remove a backed-up article file from the GitHub repository.

    Args:
        doc_source (str): collector source of the article
        doc_source_name (str): name of the article source
        doc_name (str): article title

    Returns:
        bool: True when the remote file and its backup state were removed
    """
    target_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
    try:
        remote_file = self.repo.get_contents(target_path)
        self.repo.delete_file(
            remote_file.path, f"Remove {target_path}", remote_file.sha
        )
        LOGGER.info(f"Backup({self.backup_type}): {target_path} 删除成功!")
        # Drop the persisted backup-state record for this article as well
        self.delete_backup(
            doc_source=doc_source,
            doc_source_name=doc_source_name,
            doc_name=doc_name,
        )
        return True
    except Exception as e:
        # Best-effort: any API failure is logged and reported as False
        LOGGER.error(f"Backup({self.backup_type}): {target_path} 删除失败!{e}")
        return False
def run_liuli_schedule(ll_config_name: str = "default"):
    """Start the Liuli scheduler for a named configuration.

    Loads ``{ll_config_name}.json`` from ``Config.LL_CONFIG_DIR``, registers
    one daily run of :func:`run_liuli_task` per configured time slot, runs
    the task once immediately, then blocks forever dispatching the schedule.

    Args:
        ll_config_name (str): config file name (without ``.json``) under
            ``Config.LL_CONFIG_DIR``; defaults to ``"default"``.
    """
    ll_config_path = os.path.join(Config.LL_CONFIG_DIR, f"{ll_config_name}.json")
    with open(ll_config_path, "r", encoding="utf-8") as load_f:
        ll_config = json.load(load_f)
    schedule_time_list = ll_config["schedule"].get(
        "period_list", ["00:10", "12:10", "21:10"]
    )
    # Register one daily run for each configured time slot
    for each in schedule_time_list:
        schedule.every().day.at(each).do(run_liuli_task, ll_config)
    name: str = ll_config["name"]
    author: str = ll_config["author"]
    start_info = f"Schedule({Config.SCHEDULE_VERSION}) task({name}@{author}) started successfully :)"
    LOGGER.info(start_info)
    schedule_msg = f"Task({name}@{author}) schedule time:\n " + "\n ".join(
        schedule_time_list
    )
    LOGGER.info(schedule_msg)
    # Run the pipeline once right away on startup
    run_liuli_task(ll_config)
    while True:
        schedule.run_pending()
        time.sleep(1)
def send(self, send_data) -> bool:
    """Push an article notification to a Bark terminal.

    :param send_data: payload dict, fields defined by the developer
    :return: True when already sent or delivered successfully, else False
    """
    doc_name = send_data["doc_name"]
    doc_link = send_data["doc_link"]
    doc_cus_des = send_data["doc_cus_des"]
    doc_source_name = send_data["doc_source_name"]
    doc_id = send_data["doc_id"]
    is_send = self.is_send(doc_id=doc_id)
    send_status = True
    notice_msg = f"{doc_cus_des}👉{doc_source_name}_{doc_name}:{doc_link} 分发到 {self.send_type}"
    if not is_send:
        url = self.compose(send_data)
        # Without a timeout a hung Bark endpoint would block the whole sender
        resp = requests.post(url, timeout=10)
        # Parse the body once instead of calling json.loads twice
        resp_body = json.loads(resp.text)
        if resp.status_code == 200 and resp_body["code"] == 200:
            # Persist the sent state so the article is not pushed again
            self.sl_coll.insert_one({
                "send_type": self.send_type,
                "doc_id": doc_id,
                "ts": int(time.time()),
            })
            LOGGER.info(f"{notice_msg} 成功!")
        else:
            errmsg = resp_body["code"]
            LOGGER.error(f"{notice_msg} 失败:{errmsg}")
            send_status = False
    return send_status
def delete_backup(
    self, doc_source: str, doc_source_name: str, doc_name: str
) -> bool:
    """Delete the backup-state record of an article.

    Args:
        doc_source (str): collector source of the article
        doc_source_name (str): name of the article source
        doc_name (str): article title

    Returns:
        bool: True when the state record was deleted successfully
    """
    file_msg = f"{doc_source}/{doc_source_name}/{doc_name}"
    op_res = True
    try:
        self.bak_coll.delete_one(
            {
                "backup_type": self.backup_type,
                "doc_source": doc_source,
                "doc_source_name": doc_source_name,
                "doc_name": doc_name,
            }
        )
        LOGGER.info(f"Backup({self.backup_type}): 文章 {file_msg} 状态删除成功!")
    except Exception as e:
        # Bug fix: the function is annotated -> bool but returned None;
        # report the outcome to the caller
        op_res = False
        LOGGER.error(f"Backup({self.backup_type}): 文章 {file_msg} 状态删除失败!{e}")
    return op_res
def send(self, send_data) -> bool:
    """Push an article text card to a DingTalk terminal.

    :param send_data: payload dict, fields defined by the developer
    :return: True when already sent or delivered successfully, else False
    """
    doc_name = send_data["doc_name"]
    doc_cus_des = send_data["doc_cus_des"]
    doc_id = send_data["doc_id"]
    doc_link = send_data["doc_link"]
    doc_source_name = send_data["doc_source_name"]
    is_send = self.is_send(doc_id=doc_id)
    send_status = True
    if not is_send:
        # Dispatch the card
        resp_dict = self.send_text_card(send_data=send_data)
        notice_msg = f"{doc_cus_des}👉{doc_source_name}_{doc_name}:{doc_link} 分发到 {self.send_type}"
        if resp_dict:
            if resp_dict.get("errcode") == 0:
                # Persist the sent state; int timestamp keeps the schema
                # consistent with the other senders
                self.sl_coll.insert_one({
                    "send_type": self.send_type,
                    "doc_id": doc_id,
                    "ts": int(time.time()),
                })
                LOGGER.info(f"{notice_msg} 成功!")
            else:
                LOGGER.error(f"{notice_msg} 失败:{resp_dict.get('errmsg')}")
                # Bug fix: report the failure instead of returning True
                send_status = False
        else:
            LOGGER.error(f"{notice_msg} 失败!")
            send_status = False
    return send_status
def save_backup(self, doc_source: str, doc_source_name: str, doc_name: str) -> bool:
    """Persist (upsert) the backup-state record of an article.

    Args:
        doc_source (str): collector source of the article
        doc_source_name (str): name of the article source
        doc_name (str): article title

    Returns:
        bool: True when the state record was saved successfully
    """
    file_msg = f"{doc_source}/{doc_source_name}/{doc_name}"
    op_res = True
    try:
        filter_dict = {
            "backup_type": self.backup_type,
            "doc_source": doc_source,
            "doc_source_name": doc_source_name,
            "doc_name": doc_name,
        }
        # Upsert so repeated backups refresh the timestamp instead of
        # inserting duplicates
        update_data = {"$set": {**filter_dict, **{"ts": int(time.time())}}}
        self.bak_coll.update_one(
            filter=filter_dict, update=update_data, upsert=True
        )
        LOGGER.info(f"Backup({self.backup_type}): 文章 {file_msg} 状态保存成功!")
    except Exception as e:
        # Bug fix: the function is annotated -> bool but returned None;
        # report the outcome to the caller
        op_res = False
        LOGGER.error(f"Backup({self.backup_type}): 文章 {file_msg} 状态保存失败!{e}")
    return op_res
def delete(self, doc_source: str, doc_source_name: str, doc_name: str) -> bool:
    """Delete a backed-up article document from MongoDB.

    Args:
        doc_source (str): collector source of the article
        doc_source_name (str): name of the article source
        doc_name (str): article title

    Returns:
        bool: True when the document and its backup state were removed
    """
    target_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
    db_res = mongodb_delete_many_data(
        coll_conn=self.liuli_backup_coll,
        filter_dict={
            "doc_source": doc_source,
            "doc_source_name": doc_source_name,
            "doc_name": doc_name,
        },
    )
    if not db_res["status"]:
        LOGGER.error(
            f"Backup({self.backup_type}): {target_path} 删除失败!{db_res['info']}"
        )
        return False
    LOGGER.info(f"Backup({self.backup_type}): {target_path} 删除成功!")
    # Drop the persisted backup-state record for this article as well
    self.delete_backup(
        doc_source=doc_source,
        doc_source_name=doc_source_name,
        doc_name=doc_name,
    )
    return True
def create_app():
    """Create and configure the Liuli web application.

    url: http://flask.pocoo.org/docs/1.0/quickstart/
    :return: the configured Flask application
    """
    flask_app = Flask(__name__)
    with flask_app.app_context():
        # Shared project-level resources, exposed through app.config
        mongodb_base = MongodbManager.get_mongo_base(
            mongodb_config=Config.MONGODB_CONFIG
        )
        app_loop = asyncio.get_event_loop()
        flask_app.config["app_config"] = Config
        flask_app.config["app_logger"] = LOGGER
        flask_app.config["app_loop"] = app_loop
        flask_app.config["mongodb_base"] = mongodb_base
        LOGGER.info("API started successfully :)")
    flask_app.register_blueprint(bp_api)
    return flask_app
def send(self, send_data) -> bool:
    """Push an article markdown card to a DingTalk robot.

    :param send_data: payload dict, fields defined by the developer
    :return: True when already sent or delivered successfully, else False
    """
    doc_id = send_data["doc_id"]
    doc_name = send_data["doc_name"]
    doc_source = send_data["doc_source"]
    doc_link = send_data["doc_link"]
    doc_cus_des = send_data["doc_cus_des"]
    doc_source_name = send_data["doc_source_name"]
    doc_keywords = send_data["doc_keywords"]
    is_send = self.is_send(doc_id=doc_id)
    doc_date = send_data["doc_date"]
    send_status = True
    if not is_send:
        # Markdown card body rendered by the DingTalk client
        markdown_text = f"## [{doc_name}]({doc_link})\n\n**{doc_source_name}** | **{doc_date}** | **{doc_cus_des}** \n\n-----\n\n> 文章关键字:{doc_keywords}\n\n-----\n\n识别错误?点击[广告反馈](https://github.com/howie6879/liuli/issues/4) 👉来自[liuli](https://github.com/howie6879/liuli)技术支持❤️"
        payload = {
            "msgtype": "markdown",
            "markdown": {
                "text": markdown_text,
                "title": f"亲,{doc_source} 源有更新啦!👉{doc_name} ",
            },
        }
        reply = send_post_request(
            url=self.url,
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        notice_msg = f"{doc_cus_des}👉{doc_source_name}_{doc_name}:{doc_link} 分发到 {self.send_type}"
        if not reply:
            LOGGER.error(f"{notice_msg} 失败!")
            send_status = False
        elif reply.get("errmsg") == "ok":
            # Persist the sent state so the article is not pushed again
            self.sl_coll.insert_one({
                "send_type": self.send_type,
                "doc_id": doc_id,
                "ts": int(time.time()),
            })
            LOGGER.info(f"{notice_msg} 成功!")
        else:
            LOGGER.error(f"{notice_msg} 失败:{reply.get('errmsg')}")
            send_status = False
    return send_status
def save(self, backup_data: dict) -> bool:
    """Back up an article's HTML into MongoDB (compressed).

    Args:
        backup_data (dict): must contain ``doc_source``, ``doc_source_name``,
            ``doc_name`` and ``doc_html``

    Returns:
        bool: True when stored (or already backed up), False on DB failure
    """
    # The following fields are mandatory
    doc_source = backup_data["doc_source"]
    doc_source_name = backup_data["doc_source_name"]
    doc_name = backup_data["doc_name"]
    # Raw article HTML
    doc_html = backup_data["doc_html"]
    file_msg = f"{doc_source}/{doc_source_name}/{doc_name}"
    file_path = f"{file_msg}.html"
    is_backup = self.is_backup(
        doc_source=doc_source,
        doc_source_name=doc_source_name,
        doc_name=doc_name,
    )
    op_res = True
    # Proceed when not yet backed up, or when backup is forced
    if not is_backup or self.force_backup:
        filter_dict = {
            "doc_source": doc_source,
            "doc_source_name": doc_source_name,
            "doc_name": doc_name,
        }
        update_data = {
            "$set": {
                **filter_dict,
                **{"ts": int(time.time()), "content": text_compress(doc_html)},
            }
        }
        db_update_res = mongodb_update_data(
            coll_conn=self.liuli_backup_coll,
            filter_dict=filter_dict,
            update_data=update_data,
            upsert=True,
        )
        if db_update_res["status"]:
            msg = f"Backup({self.backup_type}): {file_path} 上传成功!"
            # Record the per-article backup state
            self.save_backup(
                doc_source=doc_source,
                doc_source_name=doc_source_name,
                doc_name=doc_name,
            )
        else:
            # Bug fix: annotated -> bool but returned None; surface failure
            op_res = False
            msg = f"Backup({self.backup_type}): {file_path} 上传失败!{db_update_res['info']}"
    else:
        msg = f"Backup({self.backup_type}): {file_path} 已存在!"
    LOGGER.info(msg)
    return op_res
def save(self, backup_data: dict) -> bool:
    """Back up an article's HTML to the GitHub repository.

    Args:
        backup_data (dict): must contain ``doc_source``, ``doc_source_name``,
            ``doc_name`` and ``doc_html``

    Returns:
        bool: True when uploaded (or already backed up), False on API failure
    """
    # The following fields are mandatory
    doc_source = backup_data["doc_source"]
    doc_source_name = backup_data["doc_source_name"]
    doc_name = backup_data["doc_name"]
    # Raw article HTML
    doc_html = backup_data["doc_html"]
    file_path = f"{doc_source}/{doc_source_name}/{doc_name}.html"
    is_backup = self.is_backup(
        doc_source=doc_source,
        doc_source_name=doc_source_name,
        doc_name=doc_name,
    )
    op_res = True
    # A DB record is assumed to mean the remote file exists; editing the
    # repository by hand will desynchronise this state
    if not is_backup or self.force_backup:
        try:
            try:
                # File already exists remotely -> update it
                contents = self.repo.get_contents(file_path)
                self.repo.update_file(
                    contents.path, f"Update {file_path}", doc_html, contents.sha
                )
            except Exception:
                # Not found (or fetch failed) -> create it
                self.repo.create_file(file_path, f"Add {file_path}", doc_html)
            LOGGER.info(f"Backup({self.backup_type}): {file_path} 上传成功!")
            # Record the per-article backup state
            self.save_backup(
                doc_source=doc_source,
                doc_source_name=doc_source_name,
                doc_name=doc_name,
            )
        except GithubException as e:
            # Bug fix: annotated -> bool but returned None; surface failure
            op_res = False
            LOGGER.error(f"Backup({self.backup_type}): {file_path} 上传失败!{e}")
    else:
        LOGGER.info(f"Backup({self.backup_type}): {file_path} 已存在!")
    return op_res
def send(self, send_data) -> bool:
    """Push an article notification to a Telegram chat via the bot API.

    :param send_data: payload dict, fields defined by the developer
    :return: True when already sent or delivered successfully, else False
    """
    doc_id = send_data["doc_id"]
    doc_name = send_data["doc_name"]
    doc_link = send_data["doc_link"]
    doc_cus_des = send_data["doc_cus_des"]
    doc_source_name = send_data["doc_source_name"]
    is_send = self.is_send(doc_id=doc_id)
    send_status = True
    if not is_send:
        # Render the HTML-formatted bot message from the template
        bot_text = TG_BOT_MSG_TEMPLATE.format_map(send_data)
        payload = {
            "chat_id": self.chat_id,
            "text": bot_text,
            "parse_mode": "HTML",
            "disable_web_page_preview": "yes",
        }
        reply = send_post_request(
            url=self.url,
            data=payload,
            headers={"Content-Type": "application/json"},
            timeout=5,
        )
        notice_msg = f"{doc_cus_des}👉{doc_source_name}_{doc_name}:{doc_link} 分发到 {self.send_type}"
        if not reply:
            LOGGER.error(f"{notice_msg} 失败!")
            send_status = False
        elif reply.get("ok") is True:
            # Persist the sent state so the article is not pushed again
            self.sl_coll.insert_one({
                "send_type": self.send_type,
                "doc_id": doc_id,
                "ts": int(time.time()),
            })
            LOGGER.info(f"{notice_msg} 成功!")
        else:
            LOGGER.error(f"{notice_msg} 失败:{reply.get('errmsg')}")
            send_status = False
    return send_status
def post(self):
    """Relay a validated Slack message request to the Slack Web API."""
    from flask_restful import request

    LOGGER.info(f'Received request {request}')
    LOGGER.info(f'Request JSON data: {request.get_json()}')
    attrs = parse_request(SlackMessageSchema, request)
    client = self.setup_slack_client(self.get_token())
    response = client.chat_postMessage(**attrs)
    LOGGER.info(f'Slack client response: {response}')
    body = jsonify(code=response.status_code, data=response.data)
    return make_response(body, response.status_code)
def run_liuli_task(ll_config: dict):
    """Execute one full Liuli pipeline run: collect, process, send, back up.

    Args:
        ll_config (dict): Liuli task configuration
    """
    try:
        # Work on a copy so inner stages cannot mutate the caller's config
        ll_config_data = deepcopy(ll_config)
        # Base query condition shared by processor, sender and backup stages
        doc_source: str = ll_config_data["doc_source"]
        basic_filter = {"basic_filter": {"doc_source": doc_source}}
        collector_conf: dict = ll_config_data["collector"]
        processor_conf: dict = ll_config_data["processor"]
        sender_conf: dict = ll_config_data["sender"]
        sender_conf.update(basic_filter)
        backup_conf: dict = ll_config_data["backup"]
        backup_conf.update(basic_filter)

        # Stage 1: collectors
        LOGGER.info("采集器开始执行!")
        for collect_type, collect_config in collector_conf.items():
            collect_factory(collect_type, collect_config)
        LOGGER.info("采集器执行完毕!")

        # Stage 2: post-collect processors
        LOGGER.info("处理器(after_collect): 开始执行!")
        for proc_cfg in processor_conf["after_collect"]:
            func_name = proc_cfg.get("func")
            # Inject the base query condition into each processor config
            proc_cfg.update(basic_filter)
            LOGGER.info(f"处理器(after_collect): {func_name} 正在执行...")
            processor_dict[func_name](**proc_cfg)
        LOGGER.info("处理器(after_collect): 执行完毕!")

        # Stage 3: senders
        LOGGER.info("分发器开始执行!")
        send_doc(sender_conf)
        LOGGER.info("分发器执行完毕!")

        # Stage 4: backup
        LOGGER.info("备份器开始执行!")
        backup_doc(backup_conf)
        LOGGER.info("备份器执行完毕!")
    except Exception as e:
        LOGGER.error(f"执行失败!{e}")
from src.utils import LOGGER


def schedule_task():
    """Refresh subscribed WeChat accounts: fetch, tag and distribute articles.

    :return:
    """
    # Fetch the newest articles and persist them to the database
    update_wechat_doc()
    # Refresh advertisement tags
    update_ads_tag()
    # Distribute the articles
    send_doc()


if __name__ == "__main__":
    # Only needs to run once at startup
    run_wechat_name_spider()
    # Fetch the latest WeChat articles and refresh ad tags several times a day
    for run_at in ("07:10", "11:10", "16:10", "20:10", "23:10"):
        schedule.every().day.at(run_at).do(schedule_task)
    LOGGER.info("Schedule started successfully :)")
    LOGGER.info("Schedule time:\n 07:10 \n 11:10 \n 16:10 \n 20:10 \n 23:10")
    while True:
        schedule.run_pending()
        time.sleep(1)