def get_task(self):
    """
    Periodically fetch pending tasks from the database and send them to the download queue.
    :return:
    """
    task_cell = self.crawler_setting.get("task_cell") or 10
    mq_queue = get_queue(self.crawler_setting, 'download')
    mq_conn = connect(mq_queue, self.mq_params[0], self.mq_params[1],
                      self.mq_params[2], self.mq_params[3])
    while True:
        if RedisUtil.get_lock():
            tasks = SqlUtil.get_task()
            if tasks:
                for task in tasks:
                    task_id = task.get("task_id")
                    RedisUtil.monitor_task(task_id)
                    task["main_task_flag"] = 1
                    message = repr(task)
                    # Check the queue size limit; do not dispatch when it is exceeded.
                    is_send(self.mq_params, self.crawler_setting, mq_queue)
                    send_data(mq_conn, '', message, mq_queue)
                    SqlUtil.update_task(1, "'{}'".format(task_id),
                                        "'{}'".format(task.get("exec_time")),
                                        "'{}'".format(task.get("pre_exec_time")))
                Logger.logger.info("Tasks dispatched, sleeping for {}s...".format(task_cell))
            else:
                Logger.logger.info("No tasks to fetch, sleeping for {}s...".format(task_cell))
            RedisUtil.release_lock()
        else:
            Logger.logger.info("Lock not acquired, sleeping for {}s...".format(task_cell))
        time.sleep(task_cell)
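# Illustrative sketch only (an assumption, not the project's actual RedisUtil):
# get_lock()/release_lock() above act as a distributed lock so that only one
# dispatcher instance polls the database per cycle. With redis-py, such a lock
# can be built on SET NX EX; the key name, TTL and client settings below are
# placeholders.
import redis

_lock_client = redis.Redis(host="127.0.0.1", port=6379)


def acquire_dispatch_lock(ttl=30):
    # The first caller to set the key wins; the TTL makes the lock expire
    # automatically if the holder crashes before releasing it.
    return bool(_lock_client.set("crawler4py:dispatch_lock", "1", nx=True, ex=ttl))


def release_dispatch_lock():
    _lock_client.delete("crawler4py:dispatch_lock")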
def back_task(self):
    """
    Recycle tasks from the recovery queue.
    :return:
    """
    mq_queue = get_queue(self.crawler_setting, "recovery")
    mq_conn_recovery = connect(mq_queue, self.mq_params[0], self.mq_params[1],
                               self.mq_params[2], self.mq_params[3])
    self.call_back(**{
        "no_ack": None,
        "channel": mq_conn_recovery,
        "routing_key": mq_queue
    })
def generate_task(self):
    """
    Generate tasks by consuming the dispatch queue.
    :return:
    """
    mq_queue = get_queue(self.crawler_setting, "dispatch")
    mq_conn_download = connect(mq_queue, self.mq_params[0], self.mq_params[1],
                               self.mq_params[2], self.mq_params[3])
    self.call_back(**{
        "no_ack": None,
        "channel": mq_conn_download,
        "routing_key": mq_queue
    })
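# Illustrative sketch only (not part of the original source): get_task loops
# forever and back_task/generate_task block while consuming their queues, so a
# dispatcher process would typically run them concurrently. The Dispatch class
# name and its constructor arguments are assumptions for this example.
def run_dispatcher(crawler_setting, mq_params):
    import threading

    dispatcher = Dispatch(crawler_setting, mq_params)  # hypothetical class
    threading.Thread(target=dispatcher.get_task, daemon=True).start()
    threading.Thread(target=dispatcher.back_task, daemon=True).start()
    # Consume the dispatch queue on the main thread.
    dispatcher.generate_task()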
def process(self):
    crawler_mode = self.crawler_setting.get("crawler_mode")
    if not crawler_mode:
        self.simple()
    else:
        try:
            user = self.crawler_setting.get("mq").get("user")
            pwd = self.crawler_setting.get("mq").get("pwd")
            host = self.crawler_setting.get("mq").get("host")
            port = self.crawler_setting.get("mq").get("port")
            mq_queue = get_queue(self.crawler_setting, "extract")
        except AttributeError:
            user = "******"
            pwd = "crawler4py"
            host = "127.0.0.1"
            port = 5672
            mq_queue = "extract"
        mq_conn = connect(mq_queue, user, pwd, host, port)
        self.call_back(**{"no_ack": None, "channel": mq_conn, "routing_key": mq_queue})
def process(self):
    crawler_mode = self.crawler_setting.get("crawler_mode")
    if not crawler_mode:
        self.simple()
    else:
        try:
            user = self.crawler_setting.get("mq").get("user")
            pwd = self.crawler_setting.get("mq").get("pwd")
            host = self.crawler_setting.get("mq").get("host")
            port = self.crawler_setting.get("mq").get("port")
            mq_queue = get_queue(self.crawler_setting, "download")
        except AttributeError:
            user = "******"
            pwd = "crawler4py"
            host = "127.0.0.1"
            port = 5672
            mq_queue = "download"
        mq_conn = connect(mq_queue, user, pwd, host, port)
        try:
            plugin_path = self.crawler_setting.get("plugins").get("download")
        except AttributeError:
            # A missing "plugins" section returns None, so .get() raises AttributeError.
            plugin_path = None
        self.call_back(**{"no_ack": None, "channel": mq_conn, "routing_key": mq_queue,
                          "plugin_path": plugin_path})
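# Illustrative sketch only (an assumed shape, not taken from the project docs):
# the keys below mirror the lookups made in the methods above. The broker is
# most likely RabbitMQ, given the default port 5672 and the routing_key/no_ack
# parameters; all concrete values here are placeholders.
EXAMPLE_CRAWLER_SETTING = {
    "crawler_mode": 1,            # falsy -> self.simple() single-process mode
    "task_cell": 10,              # dispatch polling / sleep interval in seconds
    "mq": {                       # message-queue connection parameters
        "user": "guest",
        "pwd": "guest",
        "host": "127.0.0.1",
        "port": 5672,
    },
    "plugins": {
        "download": "plugins.download",   # hypothetical download-plugin path
    },
}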