예제 #1
0
 def get_task(self):
     """
     Periodically fetch runnable tasks from the database and publish them
     to the download queue.

     Runs forever. On every cycle it tries to take the Redis lock; while
     holding it, each task returned by ``SqlUtil.get_task()`` is registered
     with ``RedisUtil.monitor_task``, flagged as a main task, published to
     the download queue and then updated through ``SqlUtil.update_task``.
     The lock is always released before sleeping, even when processing a
     task raises, so a failure cannot leave the lock held.

     :return: does not return normally (infinite loop)
     """
     # Sleep interval in seconds between polling cycles; falls back to 10
     # when "task_cell" is missing or falsy.
     task_cell = self.crawler_setting.get("task_cell") or 10
     mq_queue = get_queue(self.crawler_setting, 'download')
     mq_conn = connect(mq_queue, self.mq_params[0], self.mq_params[1],
                       self.mq_params[2], self.mq_params[3])
     while True:
         if RedisUtil.get_lock():
             try:
                 tasks = SqlUtil.get_task()
                 if tasks:
                     for task in tasks:
                         task_id = task.get("task_id")
                         RedisUtil.monitor_task(task_id)
                         task["main_task_flag"] = 1
                         message = repr(task)
                         # Check whether the queue size limit is exceeded;
                         # if so, the task should not be dispatched yet.
                         # NOTE(review): the return value is ignored, so
                         # is_send presumably blocks until there is room -
                         # confirm against its implementation.
                         is_send(self.mq_params, self.crawler_setting, mq_queue)
                         send_data(mq_conn, '', message, mq_queue)
                         # NOTE(review): values are quoted by hand here;
                         # presumably they end up in a SQL statement -
                         # verify SqlUtil.update_task escapes them.
                         SqlUtil.update_task(
                             1, "'{}'".format(task_id),
                             "'{}'".format(task.get("exec_time")),
                             "'{}'".format(task.get("pre_exec_time")))
                     Logger.logger.info(
                         "任务发送完成, 开始进行休眠, 休眠..{}s..".format(task_cell))
                 else:
                     Logger.logger.info(
                         "没有可提取的任务,开始进行休眠,休眠..{}s..".format(task_cell))
             finally:
                 # Release the lock on both the success and the error path;
                 # any exception still propagates to the caller afterwards.
                 RedisUtil.release_lock()
         else:
             Logger.logger.info("未抢到锁,休眠..{}s..".format(task_cell))
         time.sleep(task_cell)
예제 #2
0
 def back_task(self):
     """
     Recycle tasks.

     Resolves the "recovery" queue from the crawler settings, connects to
     it with the four values in ``self.mq_params`` and passes the result
     to ``self.call_back`` as the channel to consume from.

     :return:
     """
     recovery_queue = get_queue(self.crawler_setting, "recovery")
     params = self.mq_params
     recovery_channel = connect(
         recovery_queue, params[0], params[1], params[2], params[3])
     self.call_back(
         no_ack=None,
         channel=recovery_channel,
         routing_key=recovery_queue,
     )
예제 #3
0
 def generate_task(self):
     """
     Generate tasks.

     Looks up the "dispatch" queue name, opens a connection for it using
     ``self.mq_params`` and starts ``self.call_back`` on that queue.

     :return:
     """
     dispatch_queue = get_queue(self.crawler_setting, "dispatch")
     mq_args = self.mq_params
     callback_kwargs = {"no_ack": None}
     callback_kwargs["channel"] = connect(
         dispatch_queue, mq_args[0], mq_args[1], mq_args[2], mq_args[3])
     callback_kwargs["routing_key"] = dispatch_queue
     self.call_back(**callback_kwargs)
예제 #4
0
    def process(self):
        """
        Start processing for the "extract" queue.

        When ``crawler_mode`` is unset or falsy, ``self.simple()`` is run
        and nothing else happens. Otherwise the MQ credentials and the
        queue name are read from the crawler settings; if that lookup
        raises ``AttributeError`` (for example, no "mq" section), built-in
        defaults are used instead. A connection is then opened and handed
        to ``self.call_back``.
        """
        if not self.crawler_setting.get("crawler_mode"):
            self.simple()
            return

        try:
            mq_setting = self.crawler_setting.get("mq")
            user = mq_setting.get("user")
            pwd = mq_setting.get("pwd")
            host = mq_setting.get("host")
            port = mq_setting.get("port")
            mq_queue = get_queue(self.crawler_setting, "extract")
        except AttributeError:
            # Settings are incomplete: fall back to the local defaults.
            user, pwd = "******", "crawler4py"
            host, port = "127.0.0.1", 5672
            mq_queue = "extract"

        channel = connect(mq_queue, user, pwd, host, port)
        self.call_back(no_ack=None, channel=channel, routing_key=mq_queue)
예제 #5
0
 def process(self):
     """
     Start processing for the "download" queue.

     When ``crawler_mode`` is unset or falsy, ``self.simple()`` is run and
     nothing else happens. Otherwise the MQ credentials and queue name are
     read from the crawler settings, falling back to built-in defaults if
     the lookup raises ``AttributeError`` (for example, no "mq" section).
     The optional download plugin path is read from the "plugins" section
     and defaults to ``None`` when that section is missing. The connection,
     queue name and plugin path are then handed to ``self.call_back``.
     """
     crawler_mode = self.crawler_setting.get("crawler_mode")
     if not crawler_mode:
         self.simple()
     else:
         try:
             user = self.crawler_setting.get("mq").get("user")
             pwd = self.crawler_setting.get("mq").get("pwd")
             host = self.crawler_setting.get("mq").get("host")
             port = self.crawler_setting.get("mq").get("port")
             mq_queue = get_queue(self.crawler_setting, "download")
         except AttributeError:
             # Settings are incomplete: fall back to the local defaults.
             user = "******"
             pwd = "crawler4py"
             host = "127.0.0.1"
             port = 5672
             mq_queue = "download"
         mq_conn = connect(mq_queue, user, pwd, host, port)
         try:
             plugin_path = self.crawler_setting.get("plugins").get("download")
         except AttributeError:
             # A missing "plugins" section makes .get("plugins") return
             # None, and None.get(...) raises AttributeError; treat that
             # as "no plugin configured".
             plugin_path = None
         self.call_back(
             **{"no_ack": None, "channel": mq_conn, "routing_key": mq_queue, "plugin_path": plugin_path})