Ejemplo n.º 1
0
    def create_instance(self, cron_conf, etl_day):
        """
        Create the task instances triggered by one cron configuration.

        A cron log record is created first. When the configuration is the
        root task, job task instances are created for every valid job
        definition (nothing is pushed to the queue in that case). When the
        configuration type is a cron task, cron task instances are created
        and each one is pushed to the dispatcher topic. Any other type is
        logged, the cron log is marked SHUTDOWN and the method returns.
        :param cron_conf: cron configuration; task_id, name and type are read
        :param etl_day: execute date handed to the instance factories
        :return: None
        """
        cron_record = models.CronLog().create_cron_log(cron_conf, etl_day)

        is_root = cron_conf.task_id == State.ROOT_TASK
        # The type is only inspected when this is not the root task.
        is_cron = (not is_root) and cron_conf.type == State.TASK_CRON

        if not (is_root or is_cron):
            message = "task_id:{} name:{} type:{}, unknow task type".format(
                cron_conf.task_id, cron_conf.name, cron_conf.type)
            log.logger.info("{}".format(message))
            cron_record.update_cron_status(State.SHUTDOWN)
            return

        if is_root:
            valid_jobs = models.TaskDefine().get_valid_job()
            models.TaskInstance().create_job_task_instance(
                execute_date=etl_day, job_list=valid_jobs)
            queued = []
        else:
            queued = models.TaskInstance().create_cron_task_instance(
                execute_date=etl_day, cron_list=[cron_conf])

        for task_instance in queued:
            begin_msg = kafka_utils.TaskBeginMsg(
                instance_id=task_instance.id,
                task_id=task_instance.task_id,
                execute_date=task_instance.etl_day)
            push_err = kafka_utils.PushMsgWithRetry(
                kafka_utils.TOPIC_DISPATCHER, begin_msg)
            log.logger.info("push task to queue, instance {}, err {}".format(
                task_instance, push_err))

        # The push result is only logged; the cron log is marked SUCCESS
        # regardless of it.
        cron_record.update_cron_status(State.SUCCESS)
Ejemplo n.º 2
0
 def kill_task(self, task_id, date_list):
     """
     Ask the TaskInstance model to kill instances of a task.

     This is a thin wrapper around ``TaskInstance.kill_instance``.
     :param task_id: task id forwarded unchanged to kill_instance
     :param date_list: date list forwarded unchanged to kill_instance
     :return: whatever kill_instance returns
     """
     instance_model = models.TaskInstance()
     outcome = instance_model.kill_instance(task_id, date_list)
     return outcome
Ejemplo n.º 3
0
 def prepare_to_run(self, instance_id):
     """
     Refresh a task instance through the TaskInstance model.

     The result of ``TaskInstance.refresh_instance`` is unpacked into an
     error value and the instance, and both are handed back to the caller.
     :param instance_id: id forwarded to refresh_instance
     :return: tuple (err, task_instance)
     """
     instance_model = models.TaskInstance()
     # Unpack explicitly so the caller always receives a 2-tuple.
     err, task_instance = instance_model.refresh_instance(instance_id)
     return err, task_instance
Ejemplo n.º 4
0
    def run_task(self, execute_date=None, task_list=None):
        """
        Prepare job instances for running and push them to the dispatcher.

        When both arguments are given, the listed tasks are prepared for
        that date directly. When either is missing, every instance returned
        by get_waiting_dep_instance is checked with is_met_dependency and
        prepared only if its dependency is met; unmet ones are logged.
        :param execute_date: YYYY-MM-DD
        :param task_list: task_list
        :return: None
        """
        if execute_date is not None and task_list is not None:
            runnable = models.TaskInstance().job_prepare_running(
                execute_date, task_list)
        else:
            runnable = []
            candidates = models.TaskInstance().get_waiting_dep_instance()
            for candidate in candidates:
                day = candidate.etl_day
                met, reason = models.TaskDependency().is_met_dependency(
                    candidate.task_id, candidate.etl_day)
                if not met:
                    log.logger.info(
                        "etl_day: {}, task {} not met dependency -> {}".format(
                            day, candidate.task_id, reason))
                    continue
                prepared = models.TaskInstance().job_prepare_running(
                    day, [candidate.task_id])
                if len(prepared) > 0:
                    runnable.extend(prepared)

        log.logger.info(" queued new_instance {}".format(runnable))
        for task_instance in runnable:
            begin_msg = kafka_utils.TaskBeginMsg(
                instance_id=task_instance.id,
                task_id=task_instance.task_id,
                execute_date=task_instance.etl_day)
            push_err = kafka_utils.PushMsgWithRetry(
                kafka_utils.TOPIC_DISPATCHER, begin_msg)
            log.logger.info("push task to queue, instance {}, err {}".format(
                task_instance, push_err))
Ejemplo n.º 5
0
 def run_all_job(self, date_list):
     """
     Create job task instances for every valid job on every given date.

     The valid job list is fetched once and reused for each date.
     :param date_list: dates for which instances are created
     :return: summary message with the count len(date_list) * len(jobs)
     """
     valid_jobs = models.TaskDefine().get_valid_job()
     for day in date_list:
         print(len(valid_jobs), day)
         models.TaskInstance().create_job_task_instance(day, valid_jobs)
     total = len(date_list) * len(valid_jobs)
     return "generate {} job task instance ".format(total)
Ejemplo n.º 6
0
    def retryZombieInstance(self):
        """
        Create retry instances for timed-out tasks and re-push them.

        The timeout and the maximum schedule count are read from
        ``self.config`` and passed to ``TaskInstance.create_retry_instance``;
        every instance it returns is pushed to the dispatcher topic.
        :return: None
        """
        log.logger.debug("begin retry timeout task instance")
        instance_model = models.TaskInstance()
        # Config keys are spelled exactly as the configuration defines them.
        timeout = self.config["woker_timeout"]
        push_limit = self.config["retry_push_times"]
        retry_instances = instance_model.create_retry_instance(
            timer_out=timeout, max_schedule=push_limit)

        for task_instance in retry_instances:
            begin_msg = kafka_utils.TaskBeginMsg(
                instance_id=task_instance.id,
                task_id=task_instance.task_id,
                execute_date=task_instance.etl_day)
            push_err = kafka_utils.PushMsgWithRetry(
                kafka_utils.TOPIC_DISPATCHER, begin_msg)
            log.logger.debug("retry timeout task instance {}, err {}".format(
                task_instance, push_err))

        log.logger.info("end retry timeout task instance")
Ejemplo n.º 7
0
    def rerun_task(self,
                   task_id,
                   date_list,
                   up_and_down=False,
                   run_up=False,
                   run_down=False,
                   force=False):
        """
        Re-run a task for each date in date_list.

        The task is first looked up as a job definition. If it is a job:
          * with run_up / run_down / up_and_down, job task instances are
            created for the selected related jobs plus the task itself
            (run_up takes precedence over run_down, which takes precedence
            over up_and_down);
          * with only force set, the job is run directly through
            direct_run_single_job_task and the resulting instances are
            pushed to the dispatcher topic;
          * with no flag set, job task instances are created for the task
            itself only.
        If it is not a job, the task is looked up as a cron configuration
        and, when found, cron task instances are created and pushed.
        :param task_id: task_id
        :param date_list: range list
        :param up_and_down: run both upstream and downstream jobs
        :param run_up: run upstream
        :param run_down: run downstream
        :param force: run the single job directly; only honoured when none
            of run_up, run_down and up_and_down is set
        :return: summary message, or "" when nothing was generated
        """
        kafka_utils.setup_kafka(config.G_Conf.Common.Broker)
        instance_list = []
        msg = ""
        run_type = ""

        job = models.TaskDefine().get_job_by_task_id(task_id_list=[task_id])
        if job and len(job) > 0:
            run_type = "job"
            direct_run = force and not (run_up or run_down or up_and_down)
            if direct_run:
                # run a job with force
                for etl_day in date_list:
                    tmp = models.TaskInstance().direct_run_single_job_task(
                        etl_day, job)
                    # Truthiness check: comparing the returned collection
                    # with an int (``tmp > 0``) raises TypeError on Python 3.
                    if tmp:
                        instance_list.extend(tmp)
            else:
                job_list = []
                if run_up:
                    job_list = models.TaskDependency().get_all_upstream(
                        task_id)
                elif run_down:
                    job_list = models.TaskDependency().get_all_downstream(
                        task_id)
                elif up_and_down:
                    job_list.extend(
                        models.TaskDependency().get_all_upstream(task_id))
                    job_list.extend(
                        models.TaskDependency().get_all_downstream(task_id))

                # add self
                job_list.append(task_id)
                need_run_job_list = models.TaskDefine().get_job_by_task_id(
                    task_id_list=job_list)
                if need_run_job_list and len(need_run_job_list) > 0:
                    for etl_day in date_list:
                        models.TaskInstance().create_job_task_instance(
                            execute_date=etl_day, job_list=need_run_job_list)
                    msg = "generate {} TaskDefine task instance ".format(
                        len(need_run_job_list) * len(date_list))
        else:
            # run cron task
            cron = models.CronConf().get_cron_by_task_id(task_id=task_id)
            if cron:
                run_type = "cron"
                for etl_day in date_list:
                    tmp = models.TaskInstance().create_cron_task_instance(
                        execute_date=etl_day, cron_list=[cron])
                    if tmp:
                        instance_list.extend(tmp)

        for instance in instance_list:
            err = kafka_utils.PushMsgWithRetry(
                kafka_utils.TOPIC_DISPATCHER,
                kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                         task_id=instance.task_id,
                                         execute_date=instance.etl_day))
            log.logger.info("push task to queue, instance {}, err {}".format(
                instance, err))
        if len(instance_list) > 0:
            msg = "generate {} {} task instance ".format(
                len(instance_list), run_type)
        return msg