def create_instance(self, cron_conf, etl_day):
    """Create task instances for a fired cron entry and push them to the dispatcher queue.

    :param cron_conf: cron configuration row (provides task_id, name, type)
    :param etl_day: execution date string (YYYY-MM-DD)
    :return: None
    """
    instance_list = []
    cron_log = models.CronLog().create_cron_log(cron_conf, etl_day)
    if cron_conf.task_id == State.ROOT_TASK:
        # Root task: instantiate every valid job. These instances are picked
        # up by dependency scheduling, so nothing is pushed to the queue here.
        job_list = models.TaskDefine().get_valid_job()
        models.TaskInstance().create_job_task_instance(
            execute_date=etl_day, job_list=job_list)
    elif cron_conf.type == State.TASK_CRON:
        instance_list = models.TaskInstance().create_cron_task_instance(
            execute_date=etl_day, cron_list=[cron_conf])
    else:
        # Unrecognized task type: record the failure on the cron log and stop.
        # (Fixed typo "unknow" -> "unknown"; log at error level, not info.)
        err = "task_id:{} name:{} type:{}, unknown task type".format(
            cron_conf.task_id, cron_conf.name, cron_conf.type)
        log.logger.error("{}".format(err))
        cron_log.update_cron_status(State.SHUTDOWN)
        return
    for instance in instance_list:
        err = kafka_utils.PushMsgWithRetry(
            kafka_utils.TOPIC_DISPATCHER,
            kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                     task_id=instance.task_id,
                                     execute_date=instance.etl_day))
        log.logger.info("push task to queue, instance {}, err {}".format(
            instance, err))
    cron_log.update_cron_status(State.SUCCESS)
def kill_task(self, task_id, date_list):
    """Kill the instances of *task_id* scheduled on the given dates.

    :param task_id: id of the task whose instances should be killed
    :param date_list: list of execution dates to kill
    :return: result of models.TaskInstance().kill_instance
    """
    result = models.TaskInstance().kill_instance(task_id, date_list)
    return result
def prepare_to_run(self, instance_id):
    """Refresh a task instance so it is ready to run.

    :param instance_id: id of the instance to refresh
    :return: (err, task_instance) tuple from refresh_instance
    """
    # refresh_instance already returns the (err, instance) pair we need.
    return models.TaskInstance().refresh_instance(instance_id)
def run_task(self, execute_date=None, task_list=None):
    """Queue task instances for execution and push them to the dispatcher.

    With no arguments, scans instances waiting on dependencies and queues
    those whose dependencies are now met; otherwise queues the given task
    list for the given date directly.

    :param execute_date: YYYY-MM-DD
    :param task_list: list of task ids to run
    :return: None
    """
    new_instance = []
    if execute_date is None or task_list is None:
        # Dependency-driven mode: promote waiting instances whose
        # upstream dependencies have completed.
        waiting_dep_instance = models.TaskInstance().get_waiting_dep_instance()
        for waiting_task in waiting_dep_instance:
            etl_day = waiting_task.etl_day
            met, msg = models.TaskDependency().is_met_dependency(
                waiting_task.task_id, waiting_task.etl_day)
            if met:
                tmp = models.TaskInstance().job_prepare_running(
                    etl_day, [waiting_task.task_id])
                # extend() on an empty result is a no-op, so no guard needed.
                new_instance.extend(tmp)
            else:
                log.logger.info(
                    "etl_day: {}, task {} not met dependency -> {}".format(
                        etl_day, waiting_task.task_id, msg))
    else:
        # Direct mode: queue exactly the requested tasks for the date.
        new_instance = models.TaskInstance().job_prepare_running(
            execute_date, task_list)
    log.logger.info(" queued new_instance {}".format(new_instance))
    for instance in new_instance:
        err = kafka_utils.PushMsgWithRetry(
            kafka_utils.TOPIC_DISPATCHER,
            kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                     task_id=instance.task_id,
                                     execute_date=instance.etl_day))
        log.logger.info("push task to queue, instance {}, err {}".format(
            instance, err))
def run_all_job(self, date_list):
    """Create task instances for every valid job on each given date.

    :param date_list: list of execution dates (YYYY-MM-DD)
    :return: summary message; empty string when date_list is empty
    """
    job_list = models.TaskDefine().get_valid_job()
    msg = ""
    for etl_day in date_list:
        # Replaced a stray debug print() with proper logging.
        log.logger.debug("creating {} job instances for {}".format(
            len(job_list), etl_day))
        models.TaskInstance().create_job_task_instance(etl_day, job_list)
    # Compute the summary once instead of on every iteration; guard keeps
    # the original empty-string result for an empty date_list.
    if date_list:
        msg = "generate {} job task instance ".format(
            len(date_list) * len(job_list))
    return msg
def retryZombieInstance(self):
    """Re-push timed-out (zombie) task instances to the dispatcher queue.

    Retry candidates are selected using the configured worker timeout and
    the maximum re-push count.

    :return: None
    """
    log.logger.debug("begin retry timeout task instance")
    zombie_list = models.TaskInstance().create_retry_instance(
        timer_out=self.config["woker_timeout"],
        max_schedule=self.config["retry_push_times"])
    for zombie in zombie_list:
        begin_msg = kafka_utils.TaskBeginMsg(instance_id=zombie.id,
                                             task_id=zombie.task_id,
                                             execute_date=zombie.etl_day)
        err = kafka_utils.PushMsgWithRetry(kafka_utils.TOPIC_DISPATCHER,
                                           begin_msg)
        log.logger.debug("retry timeout task instance {}, err {}".format(
            zombie, err))
    log.logger.info("end retry timeout task instance")
def rerun_task(self, task_id, date_list, up_and_down=False, run_up=False,
               run_down=False, force=False):
    """Re-run a task (job-defined or cron-defined) over a range of dates.

    :param task_id: task to re-run
    :param date_list: list of execution dates (YYYY-MM-DD)
    :param up_and_down: also re-run both upstream and downstream tasks
    :param run_up: also re-run all upstream tasks
    :param run_down: also re-run all downstream tasks
    :param force: run the job directly, bypassing dependency waiting
    :return: summary message describing how many instances were generated
    """
    kafka_utils.setup_kafka(config.G_Conf.Common.Broker)
    instance_list = []
    msg = ""
    run_type = ""
    # Single-iteration loop used as a breakable block: the first matching
    # branch (job vs cron) breaks out, skipping the rest.
    for _ in ["looop"]:
        job = models.TaskDefine().get_job_by_task_id(
            task_id_list=[task_id])
        if job and len(job) > 0:
            run_type = "job"
            job_list = []
            if run_up:
                job_list = models.TaskDependency().get_all_upstream(
                    task_id)
            elif run_down:
                job_list = models.TaskDependency().get_all_downstream(
                    task_id)
            elif up_and_down:
                up_job = models.TaskDependency().get_all_upstream(task_id)
                down_job = models.TaskDependency().get_all_downstream(
                    task_id)
                if len(up_job) > 0:
                    job_list.extend(up_job)
                if len(down_job) > 0:
                    job_list.extend(down_job)
            else:
                # run a job with force
                if force:
                    for etl_day in date_list:
                        tmp = models.TaskInstance(
                        ).direct_run_single_job_task(etl_day, job)
                        # BUG FIX: original used `if tmp > 0`, which raises
                        # TypeError on Python 3 when tmp is a list; use the
                        # len() check the sibling branches already use.
                        if tmp and len(tmp) > 0:
                            instance_list.extend(tmp)
                    break
                else:
                    # run single waiting dependency
                    pass
            # add self
            job_list.append(task_id)
            need_run_job_list = models.TaskDefine().get_job_by_task_id(
                task_id_list=job_list)
            if need_run_job_list and len(need_run_job_list) > 0:
                for etl_day in date_list:
                    models.TaskInstance().create_job_task_instance(
                        execute_date=etl_day, job_list=need_run_job_list)
                msg = "generate {} TaskDefine task instance ".format(
                    len(need_run_job_list) * len(date_list))
            break
        # run cron task
        cron = models.CronConf().get_cron_by_task_id(task_id=task_id)
        if cron:
            run_type = "cron"
            for etl_day in date_list:
                tmp = models.TaskInstance().create_cron_task_instance(
                    execute_date=etl_day, cron_list=[cron])
                if len(tmp) > 0:
                    instance_list.extend(tmp)
            break
    for instance in instance_list:
        err = kafka_utils.PushMsgWithRetry(
            kafka_utils.TOPIC_DISPATCHER,
            kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                     task_id=instance.task_id,
                                     execute_date=instance.etl_day))
        log.logger.info("push task to queue, instance {}, err {}".format(
            instance, err))
    if len(instance_list) > 0:
        msg = "generate {} {} task instance ".format(
            len(instance_list), run_type)
    return msg