def create_instance(self, cron_conf, etl_day):
    """Create task instances for one cron entry on a given etl_day.

    Writes a cron-log row first, then:
      * ROOT_TASK  -> fan out an instance for every valid job (nothing pushed here);
      * TASK_CRON  -> create instances for this single cron and push each one
        onto the dispatcher topic;
      * anything else -> log, mark the cron log SHUTDOWN and bail out.

    On the happy path the cron log is marked SUCCESS at the end.
    """
    cron_log = models.CronLog().create_cron_log(cron_conf, etl_day)

    pending = []
    if cron_conf.task_id == State.ROOT_TASK:
        # Root task: regenerate instances for every valid job for this day.
        valid_jobs = models.TaskDefine().get_valid_job()
        models.TaskInstance().create_job_task_instance(
            execute_date=etl_day, job_list=valid_jobs)
    elif cron_conf.type == State.TASK_CRON:
        # Plain cron task: only this cron's instances get pushed below.
        pending = models.TaskInstance().create_cron_task_instance(
            execute_date=etl_day, cron_list=[cron_conf])
    else:
        # Unknown task type: record it and shut the cron log down.
        err = "task_id:{} name:{} type:{}, unknow task type".format(
            cron_conf.task_id, cron_conf.name, cron_conf.type)
        log.logger.info("{}".format(err))
        cron_log.update_cron_status(State.SHUTDOWN)
        return

    # Hand every freshly created instance to the dispatcher queue.
    for inst in pending:
        push_err = kafka_utils.PushMsgWithRetry(
            kafka_utils.TOPIC_DISPATCHER,
            kafka_utils.TaskBeginMsg(instance_id=inst.id,
                                     task_id=inst.task_id,
                                     execute_date=inst.etl_day))
        log.logger.info("push task to queue, instance {}, err {}".format(
            inst, push_err))

    cron_log.update_cron_status(State.SUCCESS)
def run_all_job(self, date_list):
    """Create task instances for every valid job on each day in *date_list*.

    :param date_list: iterable of etl_day values; one batch of job instances
        is created per day
    :return: summary message with the total number of instances generated
    """
    job_list = models.TaskDefine().get_valid_job()
    for etl_day in date_list:
        # Was a bare debug print(); use the module logger like the rest of
        # this file so the output lands in the service log.
        log.logger.info("create {} job task instance for {}".format(
            len(job_list), etl_day))
        # Keyword args for consistency with the other call sites of
        # create_job_task_instance in this file.
        models.TaskInstance().create_job_task_instance(
            execute_date=etl_day, job_list=job_list)
    return "generate {} job task instance ".format(
        len(date_list) * len(job_list))
def rerun_task(self, task_id, date_list, up_and_down=False, run_up=False,
               run_down=False, force=False):
    """Re-run a task (job or cron) for every day in *date_list*.

    :param task_id: id of the task to re-run
    :param date_list: list of etl_day values to re-run over
    :param up_and_down: also re-run both upstream and downstream jobs
    :param run_up: also re-run all upstream jobs
    :param run_down: also re-run all downstream jobs
    :param force: run the single job directly, bypassing dependency waits
    :return: summary message describing how many instances were generated
    """
    kafka_utils.setup_kafka(config.G_Conf.Common.Broker)
    # run job define
    instance_list = []
    msg = ""
    run_type = ""
    # Single-pass "loop" used only so that `break` can jump past the
    # remaining branches (a goto substitute); it never iterates twice.
    for loop in ["looop"]:
        _ = loop
        job = models.TaskDefine().get_job_by_task_id(
            task_id_list=[task_id])
        if job and len(job) > 0:
            # task_id refers to a defined job
            run_type = "job"
            job_list = []
            if run_up:
                job_list = models.TaskDependency().get_all_upstream(
                    task_id)
            elif run_down:
                job_list = models.TaskDependency().get_all_downstream(
                    task_id)
            elif up_and_down:
                up_job = models.TaskDependency().get_all_upstream(task_id)
                down_job = models.TaskDependency().get_all_downstream(
                    task_id)
                if len(up_job) > 0:
                    job_list.extend(up_job)
                if len(down_job) > 0:
                    job_list.extend(down_job)
            else:
                # run a job with force
                if force:
                    for etl_day in date_list:
                        tmp = models.TaskInstance(
                        ).direct_run_single_job_task(etl_day, job)
                        # NOTE(review): `tmp > 0` followed by extend(tmp) is
                        # inconsistent with the cron branch below, which uses
                        # `len(tmp) > 0` — confirm what
                        # direct_run_single_job_task returns (count vs list).
                        if tmp > 0:
                            instance_list.extend(tmp)
                    # Force-run is complete; skip the dependency-based path.
                    break
                else:
                    # run single waiting dependency
                    pass
            # add self
            job_list.append(task_id)
            need_run_job_list = models.TaskDefine().get_job_by_task_id(
                task_id_list=job_list)
            if need_run_job_list and len(need_run_job_list) > 0:
                for etl_day in date_list:
                    models.TaskInstance().create_job_task_instance(
                        execute_date=etl_day, job_list=need_run_job_list)
                msg = "generate {} TaskDefine task instance ".format(
                    len(need_run_job_list) * len(date_list))
            break
        # run cron task
        cron = models.CronConf().get_cron_by_task_id(task_id=task_id)
        if cron:
            run_type = "cron"
            for etl_day in date_list:
                tmp = models.TaskInstance().create_cron_task_instance(
                    execute_date=etl_day, cron_list=[cron])
                if len(tmp) > 0:
                    instance_list.extend(tmp)
            break
    # Push every directly created instance onto the dispatcher queue
    # (the create_job_task_instance path does not populate instance_list).
    for instance in instance_list:
        err = kafka_utils.PushMsgWithRetry(
            kafka_utils.TOPIC_DISPATCHER,
            kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                     task_id=instance.task_id,
                                     execute_date=instance.etl_day))
        log.logger.info("push task to queue, instance {}, err {}".format(
            instance, err))
    if len(instance_list) > 0:
        msg = "generate {} {} task instance ".format(
            len(instance_list), run_type)
    return msg
def run(self):
    """Execute the task instance with up to `self.retry` retries.

    Each attempt marks the instance running, delegates to self.inner_run,
    and on failure backs off `self.step_seconds * attempt` seconds before
    retrying.  On success a TaskOverMsg is pushed to the result topic.
    If the loop ends with an error message, an alert is sent to the
    task's keeper.
    """
    # number of executions so far (0 = first attempt, not yet a retry)
    running_times = 0
    msg = None
    try:
        while running_times <= self.retry:
            task_runner = BashTaskRunner(self.instance)
            self.begin_time = time.time()
            self.instance.worker_retry = running_times
            # record running state; a non-None result means the instance
            # should not run (e.g. already handled) and we stop silently
            should_run = self.instance.start_running(
                retry=(True if running_times > 0 else False))
            if should_run is not None:
                log.logger.info("{}".format(should_run))
                msg = None
                break
            ret = self.inner_run(task_runner, running_times)
            if ret is None:
                # success: persist status and notify the result topic
                self.instance.stop_running(State.SUCCESS)
                kafka_utils.PushMsgWithRetry(
                    kafka_utils.TOPIC_TASK_RESULT,
                    kafka_utils.TaskOverMsg(
                        instance_id=self.instance.id,
                        task_id=self.instance.task_id,
                        status=State.SUCCESS,
                        execute_date=self.instance.etl_day))
                msg = None
                break
            else:
                msg = "the {} times running:{}".format(running_times, ret)
                if self.instance.status == State.KILLED:
                    # if instance is killd, should stop running
                    break
                elif self.instance.status == State.TIMEOUT:
                    self.instance.stop_running(State.TIMEOUT)
                else:
                    self.instance.stop_running(State.FAILED)
                if running_times < self.retry:
                    msg = "{}, after {} seconds will try the {} times ".format(
                        msg, self.step_seconds * (running_times + 1),
                        running_times + 1)
                    log.logger.error(msg)
                running_times += 1
                if running_times <= self.retry:
                    # linear backoff before the next attempt
                    time.sleep(self.step_seconds * running_times)
                else:
                    msg = "reach the max retry times {} with err:{}, stop running".format(
                        self.retry, msg)
                    log.logger.info(msg)
    except Exception as e:
        msg = "get Exception {}.{}".format(type(e), str(e))
        log.logger.error(msg)
    finally:
        # always release the worker-process slot
        process_utils.ref_counter.unref()
        # NOTE(review): alerting below sits inside finally in this
        # reconstruction — confirm against the original layout; behavior is
        # equivalent while all errors are caught by the except above.
        if msg is not None:
            # build a "keeper" label identifying who owns the failed task
            keeper = "unknown"
            log.logger.error("run {}, err: {}".format(self.instance, msg))
            if self.instance.task_type == State.TASK_JOB:
                job_list = models.TaskDefine().get_job_by_task_id(
                    [self.instance.task_id])
                if len(job_list) > 0:
                    keeper = "{}({})".format(self.instance.task_id,
                                             job_list[0].keeper)
                else:
                    # should not come here
                    keeper = "{}".format(self.instance.task_id)
            elif self.instance.task_type == State.TASK_EXTRACT:
                keeper = "{}(rule_id:{})".format(self.instance.task_id,
                                                 self.instance.sub_task_id)
            elif self.instance.task_type == State.TASK_CRON:
                keeper = "{}(定时任务)".format(self.instance.task_id)
            else:
                pass
            msg = "\nTask: {} \nError: {} \nContext: {}".format(
                keeper, msg, self.instance)
            process_utils.Alert(msg)
    return