def migrate_dependency(self, job_id=None, delete=False):
    """
    migrate_dependency
    :param delete: delete data
    :param job_id: if job_id is None, migrate all jobs
    :return:
    """
    models.TaskDependency().migrateDep(job_id=job_id, delete=delete)
def query_dep(self, task_id, etl_day):
    met, msg = models.TaskDependency(). \
        is_met_dependency(task_id, etl_day)
    if met:
        print("task {}, etl_day {} met dependency".format(task_id, etl_day))
    else:
        print("task {}, etl_day {} did not meet dependency: {}".format(
            task_id, etl_day, msg))
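# Example: query_dep can audit one task's dependency status across a
# backfill window by looping over days. A minimal sketch; `cli` is assumed
# to be an instance of this class, and the task name/dates are illustrative.
def audit_dependency_window(cli):
    for etl_day in ["2018-05-01", "2018-05-02", "2018-05-03"]:
        cli.query_dep("demo_task", etl_day)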
def run(self):
    log.logger.info("Starting the DependencyScheduler")
    producer = kafka_utils.setup_kafka(config.G_Conf.Common.Broker)

    def gen_obj(d):
        return kafka_utils.TaskOverMsg(d['instance_id'], d['task_id'],
                                       d['status'], d['execute_date'])

    exception_count = 0
    exception_begin = datetime.now()
    max_exception_count = 5
    max_exception_seconds = 3 * 60
    while True:
        try:
            if producer.llen(kafka_utils.TOPIC_TASK_RESULT):
                # Could be improved with StrictRedis.brpop, which would
                # remove the llen() polling (see the sketch below).
                msg = producer.rpop(kafka_utils.TOPIC_TASK_RESULT)
                msg_obj = gen_obj(json.loads(msg))
                log.logger.info("get task result:{}".format(msg_obj))
                # the worker pushes a message only on success
                met_task = models.TaskDependency(
                ).downstream_met_dependency(
                    task_id=msg_obj.task_id,
                    execute_date=msg_obj.execute_date)
                if len(met_task) > 0:
                    self.run_task(msg_obj.execute_date, met_task)
            else:
                log.logger.info("begin fetch waiting_dep task list")
                self.run_task()
                log.logger.info("end fetch waiting_dep task list")
                time.sleep(10)
        except Exception as e:
            exception_count += 1
            if exception_count == 1:
                exception_begin = datetime.now()
            exception_duration = (datetime.now() -
                                  exception_begin).total_seconds()
            alert_msg = "if {} exceptions occur within {} seconds, " \
                        "the DependencyScheduler will exit, " \
                        "current: {} times/{} seconds, exception: {}-{}".format(
                            max_exception_count, max_exception_seconds,
                            exception_count, exception_duration,
                            type(e), str(e))
            if exception_count >= max_exception_count \
                    and exception_duration >= max_exception_seconds:
                alert_msg = "scheduler exit, take action: {}".format(
                    alert_msg)
                log.logger.error(alert_msg)
                process_utils.Alert(alert_msg)
                return
            else:
                log.logger.error(alert_msg)
                process_utils.Alert(alert_msg)
            time.sleep(10)
    log.logger.info("Quit the DependencyScheduler")
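# The inline note above suggests replacing the llen()/rpop() polling with a
# blocking pop. A minimal sketch of that variant, assuming `conn` is a
# redis.StrictRedis instance (as the original comment implies); the callback
# names are illustrative, not part of this codebase.
def consume_with_brpop(conn, queue, gen_obj, on_result, on_idle):
    while True:
        # brpop parks the connection until an element arrives or the
        # timeout expires, so neither llen() polling nor a fixed sleep
        # is needed.
        item = conn.brpop(queue, timeout=10)
        if item is None:
            on_idle()  # mirror the waiting_dep rescan in the else branch
            continue
        _key, raw = item  # brpop returns a (key, value) tuple
        on_result(gen_obj(json.loads(raw)))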
def run(self):
    log.logger.info("Starting the DependencyScheduler")

    def gen_obj(d):
        return kafka_utils.TaskOverMsg(d['instance_id'], d['task_id'],
                                       d['status'], d['execute_date'])

    exception_count = 0
    exception_begin = datetime.now()
    max_exception_count = 5
    max_exception_seconds = 3 * 60
    while True:
        try:
            for msg in kafka_utils.scheduler_consumer:
                msg_obj = json.loads(msg.value, object_hook=gen_obj)
                log.logger.info("get task result:{}".format(msg_obj))
                # kafka_utils.scheduler_consumer.commit()
                # the worker pushes a message only on success
                met_task = models.TaskDependency(
                ).downstream_met_dependency(
                    task_id=msg_obj.task_id,
                    execute_date=msg_obj.execute_date)
                if len(met_task) > 0:
                    self.run_task(msg_obj.execute_date, met_task)
                else:
                    log.logger.info("begin fetch waiting_dep task list")
                    self.run_task()
                    log.logger.info("end fetch waiting_dep task list")
        except Exception as e:
            exception_count += 1
            if exception_count == 1:
                exception_begin = datetime.now()
            exception_duration = (datetime.now() -
                                  exception_begin).total_seconds()
            alert_msg = "if {} exceptions occur within {} seconds, " \
                        "the DependencyScheduler will exit, " \
                        "current: {} times/{} seconds, exception: {}-{}".format(
                            max_exception_count, max_exception_seconds,
                            exception_count, exception_duration,
                            type(e), str(e))
            if exception_count >= max_exception_count \
                    and exception_duration >= max_exception_seconds:
                alert_msg = "scheduler exit, take action: {}".format(
                    alert_msg)
                log.logger.error(alert_msg)
                process_utils.Alert(alert_msg)
                return
            else:
                log.logger.error(alert_msg)
                process_utils.Alert(alert_msg)
            time.sleep(10)
    log.logger.info("Quit the DependencyScheduler")
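# The commented-out commit() above hints that offsets are meant to be
# committed manually, only after a result has been handled. A minimal sketch
# of that pattern with kafka-python (KafkaConsumer and its parameters come
# from that library; the broker/topic wiring here is assumed, not taken
# from this project's kafka_utils):
def make_manual_commit_consumer(broker, topic):
    from kafka import KafkaConsumer
    # Auto-commit off: an offset is committed only after the message is
    # processed, so a crash mid-handling redelivers instead of dropping.
    return KafkaConsumer(
        topic,
        bootstrap_servers=broker,
        enable_auto_commit=False,
        value_deserializer=lambda v: json.loads(v.decode('utf-8')))

def consume_and_commit(consumer, handle):
    for msg in consumer:
        handle(msg.value)   # msg.value is already a decoded dict here
        consumer.commit()   # commit only after successful handling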
def run_task(self, execute_date=None, task_list=None):
    """
    run_dep_task
    :param execute_date: YYYY-MM-DD
    :param task_list: task_list
    :return:
    """
    new_instance = []
    if execute_date is None or task_list is None:
        waiting_dep_instance = models.TaskInstance(
        ).get_waiting_dep_instance()
        for waiting_task in waiting_dep_instance:
            etl_day = waiting_task.etl_day
            met, msg = models.TaskDependency(). \
                is_met_dependency(waiting_task.task_id,
                                  waiting_task.etl_day)
            if met:
                tmp = models.TaskInstance(). \
                    job_prepare_running(etl_day, [waiting_task.task_id])
                if len(tmp) > 0:
                    new_instance.extend(tmp)
            else:
                log.logger.info(
                    "etl_day: {}, task {} did not meet dependency -> {}".format(
                        etl_day, waiting_task.task_id, msg))
    else:
        new_instance = models.TaskInstance(). \
            job_prepare_running(execute_date, task_list)
    log.logger.info("queued new_instance {}".format(new_instance))
    for instance in new_instance:
        err = kafka_utils.PushMsgWithRetry(
            kafka_utils.TOPIC_DISPATCHER,
            kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                     task_id=instance.task_id,
                                     execute_date=instance.etl_day))
        log.logger.info("push task to queue, instance {}, err {}".format(
            instance, err))
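# PushMsgWithRetry is project-specific; for reference, a retry wrapper of
# that shape could look like the sketch below. The Redis lpush transport,
# the retry/backoff parameters, and the JSON-serializable payload are
# assumptions for illustration, matching the list-based queue in run().
def push_msg_with_retry(conn, topic, payload, retries=3, delay=1):
    # Returns None on success or the last exception, matching how the
    # `err` return value is logged above.
    for attempt in range(retries):
        try:
            conn.lpush(topic, json.dumps(payload))
            return None
        except Exception as e:
            if attempt == retries - 1:
                return e
            time.sleep(delay * (attempt + 1))  # simple linear backoff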
def rerun_task(self, task_id, date_list, up_and_down=False, run_up=False,
               run_down=False, force=False):
    """
    rerun_task
    :param task_id: task_id
    :param date_list: range list
    :param up_and_down: run both upstream and downstream
    :param run_up: run upstream
    :param run_down: run downstream
    :param force: run the job directly, skipping the dependency wait
    :return:
    """
    kafka_utils.setup_kafka(config.G_Conf.Common.Broker)
    # run job define
    instance_list = []
    msg = ""
    run_type = ""
    # single-pass loop, used only as a breakable block
    for _ in ["loop"]:
        job = models.TaskDefine().get_job_by_task_id(
            task_id_list=[task_id])
        if job and len(job) > 0:
            run_type = "job"
            job_list = []
            if run_up:
                job_list = models.TaskDependency().get_all_upstream(
                    task_id)
            elif run_down:
                job_list = models.TaskDependency().get_all_downstream(
                    task_id)
            elif up_and_down:
                up_job = models.TaskDependency().get_all_upstream(task_id)
                down_job = models.TaskDependency().get_all_downstream(
                    task_id)
                if len(up_job) > 0:
                    job_list.extend(up_job)
                if len(down_job) > 0:
                    job_list.extend(down_job)
            else:
                # run a single job, forced past the dependency check
                if force:
                    for etl_day in date_list:
                        tmp = models.TaskInstance(
                        ).direct_run_single_job_task(etl_day, job)
                        if len(tmp) > 0:
                            instance_list.extend(tmp)
                    break
                else:
                    # run a single job, waiting on its dependencies
                    pass
            # add the task itself
            job_list.append(task_id)
            need_run_job_list = models.TaskDefine().get_job_by_task_id(
                task_id_list=job_list)
            if need_run_job_list and len(need_run_job_list) > 0:
                for etl_day in date_list:
                    models.TaskInstance().create_job_task_instance(
                        execute_date=etl_day, job_list=need_run_job_list)
                msg = "generate {} TaskDefine task instance ".format(
                    len(need_run_job_list) * len(date_list))
            break
        # run cron task
        cron = models.CronConf().get_cron_by_task_id(task_id=task_id)
        if cron:
            run_type = "cron"
            for etl_day in date_list:
                tmp = models.TaskInstance().create_cron_task_instance(
                    execute_date=etl_day, cron_list=[cron])
                if len(tmp) > 0:
                    instance_list.extend(tmp)
            break
    for instance in instance_list:
        err = kafka_utils.PushMsgWithRetry(
            kafka_utils.TOPIC_DISPATCHER,
            kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                     task_id=instance.task_id,
                                     execute_date=instance.etl_day))
        log.logger.info("push task to queue, instance {}, err {}".format(
            instance, err))
    if len(instance_list) > 0:
        msg = "generate {} {} task instance ".format(
            len(instance_list), run_type)
    return msg
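# Usage sketch: date_list is a list of YYYY-MM-DD strings. A backfill that
# reruns a task plus everything downstream of it might look like this; the
# `scheduler` instance, helper name, and task id are illustrative.
def backfill_downstream(scheduler, task_id, start, end):
    from datetime import timedelta
    # Build an inclusive list of YYYY-MM-DD strings, the format
    # rerun_task expects, then rerun the task and all its downstream.
    days = (end - start).days
    date_list = [(start + timedelta(days=i)).strftime("%Y-%m-%d")
                 for i in range(days + 1)]
    return scheduler.rerun_task(task_id, date_list, run_down=True)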