Ejemplo n.º 1
0
 def migrate_dependency(self, job_id=None, delete=False):
     """Migrate task-dependency records via TaskDependency.migrateDep.

     :param job_id: migrate only this job; when None, migrate every job
     :param delete: when True, delete the data during migration
     :return: None
     """
     dependency_model = models.TaskDependency()
     dependency_model.migrateDep(job_id=job_id, delete=delete)
Ejemplo n.º 2
0
 def query_dep(self, task_id, etl_day):
     """Print whether *task_id* has met its dependencies for *etl_day*.

     :param task_id: task identifier to check
     :param etl_day: ETL date to check the dependency against
     :return: None (result is printed to stdout)
     """
     met, msg = models.TaskDependency(). \
         is_met_dependency(task_id, etl_day)
     # parenthesized print works on both Python 2 and Python 3
     # (the original bare print statement is Python-2-only syntax)
     if met:
         print("task {}, etl_day {} met dependency".format(task_id, etl_day))
     else:
         print("task {}, etl_day {} not met dependency: {}".format(
             task_id, etl_day, msg))
Ejemplo n.º 3
0
    def run(self):
        """Scheduler main loop (Redis-list variant).

        Polls the TOPIC_TASK_RESULT Redis list for finished-task messages
        and queues downstream tasks whose dependencies are now met; when the
        list is empty it rescans all instances waiting on dependencies.

        Exits (after alerting) once ``max_exception_count`` exceptions have
        occurred within ``max_exception_seconds``.
        """
        log.logger.info("Starting the DependencyScheduler")
        producer = kafka_utils.setup_kafka(config.G_Conf.Common.Broker)

        def gen_obj(d):
            # Rebuild a TaskOverMsg from the decoded JSON dict.
            return kafka_utils.TaskOverMsg(d['instance_id'], d['task_id'],
                                           d['status'], d['execute_date'])

        exception_count = 0
        exception_begin = datetime.now()
        max_exception_count = 5
        max_exception_seconds = 3 * 60

        while True:
            try:
                if producer.llen(kafka_utils.TOPIC_TASK_RESULT):
                    # NOTE: could use StrictRedis.brpop here to block on the
                    # list and drop the llen polling.
                    msg = producer.rpop(kafka_utils.TOPIC_TASK_RESULT)
                    msg_obj = gen_obj(json.loads(msg))
                    log.logger.info("get task result:{}".format(msg_obj))

                    # the worker push msg only success
                    met_task = models.TaskDependency(
                    ).downstream_met_dependency(
                        task_id=msg_obj.task_id,
                        execute_date=msg_obj.execute_date)
                    if len(met_task) > 0:
                        self.run_task(msg_obj.execute_date, met_task)
                else:
                    # queue empty: rescan everything waiting on dependencies
                    log.logger.info("begin fetch waiting_dep task list")
                    self.run_task()
                    log.logger.info("end fetch waiting_dep task list ")
                    time.sleep(10)
            except Exception as e:
                exception_count += 1
                if exception_count == 1:
                    # first failure of a streak: start the timing window
                    exception_begin = datetime.now()
                exception_duration = (datetime.now() -
                                      exception_begin).total_seconds()
                alert_msg = "if get exception {} times in {} seconds, " \
                            "the DependencyScheduler will exit, current:{} times/{} seconds, exception:{}-{}". \
                    format(max_exception_count, max_exception_seconds, exception_count, exception_duration, type(e), str(e))
                if exception_count >= max_exception_count and exception_duration >= max_exception_seconds:
                    alert_msg = "scheduler exit, do somthing {}".format(
                        alert_msg)
                    log.logger.error(alert_msg)
                    process_utils.Alert(alert_msg)
                    # BUGFIX: the quit log used to sit after the infinite
                    # loop and was unreachable; emit it before returning.
                    log.logger.info("Quit the DependencyScheduler")
                    return
                else:
                    log.logger.error(alert_msg)
                    process_utils.Alert(alert_msg)
                time.sleep(10)
Ejemplo n.º 4
0
    def run(self):
        """Scheduler main loop (Kafka-consumer variant).

        Consumes finished-task messages from the scheduler consumer and
        queues downstream tasks whose dependencies are now met; when the
        consumer iterator ends it rescans all instances waiting on
        dependencies.

        Exits (after alerting) once ``max_exception_count`` exceptions have
        occurred within ``max_exception_seconds``.
        """
        log.logger.info("Starting the DependencyScheduler")

        def gen_obj(d):
            # Rebuild a TaskOverMsg from the decoded JSON dict.
            return kafka_utils.TaskOverMsg(d['instance_id'], d['task_id'],
                                           d['status'], d['execute_date'])

        exception_count = 0
        exception_begin = datetime.now()
        max_exception_count = 5
        max_exception_seconds = 3 * 60

        while True:
            try:
                for msg in kafka_utils.scheduler_consumer:
                    msg_obj = json.loads(msg.value, object_hook=gen_obj)
                    log.logger.info("get task result:{}".format(msg_obj))

                    # the worker push msg only success
                    met_task = models.TaskDependency(
                    ).downstream_met_dependency(
                        task_id=msg_obj.task_id,
                        execute_date=msg_obj.execute_date)
                    if len(met_task) > 0:
                        self.run_task(msg_obj.execute_date, met_task)
                else:
                    # NOTE(review): the for-loop has no break, so this else
                    # runs whenever the consumer iterator is exhausted —
                    # confirm that is the intended trigger for the rescan.
                    log.logger.info("begin fetch waiting_dep task list")
                    self.run_task()
                    log.logger.info("end fetch waiting_dep task list ")
            except Exception as e:
                exception_count += 1
                if exception_count == 1:
                    # first failure of a streak: start the timing window
                    exception_begin = datetime.now()
                exception_duration = (datetime.now() -
                                      exception_begin).total_seconds()
                # BUGFIX: use str(e) instead of e.message — .message is
                # deprecated, absent on many exception types, removed in
                # Python 3, and the sibling Redis variant already uses str(e).
                alert_msg = "if get exception {} times in {} seconds, " \
                            "the DependencyScheduler will exit, current:{} times/{} seconds, exception:{}-{}". \
                    format(max_exception_count, max_exception_seconds, exception_count, exception_duration, type(e), str(e))
                if exception_count >= max_exception_count and exception_duration >= max_exception_seconds:
                    alert_msg = "scheduler exit, do somthing {}".format(
                        alert_msg)
                    log.logger.error(alert_msg)
                    process_utils.Alert(alert_msg)
                    # BUGFIX: the quit log used to sit after the infinite
                    # loop and was unreachable; emit it before returning.
                    log.logger.info("Quit the DependencyScheduler")
                    return
                else:
                    log.logger.error(alert_msg)
                    process_utils.Alert(alert_msg)
                time.sleep(10)
Ejemplo n.º 5
0
    def run_task(self, execute_date=None, task_list=None):
        """Prepare runnable task instances and push them to the dispatcher.

        :param execute_date: ETL date for *task_list*; when either argument
            is None, every instance currently waiting on dependencies is
            re-checked instead
        :param task_list: tasks to prepare for *execute_date*
        :return: None
        """
        if execute_date is not None and task_list is not None:
            # explicit batch: prepare exactly what the caller asked for
            ready_instances = models.TaskInstance(). \
                job_prepare_running(execute_date, task_list)
        else:
            # rescan mode: promote every waiting instance whose
            # dependencies are now satisfied
            ready_instances = []
            waiting = models.TaskInstance().get_waiting_dep_instance()
            for pending in waiting:
                day = pending.etl_day
                satisfied, reason = models.TaskDependency(). \
                    is_met_dependency(pending.task_id, day)
                if not satisfied:
                    log.logger.info(
                        "etl_day: {}, task {} not met dependency -> {}".format(
                            day, pending.task_id, reason))
                    continue
                prepared = models.TaskInstance(). \
                    job_prepare_running(day, [pending.task_id])
                if prepared:
                    ready_instances.extend(prepared)

        log.logger.info(" queued new_instance {}".format(ready_instances))
        for instance in ready_instances:
            err = kafka_utils.PushMsgWithRetry(
                kafka_utils.TOPIC_DISPATCHER,
                kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                         task_id=instance.task_id,
                                         execute_date=instance.etl_day))
            log.logger.info("push task to queue, instance {}, err {}".format(
                instance, err))
Ejemplo n.º 6
0
    def rerun_task(self,
                   task_id,
                   date_list,
                   up_and_down=False,
                   run_up=False,
                   run_down=False,
                   force=False):
        """
        Regenerate task instances for *task_id* (job or cron) and push them.

        :param task_id: task to rerun
        :param date_list: list of ETL dates to rerun over
        :param up_and_down: also rerun all upstream AND downstream tasks
        :param run_up: also rerun all upstream tasks
        :param run_down: also rerun all downstream tasks
        :param force: for a plain job rerun, run it directly instead of
            going through dependency waiting
        :return: human-readable summary of how many instances were generated
        """
        kafka_utils.setup_kafka(config.G_Conf.Common.Broker)
        instance_list = []
        msg = ""
        run_type = ""
        # single-pass loop: exists only so "break" can short-circuit the
        # job/cron branches below
        for loop in ["looop"]:
            _ = loop
            # run job define
            job = models.TaskDefine().get_job_by_task_id(
                task_id_list=[task_id])
            if job and len(job) > 0:
                run_type = "job"
                job_list = []
                if run_up:
                    job_list = models.TaskDependency().get_all_upstream(
                        task_id)
                elif run_down:
                    job_list = models.TaskDependency().get_all_downstream(
                        task_id)
                elif up_and_down:
                    up_job = models.TaskDependency().get_all_upstream(task_id)
                    down_job = models.TaskDependency().get_all_downstream(
                        task_id)
                    if len(up_job) > 0:
                        job_list.extend(up_job)
                    if len(down_job) > 0:
                        job_list.extend(down_job)
                else:
                    # run a job with force
                    if force:
                        for etl_day in date_list:
                            tmp = models.TaskInstance(
                            ).direct_run_single_job_task(etl_day, job)
                            # BUGFIX: was "if tmp > 0" — a list-to-int
                            # comparison (always True in Python 2, TypeError
                            # in Python 3); test the length as the sibling
                            # cron branch does.
                            if len(tmp) > 0:
                                instance_list.extend(tmp)
                        break
                    else:
                        # run single waiting dependency
                        pass

                # add self
                job_list.append(task_id)
                need_run_job_list = models.TaskDefine().get_job_by_task_id(
                    task_id_list=job_list)
                if need_run_job_list and len(need_run_job_list) > 0:
                    for etl_day in date_list:
                        models.TaskInstance().create_job_task_instance(
                            execute_date=etl_day, job_list=need_run_job_list)
                    msg = "generate {} TaskDefine task instance ".format(
                        len(need_run_job_list) * len(date_list))
                break

            # run cron task
            cron = models.CronConf().get_cron_by_task_id(task_id=task_id)
            if cron:
                run_type = "cron"
                for etl_day in date_list:
                    tmp = models.TaskInstance().create_cron_task_instance(
                        execute_date=etl_day, cron_list=[cron])
                    if len(tmp) > 0:
                        instance_list.extend(tmp)
                break

        for instance in instance_list:
            err = kafka_utils.PushMsgWithRetry(
                kafka_utils.TOPIC_DISPATCHER,
                kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                         task_id=instance.task_id,
                                         execute_date=instance.etl_day))
            log.logger.info("push task to queue, instance {}, err {}".format(
                instance, err))
        if len(instance_list) > 0:
            msg = "generate {} {} task instance ".format(
                len(instance_list), run_type)
        return msg