Example #1
 def vars_map(self, key, value, init_day, format=None):
     init_date = DateUtil.parse_date(init_day, None)
     if key == 'today':
         if value is None:
             return DateUtil.get_now_fmt(format, init_date)
         else:
             return value
     elif key == 'yesterday':
         if value is None:
             return DateUtil.get_yesterday_fmt(format, init_date)
         else:
             return value
     elif key == 'intervalday':
         if value is None:
             raise Exception("interval day is none")
         return DateUtil.get_interval_day_fmt(int(value), format, init_date)
     elif key == 'lastMonth':
         if value is None:
             return DateUtil.get_last_month(init_date)
         else:
             return value
     elif key == 'currentMonth':
         if value is None:
             return DateUtil.get_current_month(init_date)
         else:
             return value
     elif key == 'yesterdayMonth':
         if value is None:
             return DateUtil.get_yesterday_month(init_date)
         else:
             return value
     else:
         return value
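A hedged usage sketch for the variable resolver above. The resolver instance and its owning class name are assumptions for illustration; only the vars_map signature and the DateUtil helpers it calls come from the example, and the date strings are placeholders.

# CommandResolver is an assumed name for the class that defines vars_map
resolver = CommandResolver()

# 'today' with no explicit value falls back to DateUtil.get_now_fmt for the given init_day
print(resolver.vars_map('today', None, '2024-01-01', '%Y-%m-%d'))

# 'intervalday' requires a value: the day offset passed to DateUtil.get_interval_day_fmt
print(resolver.vars_map('intervalday', '-7', '2024-01-01', '%Y-%m-%d'))

# unknown keys return the supplied value unchanged
print(resolver.vars_map('customVar', 'abc', '2024-01-01'))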
Example #2
 def run(self):
     current_time = DateUtil.get_now()
     second = current_time.second
     time.sleep(60 - second)
     trigger_job = self.scheduler.add_job(self.sched_trigger_run, 'interval', minutes=1)
     self.logger.info("添加处理时间触发任务:" + str(trigger_job))
     run_job = self.scheduler.add_job(self.sched_job_run, 'interval', seconds=30)
     self.logger.info("添加处理Job运行任务" + str(run_job))
     self.scheduler.start()
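The startup code above sleeps to the next minute boundary, registers two interval jobs, and starts the scheduler; the add_job calls match APScheduler's interval trigger. A minimal standalone sketch of the same pattern, assuming APScheduler 3.x, with a placeholder tick function standing in for sched_trigger_run / sched_job_run:

import time
from datetime import datetime
from apscheduler.schedulers.blocking import BlockingScheduler

def tick():
    # placeholder body; the example registers its own bound methods instead
    print("tick at " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

# sleep to the next minute boundary so interval runs start on the minute,
# mirroring time.sleep(60 - second) above
time.sleep(60 - datetime.now().second)

scheduler = BlockingScheduler()
scheduler.add_job(tick, 'interval', minutes=1)
scheduler.start()  # blocks and keeps the interval job running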
Example #3
    def get_timetrigger_job(self, current):
        dstring = DateUtil.format_year_second(current)
        day = DateUtil.get_time_day(current)
        hour = DateUtil.get_time_hour(current)
        minute = DateUtil.get_time_minute(current)
        week_day = DateUtil.get_week_day(current)
        self.logger.info("获取start_hour:" + str(hour) + " start_minute:" + str(minute) + " 运行的 Job")
        time_jobs = self.dboption.get_time_trigger_job(hour, minute)
        if time_jobs is None or len(time_jobs) == 0:
            self.logger.info(dstring + " 没有需要运行的时间触发Job")
            return
        else:
            try:
                for job in time_jobs:
                    job_name = job["job_name"]
                    trigger_type = job["trigger_type"]
                    record = 0
                    should_run = False
                    if trigger_type == "day":  # runs every day
                        should_run = True
                    elif trigger_type == "month":  # runs monthly on a fixed day
                        start_day = job["start_day"]
                        if int(start_day) == day:
                            should_run = True
                    elif trigger_type == "week":  # runs weekly on a fixed weekday
                        start_day = job["start_day"]
                        if int(start_day) == week_day:
                            should_run = True

                    if should_run:
                        record = self.dboption.update_trigger_job_pending(current, job_name)
                        if record == 1:
                            self.logger.info("更新时间触发Job:" + job_name + " 状态为Pending")
                        else:
                            self.logger.error("更新时间触发Job :" + job_name + " 状态为Pending失败")
                    else:
                        self.logger.info("时间触发 Job:" + job_name + " 没有对应时间触发执行方式 trigger_type:" + str(trigger_type))

            except Exception as e:
                self.logger.error(e)
                self.logger.error("处理时间触发Job异常")
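The heart of the loop above is the trigger-type match: daily jobs always fire, monthly jobs fire on a configured day of the month, weekly jobs on a configured weekday. A standalone sketch of that decision; the dict keys mirror the job rows above, while the sample values are invented:

def should_trigger(job, day_of_month, week_day):
    # job: a row like those returned by get_time_trigger_job; day_of_month and
    # week_day correspond to DateUtil.get_time_day / DateUtil.get_week_day
    trigger_type = job["trigger_type"]
    if trigger_type == "day":    # runs every day
        return True
    if trigger_type == "month":  # runs on a fixed day of the month
        return int(job["start_day"]) == day_of_month
    if trigger_type == "week":   # runs on a fixed day of the week
        return int(job["start_day"]) == week_day
    return False

print(should_trigger({"trigger_type": "week", "start_day": "3"}, 15, 3))   # True
print(should_trigger({"trigger_type": "month", "start_day": "1"}, 15, 3))  # False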
Example #4
 def reslove_job_dep(self, job):
     today = DateUtil.format_year_day(DateUtil.get_now())
     job_name = job["job_name"]
     dep_jobs = self.dboption.get_dependency_job_status(job_name)  # job_name,status
     self.logger.info("job_name:" + str(job_name) + " 依赖的 job数量:" + str(self.get_list_length(dep_jobs)))
     should_run = True
     for dep_job in dep_jobs:
         dep_job_name = dep_job["job_name"]
         dep_job_status = dep_job["job_status"]
         dep_job_last_run_date = dep_job["last_run_date"]  # date of the last run
         self.logger.info("job_name:" + job_name + " 依赖的Job:" + dep_job_name + " 运行状态:" + str(dep_job_status)
                          + " 最后运行时间:" + str(dep_job_last_run_date))
         if (dep_job_last_run_date and dep_job_last_run_date != today) or dep_job_last_run_date is None:
             should_run = False
             self.logger.info("因Job:" + job_name + " 依赖Job:" + dep_job_name +
                              "最后运行时间:" + str(dep_job_last_run_date) + "不是:" + today + ",无法运行")
             break
         if dep_job_status != "Done":
             should_run = False
             self.logger.info("因Job:" + job_name + " 依赖Job:"
                              + dep_job_name + " 没有运行完成状态:" + dep_job_status + ",无法运行")
             break
     return should_run
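The gate above lets a job run only when every dependency has status "Done" and its last_run_date equals today; a missing last_run_date counts as "not today". A condensed, DB-free sketch of the same check, with invented sample rows whose keys follow the example:

def deps_satisfied(dep_jobs, today):
    # dep_jobs: rows with job_status and last_run_date, as returned by
    # get_dependency_job_status above
    for dep in dep_jobs:
        if dep["last_run_date"] != today or dep["job_status"] != "Done":
            return False
    return True

sample = [{"job_name": "a", "job_status": "Done", "last_run_date": "2024-01-01"},
          {"job_name": "b", "job_status": "Running", "last_run_date": "2024-01-01"}]
print(deps_satisfied(sample, "2024-01-01"))  # False: job b has not finished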
Example #5
def run():
    date_str = DateUtil.get_now_fmt("%Y%m%d")
    query_date_str = date_str + "0" * 10
    zeus_connection = get_zeus_connection()
    query_sql = """select status,count(id) as action_count from zeus_action where id >= %s group by status """
    cursor = zeus_connection.cursor(MySQLdb.cursors.DictCursor)
    cursor.execute(query_sql, (query_date_str, ))
    rows = cursor.fetchall()
    null_count = 0
    success_count = 0
    running_count = 0
    failed_count = 0
    other_count = 0
    for row in rows:
        status = row["status"]
        if status is None:
            null_count = row["action_count"]
        elif status == "failed":
            failed_count = row["action_count"]
        elif status == "running":
            running_count = row["action_count"]
        elif status == "success":
            success_count = row["action_count"]
        else:
            print("other:" + str(status))
            other_count += row["action_count"]  # accumulate: several non-standard statuses may appear
    total_count = null_count + success_count + running_count + failed_count + other_count
    msg = [
        "总的任务数:" + str(total_count), "未运行任务数:" + str(null_count),
        "运行中任务数:" + str(running_count), "运行完成任务数:" + str(success_count),
        "运行失败任务数:" + str(failed_count)
    ]
    content = date_str + " 运行日志信息:\n" + str(",\n".join(msg)) + "\n"
    query_user_sql = """select phone from zeus_user where is_effective = 1 and uid !='admin' """
    cursor.execute(query_user_sql, ())
    rows = cursor.fetchall()
    phones = set()
    for row in rows:
        phones.add(row["phone"])
    response = smsUtil.send(",".join(phones), content)
    print(response)
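The if/elif ladder above routes each grouped row into a named counter. As a purely illustrative design note, the same tally can be kept in a single dict keyed by status (sample rows invented):

counts = {None: 0, "failed": 0, "running": 0, "success": 0, "other": 0}
rows = [{"status": "success", "action_count": 42},
        {"status": None, "action_count": 7},
        {"status": "killed", "action_count": 1}]  # invented sample rows
for row in rows:
    key = row["status"] if row["status"] in counts else "other"
    counts[key] += row["action_count"]
print(counts)                # success: 42, unrun (None): 7, other: 1
print(sum(counts.values()))  # equivalent of total_count: 50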
Example #6
    def run(self):
        today = DateUtil.get_now_fmt(None)
        msg = []
        connection = self.dbUtil.get_connection()
        cursor = connection.cursor(MySQLdb.cursors.DictCursor)

        self.run_count(cursor, today, msg)

        self.run_check(cursor, today, msg)

        connection.close()

        main_phones = self.dboption.get_main_man_by_role("admin")
        phones = set()
        for main_phone in main_phones:
            phones.add(main_phone['user_phone'])
        if not phones or len(phones) == 0:
            print("没有配置短信发送phone")
            return
        content = today + " 运行日志信息:\n" + str(",\n".join(msg))
        response = self.smsUtil.send(",".join(phones), content)
        print(response)
Example #7
def get_dependency(cursor, job_name, dep_jobs):
    dep_sql = "select job_name,dependency_job from t_etl_job_dependency where job_name = %s"
    cursor.execute(dep_sql, (job_name,))
    deps = cursor.fetchall()
    for dep_job in deps:
        dep_name = dep_job["dependency_job"]
        if dep_name not in dep_jobs:  # guard against dependency cycles and repeated walks
            dep_jobs.add(dep_name)
            get_dependency(cursor, dep_name, dep_jobs)
    return dep_jobs

if __name__ == '__main__':

    dbUtil = DBUtil()
    connection = dbUtil.get_connection()
    cursor = connection.cursor(MySQLdb.cursors.DictCursor)

    today = DateUtil.get_now_fmt()
    job_sql = "select job_name,last_start_time,last_end_time from t_etl_job where 1=1 "

    cursor.execute(job_sql + " and last_run_date=%s", (today,))
    jobs = cursor.fetchall()

    count = 0
    failed = 0
    error = 0
    for job in jobs:
        job_name = job["job_name"]
        job_start_time = datetime.datetime.strptime(job["last_start_time"], "%Y-%m-%d %H:%M:%S")
        dep_jobs = set()
        get_dependency(cursor, job_name, dep_jobs)
        for dep_job in dep_jobs:
            cursor.execute(job_sql + " and job_name = %s", (dep_job,))
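get_dependency above walks t_etl_job_dependency recursively to collect every transitive dependency of a job. A DB-free sketch of the same walk over an in-memory mapping; the mapping and job names are invented stand-ins for the table:

# stand-in for t_etl_job_dependency: job -> its direct dependency_job values
DEPS = {"report": ["agg"], "agg": ["load"], "load": []}

def collect_deps(job_name, acc):
    for dep in DEPS.get(job_name, []):
        if dep not in acc:  # same cycle guard as in get_dependency above
            acc.add(dep)
            collect_deps(dep, acc)
    return acc

print(collect_deps("report", set()))  # transitively: agg and load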
Example #8
 def export_command(self, python_path, project_path, command_key,
                    command_value, init_day):
     mysql2hive = project_path + '/export/mysql2hive.py'
     mongo2hive = project_path + '/export/mongo2hive.py'
     hive2mysql = project_path + '/export/hive2mysql.py'
     hive2excel = project_path + '/export/hive2excel.py'
     odps2hive = project_path + '/export/odps2hive.py'
     command_list = []
     command_list += python_path
     if command_key == 'mysql2hive':
         command_list.append(mysql2hive)
         command_list.append("--from")
          # handle sharded MySQL tables
         mysql_db = self.replace_mysql_db(command_value['mysql_db'],
                                          init_day)
         command_list.append(mysql_db)
         command_list.append("--to")
         command_list.append(command_value['hive_db'])
          if "include_columns" in command_value and command_value['include_columns']:
             command_list.append("--columns")
             command_list.append(command_value['include_columns'])
          if "exclude_columns" in command_value and command_value['exclude_columns']:
             command_list.append("--exclude-columns")
             command_list.append(command_value['exclude_columns'])
         vars = {}
          if "vars" in command_value and command_value["vars"]:
             vars = command_value["vars"]
          if "partition" in command_value and command_value['partition']:
             command_list.append("--partition")
             partition_value = command_value['partition']
             partition_value = self.replace_sql_param(
                 partition_value, vars, init_day)
             command_list.append(partition_value)
          if "where" in command_value and command_value['where']:
             command_list.append("--where")
             partition_value = command_value['where']
             partition_value = self.replace_sql_param(
                 partition_value, vars, init_day)
             command_list.append(partition_value)
          if "query_sql" in command_value and command_value['query_sql']:
             command_list.append("--query-sql")
             command_list.append(command_value['query_sql'])
         return command_list
     if command_key == 'mongo2hive':
         command_list.append(mongo2hive)
         command_list.append("--file")
         command_list.append(command_value["yaml_file"])
         command_list.append("--from")
         command_list.append(command_value["mongo_db"])
         command_list.append("--to")
         command_list.append(command_value["hive_db"])
         command_list.append("--init")
         if init_day is None:
             init_day = DateUtil.get_now_fmt()
         command_list.append(init_day)
         vars = {}
          if "vars" in command_value and command_value["vars"]:
             vars = command_value["vars"]
          if 'partition' in command_value and command_value['partition']:
             command_list.append("--partition")
             partition_value = command_value['partition'].strip()
             partition_value = self.replace_sql_param(
                 partition_value, vars, init_day)
             command_list.append(partition_value)
         return command_list
     if command_key == 'hive2mysql':
         command_list.append(hive2mysql)
         vars = {}
          if "vars" in command_value and command_value["vars"]:
             vars = command_value["vars"]
          if "delete_sql" in command_value and command_value["delete_sql"]:
             command_list.append("--sql")
             sql = self.replace_sql_param(command_value["delete_sql"], vars,
                                          init_day)
             command_list.append(sql)
          if "query" in command_value and command_value["query"]:
             command_list.append("--query")
             hql = self.replace_sql_param(command_value["query"], vars,
                                          init_day)
             command_list.append(hql)
         command_list.append("--hive")
         command_list.append(command_value['hive_db'])
         command_list.append("--to")
         command_list.append(command_value['mysql_db'])
         command_list.append("--columns")
         command_list.append(command_value['mysql_columns'])
         return command_list
     if command_key == 'hive2excel':
         command_list.append(hive2excel)
         vars = {}
          if "vars" in command_value and command_value["vars"]:
             vars = command_value["vars"]
         command_list.append("--name")
         command_list.append(command_value['excel_name'])
         command_list.append("--subject")
         command_list.append(command_value['email_subject'])
         command_list.append("--content")
         command_list.append(command_value['email_content'])
          if "hive_db" in command_value and command_value["hive_db"]:
             command_list.append("--table")
             command_list.append(command_value['hive_db'])
         command_list.append("--receivers")
         command_list.append(command_value['email_receivers'])
          if "query" in command_value and command_value["query"]:
             command_list.append("--query")
             hql = self.replace_sql_param(command_value["query"], vars,
                                          init_day)
             command_list.append(hql)
         return command_list
     if command_key == 'odps2hive':
         command_list.append(odps2hive)
         command_list.append("--from")
         command_list.append(command_value['odps_db'])
         command_list.append("--to")
         command_list.append(command_value['hive_db'])
          if "include_columns" in command_value and command_value['include_columns']:
             command_list.append("--columns")
             command_list.append(command_value['include_columns'])
          if "exclude_columns" in command_value and command_value['exclude_columns']:
             command_list.append("--exclude-columns")
             command_list.append(command_value['exclude_columns'])
         vars = {}
          if "vars" in command_value and command_value["vars"]:
             vars = command_value["vars"]
          if 'partition' in command_value and command_value['partition']:
             command_list.append("--partition")
             partition_value = command_value['partition'].strip()
             partition_format = None
              if 'partition_format' in command_value and command_value['partition_format']:
                  partition_format = command_value['partition_format'].strip()
             partition_value = self.replace_sql_param(
                 partition_value, vars, init_day, partition_format)
             command_list.append(partition_value)
         return command_list
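A hedged usage sketch for the mysql2hive branch above. The builder instance, paths, and command_value contents are invented for illustration; only the keys that export_command reads and the list-typed python_path come from the code:

# CommandBuilder is an assumed name for the class that defines export_command
builder = CommandBuilder()
command_value = {
    "mysql_db": "shop.orders",  # source spec handed to replace_mysql_db
    "hive_db": "ods.orders",    # target Hive table
}
cmd = builder.export_command(["/usr/bin/python"], "/opt/etl", "mysql2hive",
                             command_value, "2024-01-01")
# cmd is a flat argv list, roughly:
# ['/usr/bin/python', '/opt/etl/export/mysql2hive.py',
#  '--from', <mysql_db after the sharding rewrite>, '--to', 'ods.orders']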
Example #9
    def run_queue_job_pending(self):
        self.logger.info("\n")
        self.logger.info("... interval run run_queue_job_pending ....")
        try:
            self.check_process_state()  # check the state of existing child processes

            logpath = self.config.get("job.log.path")
            if logpath is None or len(logpath.strip()) == 0:
                raise Exception("can't find slave job.log.path")
            if not os.path.exists(logpath):
                os.makedirs(logpath)
            today = DateUtil.get_today()
            today_log_dir = logpath + "/" + today
            if not os.path.exists(today_log_dir):
                os.makedirs(today_log_dir)
            queue_job = self.dboption.get_queue_job_pending()
            if queue_job is not None:
                job_name = queue_job["job_name"]
                etl_job = self.dboption.get_job_info(job_name)
                job_status = etl_job["job_status"]
                job_retry_count = etl_job["retry_count"]
                run_number = queue_job["run_number"]
                if not self.check_should_run(job_name, job_status,
                                             job_retry_count, run_number):
                    return

                logfile = today_log_dir + "/" + job_name + "_" + today + ".log." + str(
                    run_number)
                bufsize = 0
                logfile_handler = open(logfile, 'w', bufsize)
                python_bin = CommonUtil.python_bin(self.config)
                run_path = project_path + "/bin/" + "runcommand.py"
                child = subprocess.Popen(python_bin +
                                         [run_path, "-job", job_name],
                                         stdout=logfile_handler.fileno(),
                                         stderr=subprocess.STDOUT,
                                         shell=False)
                pid = child.pid
                if pid > 0:
                    self.logger.info("创建子进程:" + str(pid) + " 运行Job:" +
                                     str(job_name))
                    code = self.dboption.update_job_running(job_name)
                    if code != 1:
                        try:
                            self.logger.info("更新Job:" + job_name +
                                             " 运行状态为Running失败,停止创建的进程")
                            self.terminate_process(child, logfile_handler)
                        except Exception as e:
                            self.logger.error(e)
                            self.logger.error("terminate 子进程异常")
                            logfile_handler.flush()
                            logfile_handler.close()
                    else:
                        self.logger.info("更新Job:" + job_name + " 运行状态Running")
                        code = self.dboption.update_job_queue_done(job_name)  # FixMe: transaction issue
                        self.logger.info("更新Queue job:" + str(job_name) +
                                         " 状态为Done,影响行数:" + str(code))
                        if code != 1:
                            self.logger.error("更新Job Queue job:" + job_name +
                                              " 状态为Done失败")
                            self.terminate_process(child, logfile_handler)
                            self.logger.info("重新修改job_name:" + job_name +
                                             " 状态为Pending 等待下次运行")
                            self.dboption.update_job_pending_from_running(
                                job_name)
                        else:
                            self.process_running[child] = {
                                "logfile_handler": logfile_handler,
                                "job_name": job_name,
                                "pid": pid
                            }
                else:
                    self.logger.error("启动子进程异常pid:" + str(pid))
                    logfile_handler.flush()
                    logfile_handler.close()
            else:
Example #10
    def sched_job_run(self):
        self.logger.info("... interval run sched_job_run ....")
        start_time = DateUtil.get_now()
        # number of currently running jobs
        max_running_jobs = int(self.config.get("job.run.max"))
        running_jobs = self.dboption.get_running_jobs()
        if running_jobs is not None:
            count_running_jobs = len(running_jobs)
            if count_running_jobs > max_running_jobs:
                self.logger.info("当前运行的Job 数量:" + str(count_running_jobs) + "大于系统的最大任务数:" + str(max_running_jobs))
                return
        else:
            count_running_jobs = 0

        self.logger.info(str(start_time) + " 当前RUNNING状态的Job 数量:" + str(count_running_jobs))

        pending_jobs = self.dboption.get_pending_jobs()
        if pending_jobs is None or len(pending_jobs) == 0:
            count_pending_jobs = 0
            self.logger.info("当前Pending状态的 Job 数量:" + str(count_pending_jobs))
            return
        else:
            count_pending_jobs = len(pending_jobs)
            self.logger.info("当前Pending状态的 Job 数量:" + str(count_pending_jobs))

        require_jobs_count = max_running_jobs - count_running_jobs
        if require_jobs_count > 0:
            should_require_jobs = set()
            require_time = 0
            should_continue = True
            while True:
                if not should_continue:
                    break
                if len(should_require_jobs) == require_jobs_count:
                    break
                if require_time > 100:  # crude safeguard: this check is imperfect, stop after 100 passes
                    break
                self.logger.info("第" + str(require_time) + " 次运行")
                self.logger.info("可以运行的job数量:" + str(len(should_require_jobs)) + " 需要运行的job数量:" + str(require_jobs_count))
                pending_jobs = self.dboption.get_pending_jobs_by_require_time(require_time, max_running_jobs)
                for job in pending_jobs:
                    job_name = job["job_name"]
                    self.logger.info("判断Job:" + job_name + "依赖是否全部运行完成")
                    should_run = self.reslove_job_dep(job)
                    if should_run:  # FixMe: should also check whether the job already ran today
                        self.logger.info("job:" + job_name + " 依赖全部运行完成添加到可以运行的job列表中")
                        should_require_jobs.add(job_name)
                        if len(should_require_jobs) == require_jobs_count:
                            should_continue = False
                            break
                if pending_jobs is None or len(pending_jobs) < require_jobs_count:
                    self.logger.info("当前Pending状态的 Job 数量 " + str(self.get_list_length(pending_jobs)) +
                                     " 小于 需要运行的任务数 " + str(require_jobs_count) +",无需循环")
                    should_continue = False

                require_time += 1
            self.logger.info("需要运行的Job:" + str(should_require_jobs))
            self.run_job_command(should_require_jobs)
            end_time = DateUtil.get_now()
            self.logger.info("依赖触发调度执行时长:" + str(end_time - start_time))
        else:
            self.logger.info("当前运行的Job 数量:" + str(count_running_jobs) + "大于系统的最大任务数")
Example #11
 def sched_trigger_run(self, current_time=None):
     self.logger.info("... interval run sched_trigger_run ....")
     start_time = DateUtil.get_now()
     self.get_timetrigger_job(start_time)
     end_time = DateUtil.get_now()
     self.logger.info("时间触发调度运行时长:" + str(end_time - start_time))
Example #12
    reload(sys)
    sys.setdefaultencoding('utf-8')

    run_yaml = RunYaml()
    optParser = run_yaml.option_parser()
    options, args = optParser.parse_args(sys.argv[1:])

    if options.path is None:
        print("require yaml file")
        optParser.print_help()
        sys.exit(-1)

    start = options.start
    end = options.end
    if start is None:
        start = DateUtil.parse_date(DateUtil.get_now_fmt())
    else:
        start = DateUtil.parse_date(start)
    if end is None:
        end = DateUtil.parse_date(DateUtil.get_now_fmt())
    else:
        end = DateUtil.parse_date(end)

    run_code = []
    for p_day in DateUtil.get_list_day(start, end):
        print("运行时设置的日期: " + str(p_day))
        code = run_yaml.run_command(options.path, p_day)
        run_code.append(str(code))
        if code != 0:
            sys.exit(1)
    code_str = ",".join(run_code)
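The driver above walks every day between the parsed start and end dates via DateUtil.get_list_day and runs the YAML once per day, stopping at the first non-zero exit code. A standard-library sketch of an inclusive day-range walk like the one DateUtil.get_list_day presumably performs; the date format string is an assumption:

from datetime import datetime, timedelta

def list_days(start_str, end_str, fmt="%Y-%m-%d"):
    # inclusive list of day strings from start_str to end_str
    start = datetime.strptime(start_str, fmt)
    end = datetime.strptime(end_str, fmt)
    days = []
    while start <= end:
        days.append(start.strftime(fmt))
        start += timedelta(days=1)
    return days

print(list_days("2024-01-30", "2024-02-02"))
# ['2024-01-30', '2024-01-31', '2024-02-01', '2024-02-02']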