コード例 #1
0
 def __init__(self):
     """Wire up the executor's collaborators.

     Creates dedicated loggers for the executor and for apscheduler
     output, loads the configuration and DB helpers, and prepares a
     blocking scheduler plus a monitor.  No jobs are started here.
     """
     self.logger = Logger("executor").getlog()
     self.aps_log = Logger("apscheduler").getlog()
     self.config = ConfigUtil()
     self.dboption = DBOption()
     # Bookkeeping for spawned child processes (empty until jobs run).
     self.process_running = {}
     self.scheduler = BlockingScheduler()
     self.monitor = Monitor()
コード例 #2
0
ファイル: smsutil.py プロジェクト: zixuedanxin/etl_schedule
class SMSUtil(object):
    """Thin HTTP client for the SMS gateway's template-send endpoint."""

    def __init__(self):
        self.config = ConfigUtil()

    def send(self, phone, content):
        """POST *content* to *phone* via the "super_template" SMS template.

        phone   -- recipient number(s); multiple numbers are comma-separated
        content -- message body placed in the template's "content" slot
        Returns the raw response body from the gateway.
        """
        payload = json.dumps({
            "mobile": phone,
            "template": "super_template",
            "data": {
                "content": content
            }
        })
        host = self.config.get("sms.host")
        url = "http://" + host + "/api/v1/sms/send/template"
        req = urllib2.Request(url=url, data=payload)
        req.add_header('Content-Type', 'application/json')
        return urllib2.urlopen(req).read()
コード例 #3
0
 def export(self, outfilename, dbname=None):
     """Export schema output for *dbname* into *outfilename*.

     The file is written under the configured output directory (created
     if missing, relative to the current working directory).  When
     dbname is omitted it defaults to the datasource's database name.
     """
     # Idiom fix: identity check against None instead of "== None".
     if dbname is None:
         dbname = self.datasource.get_db_name()
     outputdir = ConfigUtil.instance().outputdir
     outpath = os.path.join(os.getcwd(), outputdir)
     mkdir_p(outpath)
     outpath = os.path.join(outpath, outfilename)
     self.export_output(outpath, dbname)
コード例 #4
0
ファイル: mylog.py プロジェクト: wujiyu115/schema_export
def install_log():
    """Install process-wide logging.

    Reads the log level from ConfigUtil (defaulting to "INFO").  On
    Python >= 2.7.0 final, logging is configured from the YAML file
    named by ConfigUtil.instance().loggingyaml; otherwise a midnight
    rotating file handler is attached to the root logger.
    """
    instance = ConfigUtil.instance()
    log_level = str(instance.log_level if hasattr(instance, "log_level") else "INFO")
    # BUG FIX: "logger" was referenced in the fallback branch before it
    # was assigned at the bottom of the function; bind the root logger
    # up front so both branches can use it.
    logger = logging.getLogger()
    if sys.hexversion > 0x20700f0:
        # NOTE(review): yaml.load without an explicit Loader executes
        # arbitrary YAML tags; prefer yaml.safe_load if the logging
        # config file allows it — confirm before changing.
        with open(instance.loggingyaml, 'r') as yaml_file:
            logging.config.dictConfig(yaml.load(yaml_file))
    else:
        log_file = 'logging.%s-%d.log' % (time.strftime(
            "%Y-%m-%d--%H-%M-%S", time.localtime()), os.getpid())
        timelog = logging.handlers.TimedRotatingFileHandler(
            log_file, 'midnight', 1, 0)
        logger.addHandler(timelog)
    logger.setLevel(log_level)

    stdout_encoding()
コード例 #5
0
ファイル: logutil.py プロジェクト: zixuedanxin/etl_schedule
    def __init__(self, name):
        """Build an INFO-level logger named *name* that writes to a
        midnight-rotated file under the configured log directory."""
        self.config = ConfigUtil()
        self.logger = logging.getLogger(name)
        self.logger.setLevel(logging.INFO)

        # Make sure the configured log directory exists before the
        # file handler tries to open its log file.
        log_dir = self.config.get("shcedule.log.path")
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

        # One file per logger name, rotated nightly, 10 backups kept.
        file_handler = logging.handlers.TimedRotatingFileHandler(
            log_dir + "/" + name + ".log", when="midnight", backupCount=10)
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s'))
        self.logger.addHandler(file_handler)
コード例 #6
0
ファイル: dbutil.py プロジェクト: zixuedanxin/etl_schedule
    def __init__(self):
        """Read MySQL settings from configuration and build a pooled
        connection factory plus a dedicated "db" logger."""
        self.config = ConfigUtil()
        self.host = self.config.get("mysql.host")
        self.port = self.config.get("mysql.port")
        self.db = self.config.get("mysql.db")
        self.username = self.config.get("mysql.username")
        self.password = self.config.get("mysql.password")
        self.logger = Logger("db").getlog()

        # DBUtils pool: keep at least 1 idle connection, cache at most
        # 20.  The port is stored as a string in config, hence int().
        self.pool = PooledDB(creator=MySQLdb,
                             mincached=1,
                             maxcached=20,
                             host=self.host,
                             port=int(self.port),
                             user=self.username,
                             passwd=self.password,
                             db=self.db,
                             use_unicode=True,
                             charset="utf8")
コード例 #7
0
class RunCommand(object):
    """Runs a configured ETL job's script (.py/.sh/.yml) in a child
    process and reports its exit status."""

    def __init__(self):
        self.config = ConfigUtil()
        self.dboption = DBOption()

    # run command
    def run_command(self, job_name):
        """Run the script registered for *job_name*.

        Returns the child's exit code for .py/.sh scripts, the result of
        run_yaml for .yml scripts, or 1 when anything fails.
        """
        try:
            etl_job = self.dboption.get_job_info(job_name)

            job_script = etl_job["job_script"]

            job_script = self.config.get("job.script.path") + "/script/" + job_script

            print(" 需要运行的Job名称:" + job_name + " 脚本位置:" + job_script)

            if not os.path.exists(job_script):
                raise Exception("can't find run script:" + job_script)

            extend = os.path.splitext(job_script)[1]

            if extend == ".py":
                child = subprocess.Popen([CommonUtil.python_bin(self.config), job_script, "-job", job_name],
                                         stdout=None,
                                         stderr=subprocess.STDOUT,
                                         shell=False)
                # BUG FIX: print/wait/return used to sit inside the
                # "if child is None" branch after the raise, so they were
                # unreachable and a successful start returned None.
                if child is None:
                    raise Exception("创建子进程运行脚本:" + job_script + "异常")
                print("创建子进程:" + str(child.pid))
                code = child.wait()
                return code

            elif extend == ".sh":
                # NOTE(review): shell=True with an argument list hands
                # "-job"/job_name to /bin/sh itself, not to the script —
                # confirm whether the script actually needs these args.
                child = subprocess.Popen([job_script, "-job", job_name],
                                         stdout=None,
                                         stderr=subprocess.STDOUT,
                                         shell=True)
                # Same unreachable-code fix as the .py branch above.
                if child is None:
                    raise Exception("创建子进程运行脚本:" + job_script + "异常")
                print("创建子进程:" + str(child.pid))
                code = child.wait()
                return code

            elif extend == ".yml":
                return self.run_yaml(job_script)

            else:
                raise Exception("当前只支持 python , shell , yaml 脚本")
        except Exception:
            print("Executor 上 job 运行失败:" + job_name)
            print(traceback.format_exc())
            return 1
コード例 #8
0
ファイル: dbutil.py プロジェクト: zixuedanxin/etl_schedule
class DBUtil(object):
    """MySQL access helper backed by a DBUtils connection pool."""

    def __init__(self):
        self.config = ConfigUtil()
        self.host = self.config.get("mysql.host")
        self.port = self.config.get("mysql.port")
        self.db = self.config.get("mysql.db")
        self.username = self.config.get("mysql.username")
        self.password = self.config.get("mysql.password")
        self.logger = Logger("db").getlog()

        # Pool keeps >=1 idle connection and caches at most 20; the
        # port arrives from config as a string, hence int().
        self.pool = PooledDB(creator=MySQLdb,
                             mincached=1,
                             maxcached=20,
                             host=self.host,
                             port=int(self.port),
                             user=self.username,
                             passwd=self.password,
                             db=self.db,
                             use_unicode=True,
                             charset="utf8")

    def get_connection(self):
        """Fetch a connection from the pool.

        On failure, retries up to 3 more times with a 5 second pause
        before each attempt.  Returns None when every attempt fails —
        callers must handle a None result.
        """
        try:
            mysql_con = self.pool.connection()
            return mysql_con
        # Deprecated "except Exception, e" replaced by the "as" form,
        # valid on Python 2.6+ and required on Python 3.
        except Exception as e:
            self.logger.error(e)
            for i in range(3):
                try:
                    time.sleep(5)
                    mysql_con = self.pool.connection()
                    return mysql_con
                except Exception as e:
                    self.logger.error(e)
                    self.logger.error("数据库连接异常执行" + str(i + 1) + "次连接")
            return None
コード例 #9
0
ファイル: smsutil.py プロジェクト: zixuedanxin/etl_schedule
 def __init__(self):
     """Load the configuration used to reach the SMS gateway."""
     self.config = ConfigUtil()
コード例 #10
0
class ETLUtil(object):
    """Command-line helper for maintaining ETL jobs stored in the
    t_etl_job* tables: create (time- or dependency-triggered jobs from
    config lines), delete, rename and inspect."""

    def __init__(self):
        self.dboption = DBOption()
        self.config = ConfigUtil()
        # Usage strings printed when a config line cannot be parsed.
        self.time_format = "时间触发格式: Job名称,time,触发天(每天0),触发小时,触发分钟,触发周期(day|month),负责人,脚本位置"
        self.depdency_format = "依赖触发格式:Job名称,dependency,依赖Job(多个用空格分隔),触发Job,负责人,脚本位置"

    def check_trigger_job(self, day, interval):
        """Return True when (day, interval) are consistent.

        day == 0 means "every day" and requires interval 'day'; a
        non-zero day requires interval 'month'.
        """
        if int(day) == 0 and interval == 'month':
            print("day =0 表示每天触发, 触发周期应该为day")
            return False
        if int(day) != 0 and interval == 'day':
            print("触发周期为day 表示每天,需要设置触发天为 0")
            return False
        return True

    def remove_etl_job(self, job_name, should_check=True):
        """Delete *job_name* and its trigger/stream/dependency rows.

        With should_check=True the job is kept if another job depends
        on it.  Jobs that this one triggers are never deleted.
        """
        job_info = self.dboption.get_job_info(job_name)
        if job_info is None:
            print(job_name + " 不存在,无法删除")
            return
        if should_check:
            # Refuse to delete a job that other jobs still depend on.
            dependened_jobs = self.dboption.get_depended_job(job_name)
            depended = False
            if dependened_jobs is not None:
                for dependened_job in dependened_jobs:
                    print(dependened_job["job_name"] + " 依赖 " + job_name)
                    depended = True
            if depended:
                print(job_name + " 被依赖无法删除")
                return
        stream_jobs = self.dboption.get_stream_job(job_name)
        stream_job_set = set()
        for stream_job in stream_jobs:
            stream_job_set.add(stream_job["job_name"])
        print("删除 job_name:" + job_name + " 不会删除触发执行的job:[" + ",".join(stream_job_set, ) + "]!!!")
        # Remove time trigger, stream links, dependencies, then the job row.
        self.dboption.remove_etl_job_trigger(job_name)
        self.dboption.remove_etl_stream_job(job_name)
        self.dboption.remove_etl_dependency_job(job_name)
        self.dboption.remove_etl_job(job_name)

    def parse_line(self, line):
        """Parse one comma-separated config line and create the job.

        An existing job with the same name is removed and re-created.
        Raises on malformed lines, missing scripts or unknown
        dependency jobs.
        """
        if line is None or len(line.strip()) == 0:
            # Blank record - nothing to do.
            pass
        else:
            line = line.strip()
            print(line)
            line_array = line.split(",")
            job_name = line_array[0].upper()
            job_info = self.dboption.get_job_info(job_name)
            job_trigger_info = self.dboption.get_etl_job_trigger(job_name)
            if job_info or job_trigger_info:
                print("Job:" + job_name + " 已经存在,需要删除后重新创建!")
                self.remove_etl_job(job_name, should_check=False)
            trigger_type = line_array[1]
            # Owner and script are always the last two fields.
            man = line_array[len(line_array) - 2]
            script = line_array[len(line_array) - 1].strip()
            script_path = self.config.get("job.script.path") + "/script/" + script
            self.check_script_path(script_path)
            if trigger_type and trigger_type in ("time", "dependency"):
                if trigger_type == "time":
                    day = line_array[2]
                    hour = line_array[3]
                    minute = line_array[4]
                    interval = line_array[5]
                    valid = self.check_trigger_job(day, interval)
                    if not valid:
                        raise Exception("Job 配置错误")

                    # Optional 7th field: extra dependencies.  When the
                    # line has no such field, line_array[6] is the owner
                    # itself, so deps == man and nothing is added.
                    deps = line_array[6]
                    add_job_dep_sets = set()
                    if deps != man:
                        print("时间触发需要添加依赖: " + deps)
                        for dep_job in deps.split(" "):
                            job = self.dboption.get_job_info(dep_job)
                            if job is None:
                                print("依赖的 Job:" + str(dep_job) + " 不存在")
                                raise Exception("依赖Job :" + dep_job + " 不存在")
                            add_job_dep_sets.add(dep_job)
                        print("需要配置依赖:" + ",".join(add_job_dep_sets))

                    code = self.dboption.save_time_trigger_job(job_name, trigger_type,
                                                               day, hour, minute,
                                                               interval, man, script, add_job_dep_sets)

                    if code == 1:
                        print("添加时间触发Job 成功")
                    else:
                        print("添加时间触发Job 失败")

                elif trigger_type == "dependency":
                    dep_jobs = line_array[2].strip().upper()
                    stream = line_array[3].strip().upper()
                    # Dependency jobs must all exist before saving.
                    add_job_dep_sets = set()
                    for dep_job in dep_jobs.split(" "):
                        job = self.dboption.get_job_info(dep_job)
                        if job is None:
                            print("依赖的 Job:" + str(dep_job) + " 不存在")
                            raise Exception("依赖Job :" + dep_job + " 不存在")
                        add_job_dep_sets.add(dep_job)
                    print("需要配置依赖:" + ",".join(add_job_dep_sets))
                    stream_job = self.dboption.get_job_info(stream)
                    if stream_job is None:
                        raise Exception("Job:" + stream + " 不存在")
                    code = self.dboption.save_depdency_trigger_job(job_name, trigger_type, add_job_dep_sets, stream,
                                                                   man,
                                                                   script,)
                    if code == 1:
                        print("添加依赖触发Job 成功")
                    else:
                        print("添加依赖触发 Job 失败")
            else:
                # BUG FIX: the usage help below was unreachable behind an
                # earlier raise; print it first, then raise once.
                print(self.time_format)
                print(self.depdency_format)
                raise Exception("配置 job 触发方式 :" + str(trigger_type))

    def check_script_path(self, path):
        """Raise when *path* does not exist on disk."""
        exists = os.path.exists(path)
        if not exists:
            raise Exception("脚本:" + path + " 不存在")

    def query_etl_job(self, job_name):
        """Print the job row, its trigger, its dependencies, the jobs it
        triggers, and the job scheduled immediately before it."""
        print("查询Job:" + job_name)
        etl_job = self.dboption.get_job_info(job_name)
        if etl_job is None:
            print("没有找Job:" + job_name)
        else:
            print("------job-----")
            print("job:" + str(etl_job))
            trigger = etl_job["job_trigger"]
            if trigger == "time":
                etl_job_trigger = self.dboption.get_etl_job_trigger(job_name)
                print("-------trigger-----")
                print(str(etl_job_trigger))
            dep_jobs = self.dboption.get_dependency_job(job_name)
            print("------deps-----")
            if dep_jobs is not None and len(dep_jobs) != 0:
                for dep_job in dep_jobs:
                    print(str(dep_job['dependency_job']))
            print("-----nexts-----")
            next_jobs = self.dboption.get_stream_job(job_name)
            if next_jobs is not None and len(next_jobs) != 0:
                for next_job in next_jobs:
                    print(str(next_job['stream_job']))
            print("-----before----")
            before = self.dboption.get_before_job(job_name)
            if before is not None:
                print(str(before['job_name']))

    def read_file(self, path):
        """Feed every non-empty, non-comment line of *path* to parse_line."""
        # BUG FIX: use a context manager so the file handle is closed
        # (the original leaked the handle).
        with open(path, 'r') as file_handler:
            for line in file_handler.readlines():
                if line is not None and len(line) > 0 and not line.startswith("#"):
                    self.parse_line(line)

    def rename_job(self, etl_job_name):
        """Rename a job.  *etl_job_name* is "old new script_rel_path".

        Updates the trigger, stream, dependency and job tables, then
        prints the renamed job for verification.
        """
        etl_job_array = etl_job_name.strip().split(" ")
        if etl_job_array is None or len(etl_job_array) != 3:
            raise Exception("修改job 名称格式: 原名称 新名称 脚本相对路径")
        from_job = etl_job_array[0].strip()
        to_job = etl_job_array[1].strip()
        print("修改的名称:" + from_job + " -> " + to_job)
        from_job_info = self.dboption.get_job_info(from_job)
        if not from_job_info:
            raise Exception("原 Job 名称 " + from_job + " 不存在")
        to_job_info = self.dboption.get_job_info(to_job)
        if to_job_info:
            # BUG FIX: the message previously reported from_job here.
            raise Exception("新 Job 名称 " + to_job + " 已存在")

        yaml_file = etl_job_array[2].strip().lower()
        script_path = self.config.get("job.script.path") + "/script/" + yaml_file
        self.check_script_path(script_path)

        # trigger
        update_trigger = "update t_etl_job_trigger set job_name = %s where job_name = %s"
        self.dboption.execute_sql(update_trigger, (to_job, from_job))

        # stream (both sides of the relation)
        update_stream_1 = "update t_etl_job_stream set job_name = %s where job_name = %s"
        self.dboption.execute_sql(update_stream_1, (to_job, from_job))
        update_stream_2 = "update t_etl_job_stream set stream_job = %s where stream_job = %s"
        self.dboption.execute_sql(update_stream_2, (to_job, from_job))

        # dependency (both sides of the relation)
        update_dependency_1 = "update t_etl_job_dependency set job_name = %s where job_name = %s"
        self.dboption.execute_sql(update_dependency_1, (to_job, from_job))
        update_dependency_2 = "update t_etl_job_dependency set dependency_job = %s where dependency_job = %s"
        self.dboption.execute_sql(update_dependency_2, (to_job, from_job))

        # job row itself (name + script path)
        update_job = "update t_etl_job set job_name = %s , job_script=%s where job_name = %s"
        self.dboption.execute_sql(update_job, (to_job, yaml_file, from_job))

        self.query_etl_job(to_job)
コード例 #11
0
 def __init__(self):
     self.dboption = DBOption()
     self.config = ConfigUtil()
     self.time_format = "时间触发格式: Job名称,time,触发天(每天0),触发小时,触发分钟,触发周期(day|month),负责人,脚本位置"
     self.depdency_format = "依赖触发格式:Job名称,dependency,依赖Job(多个用空格分隔),触发Job,负责人,脚本位置"
コード例 #12
0
class Executor(object):
    def __init__(self):
        """Create the executor's loggers, config/DB helpers, the table
        of running child processes, the blocking scheduler and the
        monitor."""
        self.logger = Logger("executor").getlog()
        self.aps_log = Logger("apscheduler").getlog()
        self.config = ConfigUtil()
        self.dboption = DBOption()
        # child Popen -> {"logfile_handler", "job_name", "pid"}
        self.process_running = {}
        self.scheduler = BlockingScheduler()
        self.monitor = Monitor()

    '''
     运行 t_etl_job_queue 中Pending状态的job
    '''

    def run_queue_job_pending(self):
        self.logger.info("\n")
        self.logger.info("... interval run run_queue_job_pending ....")
        try:
            self.check_process_state()  # 判断已有的进程状态

            logpath = self.config.get("job.log.path")
            if logpath is None or len(logpath.strip()) == 0:
                raise Exception("can't find slave job.log.path")
            if not os.path.exists(logpath):
                os.makedirs(logpath)
            today = DateUtil.get_today()
            today_log_dir = logpath + "/" + today
            if not os.path.exists(today_log_dir):
                os.makedirs(today_log_dir)
            queue_job = self.dboption.get_queue_job_pending()
            if queue_job is not None:
                job_name = queue_job["job_name"]
                etl_job = self.dboption.get_job_info(job_name)
                job_status = etl_job["job_status"]
                job_retry_count = etl_job["retry_count"]
                run_number = queue_job["run_number"]
                if not self.check_should_run(job_name, job_status,
                                             job_retry_count, run_number):
                    return

                logfile = today_log_dir + "/" + job_name + "_" + today + ".log." + str(
                    run_number)
                bufsize = 0
                logfile_handler = open(logfile, 'w', bufsize)
                python_bin = CommonUtil.python_bin(self.config)
                run_path = project_path + "/bin/" + "runcommand.py"
                child = subprocess.Popen(python_bin +
                                         [run_path, "-job", job_name],
                                         stdout=logfile_handler.fileno(),
                                         stderr=subprocess.STDOUT,
                                         shell=False)
                pid = child.pid
                if pid > 0:
                    self.logger.info("创建子进程:" + str(pid) + " 运行Job:" +
                                     str(job_name))
                    code = self.dboption.update_job_running(job_name)
                    if code != 1:
                        try:
                            self.logger.info("更新Job:" + job_name +
                                             " 运行状态为Running失败,停止创建的进程")
                            self.terminate_process(child, logfile_handler)
                        except Exception, e:
                            self.logger.error(e)
                            self.logger.error("terminate 子进程异常")
                            logfile_handler.flush()
                            logfile_handler.close()
                    else:
                        self.logger.info("更新Job:" + job_name + " 运行状态Running")
                        code = self.dboption.update_job_queue_done(
                            job_name)  # FixMe 事物问题
                        self.logger.info("更新Queue job:" + str(job_name) +
                                         " 状态为Done,影响行数:" + str(code))
                        if code != 1:
                            self.logger.error("更新Job Queue job:" + job_name +
                                              " 状态为Done失败")
                            self.terminate_process(child, logfile_handler)
                            self.logger.info("重新修改job_name:" + job_name +
                                             " 状态为Pending 等待下次运行")
                            self.dboption.update_job_pending_from_running(
                                job_name)
                        else:
                            self.process_running[child] = {
                                "logfile_handler": logfile_handler,
                                "job_name": job_name,
                                "pid": pid
                            }
                else:
                    self.logger.error("启动子进程异常pid:" + str(pid))
                    logfile_handler.flush()
                    logfile_handler.close()
            else:
コード例 #13
0
 def __init__(self):
     """Load configuration plus DB, DB-option and SMS helpers."""
     self.config = ConfigUtil()
     self.dbUtil = DBUtil()
     self.dboption = DBOption()
     self.smsUtil = SMSUtil()
コード例 #14
0
#!/usr/bin/python
# -*- coding:utf-8 -*-

import os
import sys
import MySQLdb
from configutil import ConfigUtil
from smsutil import SMSUtil
from dateutil import DateUtil

project_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_path)

configUtil = ConfigUtil()
smsUtil = SMSUtil()


def get_zeus_connection():
    """Return a new MySQLdb connection to the "db_zeus" database.

    Host, credentials and port come from the module-level configUtil;
    the caller owns the connection and is responsible for closing it.
    """
    host = configUtil.get("mysql.host")
    username = configUtil.get("mysql.username")
    password = configUtil.get("mysql.password")
    port = int(configUtil.get("mysql.port"))
    db = "db_zeus"
    # Keyword arguments instead of positionals: MySQLdb.connect's
    # positional order (host, user, passwd, db, port) is easy to get
    # wrong silently.
    connection = MySQLdb.connect(host=host,
                                 user=username,
                                 passwd=password,
                                 db=db,
                                 port=port,
                                 use_unicode=True,
                                 charset='utf8')
    return connection
コード例 #15
0
ファイル: pdf_format.py プロジェクト: wujiyu115/schema_export
import os

from reportlab.lib import fonts,colors
from reportlab.lib.pagesizes import letter, inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle,Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

from base_format import BaseFormat
from configutil import ConfigUtil


# ttf=os.path.join(os.getcwd(), 'templates',"simsun.ttf")
# Register the TrueType font configured as "pdfttf" under the name
# 'sim' so reportlab paragraphs/tables can reference it.
ttf=ConfigUtil.instance().pdfttf
ttfname = 'sim'
pdfmetrics.registerFont(TTFont(ttfname,ttf))

# Map two style variants of the family to the same face — presumably
# so style-tag markup falls back to the single registered font.
fonts.addMapping(ttfname, 0, 0,ttfname)
fonts.addMapping(ttfname, 0, 1, ttfname)

class PdfFormat(BaseFormat):
	"""PDF output format: collects flowables and renders them with
	reportlab's SimpleDocTemplate on letter-sized pages."""

	def __init__(self, filename,headings):
		self.suffix = "pdf"  # output file suffix
		super(PdfFormat, self).__init__(filename,headings)

		self.doc = SimpleDocTemplate(self.outname, pagesize=letter)
		# Flowables appended here and rendered by the document build.
		self.elements = []
コード例 #16
0
 def __init__(self):
     """Create the blocking scheduler, its loggers, and DB/config
     helpers."""
     self.scheduler = BlockingScheduler()
     self.logger = Logger("scheduler").getlog()
     self.aps_log = Logger("apscheduler").getlog()
     self.dboption = DBOption()
     self.config = ConfigUtil()
コード例 #17
0
 def __init__(self):
     """Load configuration and database-option helpers."""
     self.config = ConfigUtil()
     self.dboption = DBOption()
コード例 #18
0
def is_debug():
    """Return True when the configured log level is exactly "DEBUG"."""
    level = ConfigUtil.instance().log_level
    return level == "DEBUG"