Example 1
import os
import sys
import datetime
from sqlalchemy import create_engine, pool
from conf import config
from utility import get_engine_str

# Number of parallel processes
_process_nums = 6

# Script that executes a single task
_data_export_py = os.path.join(sys.path[0], "data_export.py")

# Log path
_log_path = os.path.join(config.LOG_PATH, "data_export")

if not os.path.exists(_log_path):
    os.makedirs(_log_path)

# MySQL database connection
con = create_engine(get_engine_str("mysql").format(**config.DB_CONF),
                    poolclass=pool.NullPool)

Dlogger = config.get_logger("DataExportAuto")


def parallel_write_log(no, id, connection_name, db_name, table_name,
                       last_exec_date, retry_count):
    try:
        current_date = str(datetime.date.today())
        if last_exec_date == current_date:
            print("{no:<3} {table_name:<40}      今日已执行成功 skip.".format(
                no=no, table_name=db_name + "." + table_name))
            return ""

        full_log_path = "{log_path}/sqoop_export_{db_name}_{table_name}.log".format(
            log_path=_log_path, db_name=db_name, table_name=table_name)
        sh_cmd = "{python} -u {date_export_py} -w {connection_name} --db {db_name} --tb {table_name} &>> {full_log_path}\n". \
            format(python=config.PYTHON3,
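
The excerpt stops inside the format() call above. A minimal sketch of how such per-table export commands could then be dispatched in parallel; the use of subprocess and multiprocessing.Pool here is an assumption, not shown in the excerpt:

import subprocess
from multiprocessing import Pool

def _run_export(sh_cmd):
    # bash is required because the command relies on &>> log redirection
    return subprocess.call(sh_cmd, shell=True, executable="/bin/bash")

def run_exports(commands):
    # pool size mirrors the _process_nums = 6 setting above
    with Pool(processes=_process_nums) as p:
        return p.map(_run_export, commands)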
Example 2
#!/usr/bin/env python
# coding=utf-8

import datetime
import pandas as pd
from sqlalchemy import create_engine, pool
from utility import get_engine_str
from conf import config

Dlogger = config.get_logger("DataRowCount")
mysql_engine_str = get_engine_str("mysql").format(**config.DB_CONF)
mysql_con = create_engine(mysql_engine_str, poolclass=pool.NullPool)


def update_rowcount(row):
    sql = "select count(1) as cnt from {table_name}".format(
        table_name=row["table_name"])
    db_conf = {
        "host": row["host"],
        "port": row["port"],
        "user": row["user"],
        "password": row["password"],
        "database": row["db_name"],
        "charset": "utf8"
    }
    engine_str = get_engine_str(row["db_type"]).format(**db_conf)
    con = create_engine(engine_str, poolclass=pool.NullPool)
    # Dlogger.debug(sql)
    try:
        result = pd.read_sql(sql, con)
        table_rows = result.iat[0, 0]
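
The function is cut off inside the try block. A minimal usage sketch for update_rowcount, assuming the table list comes from a metadata table whose columns match the keys accessed above; the table name "meta_import" is only a placeholder:

def update_all_rowcounts():
    # Placeholder source: a metadata table holding one row per source
    # table with host/port/user/password/db_name/db_type/table_name
    tables = pd.read_sql("select * from meta_import", mysql_con)
    tables.apply(update_rowcount, axis=1)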
Example 3
#!/usr/bin/env python
# coding=utf-8

""" 选择shell执行日志更加详细 """

import re
import datetime
import subprocess
import pandas as pd
import argparse
from sqlalchemy import create_engine, pool
from conf import config
from utility import get_engine_str

Dlogger = config.get_logger("DataMerge")


def merge_data(pms_table):
    clinic_table = "pet_medical.mir_clinic"

    # Tables missing from the HIS clinical module: clinical data tables from the old xiaonuan system
    if re.search("CEMRecord|CPrescriptions|CPrescriptionDetails|CPhysical|CEMRecordPhysical|CLabReport|CLabVirusDetails|PBCheckList|PXRaysList|PPathologyList", pms_table, re.I):
        if "cemrecord" in pms_table.lower():
            file_type = "rcfile"
        else:
            file_type = "orc"
        sql = """
        drop table {data_xiaonuan_final_table};
        create table {data_xiaonuan_final_table} stored as {file_type} as
        select * from {pms_table};
        """
Example 4
#!/usr/bin/env python
# coding=utf-8

import argparse
import sys
import re
import subprocess
import pandas as pd
from sqlalchemy import create_engine, pool
from conf import config
from utility import get_engine_str, is_valid, get_yesterday

Dlogger = config.get_logger("DataUnique")


def is_increase(hive_full_table):
    sql = """select filter,max_value from meta_import where lower(hive_database)=lower('{hive_database}') and lower(hive_table)=lower('{hive_table}') limit 1;""" \
        .format(hive_database=hive_full_table.split(".")[0],
                hive_table=hive_full_table.split(".")[1])
    engine_str = get_engine_str("mysql").format(**config.DB_CONF)
    con = create_engine(engine_str, poolclass=pool.NullPool)
    Dlogger.info(sql)
    # rows=con.execute(sql)
    df = pd.read_sql(sql=sql, con=con)
    if not df.empty:
        filter_value = df.iat[0, 0]
        max_value = df.iat[0, 1]
        if is_valid(filter_value) and is_valid(max_value):
            return True
    return False
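
is_increase presumably tells incrementally imported tables apart from fully reloaded ones; a small illustrative call, with the table name chosen only as an example:

if is_increase("pet_medical.ods_pms_cemrecord"):
    Dlogger.info("incrementally imported table, deduplication required")
else:
    Dlogger.info("fully reloaded table, deduplication can be skipped")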
Example 5
#!/usr/bin/env python
# coding=utf-8

import sys
from sqlalchemy import create_engine, pool
import pandas as pd
import argparse
from conf import config
from utility import get_engine_str
from schema_check import get_tabs

Dlogger = config.get_logger("InsertMetaData")


def pre_args():
    parse = argparse.ArgumentParser(prog="InsertMetaData", description="I am help message...")
    parse.add_argument("-w", "--wizard", required=True, help="wizard,选择已经添加的数据库配置名称. example: -w warmsoft")
    parse.add_argument("--db", required=True, help="database,指定需要同步的数据库名称")
    parse.add_argument("--target_db", help="指定同步到hive中的库名,不填默认和db相同")
    args = parse.parse_args()
    print(args)

    args_dict = {
        "connection_id": None,
        "connection_name": args.wizard,
        "db_type": "",
        "host": "",
        "user": "",
        "password ": "",
        "port": 0,
        "jdbc_extend": "",
Example 6
#!/usr/bin/env python
# coding=utf-8

from sqlalchemy import create_engine, pool
import pandas as pd
import re
import subprocess
import traceback
from conf import config
from utility import get_engine_str, send_mail

# Dlogger.basicConfig(level=Dlogger.INFO, format='%(asctime)s - %(levelname)s: %(message)s')

Dlogger = config.get_logger("SchemaCheck")


def get_tabs(db_type, db_conf):
    tabs = []
    if db_type == "sqlserver":
        sql = "select name as table_name from sys.tables;"
        Dlogger.info("MSSQL Command = " + sql)
    elif db_type == "mysql":
        sql = "select table_name from information_schema.tables t where t.table_schema='{db_name}'".format(
            db_name=db_conf["database"])
        Dlogger.info("MySQL Command = " + sql)
    elif db_type == "oracle":
        sql = "select table_name from user_tables"
        Dlogger.info("Oracle command = " + sql)
    else:
        raise Exception("DATABASE TYPE ERROR !")
    engine_str = get_engine_str(db_type).format(**db_conf)
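
The excerpt ends right after engine_str is built; presumably the remaining lines run the query and collect the names. A sketch of that tail, kept as a separate hypothetical helper so it is clearly not the original code:

def read_table_names(engine_str, sql):
    # Hypothetical tail of get_tabs: run the query above and return
    # the table names as a plain list.
    con = create_engine(engine_str, poolclass=pool.NullPool)
    df = pd.read_sql(sql, con)
    return df["table_name"].tolist()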
Example 7
# coding=utf-8

import pandas as pd
from sqlalchemy import create_engine, pool
import argparse
import subprocess
import datetime
import sys
import os
import re
import schema_check
from conf import config
from utility import get_engine_str, get_yesterday, is_valid
from data_unique import drop_duplicates

Dlogger = config.get_logger("DataImport")


def pre_args():
    parse = argparse.ArgumentParser(prog="DataImport",
                                    description="I am help message...")
    parse.add_argument("-w",
                       "--wizard",
                       required=True,
                       help="wizard,选择已经添加的数据库配置名称. example: -w warmsoft")
    parse.add_argument("--db",
                       default="",
                       help="<database> meta_import中的db_name库名,不区分大小写")
    parse.add_argument("--tb",
                       default="",
                       help="<table_name> meta_import中的table_name表名,不区分大小写")
Example 8
import pandas as pd
from sqlalchemy import create_engine, pool
import pymysql
import argparse
import subprocess
import datetime
import sys
import os
import re
import schema_check
import traceback
from conf import config
from utility import get_engine_str, get_yesterday, is_valid

Dlogger = config.get_logger("DataExport")


def pre_args():
    parse = argparse.ArgumentParser(
        prog="DataExport",
        description=
        "I am help message...默认模式是把数据导入到临时表,然后rename为正式表。Example1: python3 data_export.py -w xiaonuan_ddl --db xiaonuan --tb syscategory   Example2: python3 data_export.py -w xiaonuan_ddl --s data_xiaonuan_final.syscategory --t syscategory --mode=overwrite"
    )
    parse.add_argument("-w",
                       "--wizard",
                       required=True,
                       help="wizard,选择已经添加的数据库配置名称. example: -w xiaonuan_ddl")
    parse.add_argument("--db",
                       default="",
                       help="<database> meta_export中的db_name库名,不区分大小写")
Example 9
import os
import sys
from conf import config

# Number of parallel processes
_process_nums = 8

# Number of retries on task failure
_retry_count = 3

# Script that executes a single task
_data_merge_py = os.path.join(sys.path[0], "data_merge.py")

# Log path
_log_path = os.path.join(config.LOG_PATH, "data_merge")

if not os.path.exists(_log_path):
    os.makedirs(_log_path)

Dlogger = config.get_logger("DataMergeAuto")


def parallel_write_log(no, tb):
    try:
        table_name = "mid_" + tb.lower()
        input_table_name = "pet_medical.ods_pms_" + tb.lower()
        full_log_path = "{log_path}/data_merge_{table_name}.log".format(
            log_path=_log_path, table_name=table_name)
        sh_cmd = "{python} -u {data_merge_py} -t {input_table_name} &>> {full_log_path}\n". \
            format(python=config.PYTHON3,
                   data_merge_py=_data_merge_py,
                   input_table_name=input_table_name,
                   full_log_path=full_log_path)
        flag = ""