Example #1
def is_increase(hive_full_table):
    sql = """select filter,max_value from meta_import where lower(hive_database)=lower('{hive_database}') and lower(hive_table)=lower('{hive_table}') limit 1;""" \
        .format(hive_database=hive_full_table.split(".")[0],
                hive_table=hive_full_table.split(".")[1])
    engine_str = get_engine_str("mysql").format(**config.DB_CONF)
    con = create_engine(engine_str, poolclass=pool.NullPool)
    Dlogger.info(sql)
    # rows=con.execute(sql)
    df = pd.read_sql(sql=sql, con=con)
    if not df.empty:
        filter = df.iat[0, 0]
        max_value = df.iat[0, 1]
        if is_valid(filter) and is_valid(max_value):
            return True
    return False
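The check boils down to: does meta_import hold both a filter template and a last max_value for this table? Below is a minimal, self-contained sketch of the same lookup pattern, run against an in-memory SQLite database instead of the project's MySQL meta store; the sample row and values are invented, and the explicit non-None checks stand in for the project's is_valid helper.

import pandas as pd
from sqlalchemy import create_engine, text

# In-memory SQLite keeps its single connection alive, so no NullPool here
# (the project code uses MySQL with NullPool instead).
engine = create_engine("sqlite://")
with engine.begin() as con:
    con.execute(text("create table meta_import "
                     "(hive_database text, hive_table text, filter text, max_value text)"))
    con.execute(text("insert into meta_import values "
                     "('pet_medical', 'src_pms_ppets', 'update_time > ''{max_value}''', '2020-01-01 00:00:00')"))

sql = ("select filter, max_value from meta_import "
       "where lower(hive_database)=lower('pet_medical') "
       "and lower(hive_table)=lower('src_pms_ppets') limit 1")
df = pd.read_sql(sql=sql, con=engine)
# Incremental import is possible only when both fields are present.
print((not df.empty) and df.iat[0, 0] is not None and df.iat[0, 1] is not None)  # True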
Example #2
def main(row):
    # 1. Build and execute the table-creation (DDL) statements
    cols_list_map, hive_cols, map_column_hive, alter_sql = schema_check.check_db_to_hive(
        row["table_name"], row["hive_full_name"], row["db_conf"],
        row["db_type"])
    # print(cols_list_map)

    # Filter to the specified columns and build the table structure from them only
    # (mainly for the ehr SQL Server database).
    # Conflicts between columns and query_sql are not validated yet.
    if is_valid(row["columns"]):
        columns_tmp = row["columns"].strip().lower().split(",")
        cols_list_map = [
            x for x in cols_list_map if x["name"].lower() in columns_tmp
        ]
    if is_valid(row["query_sql"]):
        # columns_tmp = row["query_sql"].strip().lower()
        columns_tmp = re.sub("select | from.*", "",
                             row["query_sql"].strip().lower()).split(",")
        cols_list_map_tmp = []
        for column_name in columns_tmp:
            # Keep the columns in the order given by query_sql
            cols_list_map_tmp.append([
                x for x in cols_list_map if x["name"].lower() == column_name
            ][0])
        cols_list_map = cols_list_map_tmp
        # print(cols_list_map)
    prepare_ddl(row["exec_engine"], row["hive_full_name"], cols_list_map,
                hive_cols, row["hive_partition_key"],
                row["hive_partition_value"], alter_sql, row["is_overwrite"])

    # 2. Build and apply the max-value (incremental) filter SQL
    if len(hive_cols) > 0 and is_valid(row["max_value"]) and is_valid(
            row["filter"]):
        where_str = prepare_increase(row)
    else:
        if row["exec_engine"] == "sqoop":
            where_str = '--where "1=1" \t'
        else:
            where_str = "1=1"

    # 3. Import the data
    if row["exec_engine"] == "sqoop":
        sqoop_import(row, where_str, map_column_hive)
    else:  # exec_engine == "datax"
        if len(hive_cols) == 0 or len(alter_sql) != 0:
            hive_cols = cols_list_map
        datax_import(row, where_str, hive_cols)
    drop_duplicates(row["hive_full_name"], row["hive_partition_key"])
Example #3
def prepare_ddl(exec_engine, hive_full_name, cols_list_map, hive_cols,
                hive_partition_key, hive_partition_value, alter_sql,
                is_overwrite):
    # Decide which schema (DDL) statement is needed
    schema_sql = ""
    if len(hive_cols) == 0:
        if exec_engine == "sqoop":
            # In hive-import mode sqoop cannot write data into ORC format; only hcatalog mode can,
            # but hcatalog mode has permission problems with partitions.
            # With hive-import mode no create-table statement is needed.
            schema_sql = ""
        else:  # exec_engine=="datax"
            if is_valid(hive_partition_key) and is_valid(hive_partition_value):
                part_sql = " partitioned by ({} string)".format(
                    hive_partition_key)
            else:
                part_sql = ""
            # ORC cannot replace columns, and keeping the same table structure makes it easy to
            # switch between sqoop and datax, so use sqoop's default text format instead of ORC.
            # schema_sql = """create table if not exists {table} (\n{cols}\n        )\n{part_sql}        stored as orc;""" \
            schema_sql = """create table if not exists {table} (\n{cols}\n        ){part_sql}""" \
                .format(table=hive_full_name,
                        cols=''.join(["            {name} {type},\n".format(name=x["name"], type=x["type"]) for x in cols_list_map])[:-2],
                        part_sql=part_sql) \
                .lower()

    else:
        if len(alter_sql) > 0:
            schema_sql = "".join(alter_sql)

    # Decide whether existing data needs to be deleted
    delete_data_sql = ""
    if exec_engine == "datax" and is_overwrite == "1":
        if hive_partition_key and hive_partition_value:
            delete_data_sql = """
        alter table {hive_full_table} drop if exists partition ({hive_partition_key}='{hive_partition_value}')""". \
                format(hive_full_table=hive_full_name,
                       hive_partition_key=hive_partition_key,
                       hive_partition_value=hive_partition_value)
        else:
            delete_data_sql = """
        truncate table {hive_full_table}""". \
                format(hive_full_table=hive_full_name,
                       hive_partition_key=hive_partition_key,
                       hive_partition_value=hive_partition_value)

    # Decide whether an add-partition statement is needed
    add_partition_sql = ""
    if exec_engine == "datax" and is_valid(hive_partition_key) and is_valid(
            hive_partition_value):
        add_partition_sql = """
        alter table {hive_full_table} add if not exists partition ({hive_partition_key}='{hive_partition_value}')""" \
            .format(hive_full_table=hive_full_name,
                    hive_partition_key=hive_partition_key,
                    hive_partition_value=hive_partition_value)

    # Execute the statements
    if schema_sql + delete_data_sql + add_partition_sql != "":
        engine_str = get_engine_str("hive").format(**config.HIVE_CONF)
        con = create_engine(engine_str, poolclass=pool.NullPool)
        if schema_sql != "":
            for tmp_schema_sql in [x for x in schema_sql.split(";") if x]:
                Dlogger.info("\n        " + tmp_schema_sql + ";")
                con.execute(tmp_schema_sql)
        if delete_data_sql != "":
            for tmp_drop_partition_sql in [
                    x for x in delete_data_sql.split(";") if x
            ]:
                Dlogger.info(tmp_drop_partition_sql + ";")
                con.execute(tmp_drop_partition_sql)
        if add_partition_sql != "":
            for tmp_add_partition_sql in [
                    x for x in add_partition_sql.split(";") if x
            ]:
                Dlogger.info(tmp_add_partition_sql + ";")
                con.execute(tmp_add_partition_sql)
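For reference, these are the three kinds of statements prepare_ddl can emit, rendered for a hypothetical partitioned table (all names below are made up and nothing is executed here):

cols_list_map = [{"name": "id", "type": "int"}, {"name": "name", "type": "string"}]
hive_full_name = "pet_medical.src_pms_ppets"
hive_partition_key, hive_partition_value = "dt", "2020-01-01"

# create-table statement (text format, partitioned), as built in the datax branch
schema_sql = "create table if not exists {table} (\n{cols}\n        ) partitioned by ({key} string)".format(
    table=hive_full_name,
    cols="".join("            {name} {type},\n".format(**x) for x in cols_list_map)[:-2],
    key=hive_partition_key).lower()
# drop-partition statement used when is_overwrite == "1"
drop_sql = "alter table {t} drop if exists partition ({k}='{v}')".format(
    t=hive_full_name, k=hive_partition_key, v=hive_partition_value)
# add-partition statement issued before the datax import
add_sql = "alter table {t} add if not exists partition ({k}='{v}')".format(
    t=hive_full_name, k=hive_partition_key, v=hive_partition_value)
print(schema_sql, drop_sql, add_sql, sep="\n\n")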
Example #4
def datax_generate_json(row, where_str, hive_cols):
    # print(str(hive_cols))
    host = row["host"]
    port = row["port"]
    database = row["db_name"]

    if is_valid(row["query_sql"]):
        query_sql = ' "querySql": ["{};"], '.format(row["query_sql"])
        table = ""
    else:
        query_sql = ""
        table = '"table": ["{}"],'.format(row["table_name"])

    if is_valid(row["columns"]):
        columns = ",".join(
            ['"' + x + '"' for x in row["columns"].strip().split(",")])
        columns = "[" + columns + "]"
    else:
        columns = '["*"]'

    if is_valid(row["hive_partition_key"]) and is_valid(
            row["hive_partition_value"]):
        hive_partition_str = "/{}={}".format(row["hive_partition_key"],
                                             row["hive_partition_value"])
    else:
        hive_partition_str = ""

    if row["db_type"] == "sqlserver":
        jdbc_driver = "jdbc:sqlserver://{host}:{port};DatabaseName={database}".format(
            host=host, port=port, database=database)
    elif row["db_type"] == "mysql":
        jdbc_driver = "jdbc:mysql://{host}:{port}/{database}".format(
            host=host, port=port, database=database)
    elif row["db_type"] == "oracle":
        jdbc_driver = "jdbc:oracle:thin:@{host}:{port}:{database}".format(
            host=host, port=port, database=database)
    else:
        raise Exception("DATABASE TYPE ERROR !")

    template_json = r"""{
    "setting": {},
    "job": {
        "setting": {
            "speed": {
                "channel": %s
            }
        },
        "content": [
            {
                "reader": {
                    "name": "%s",
                    "parameter": {
                        "username": "******",
                        "password": "******",
                        "column": %s,
                        "connection": [
                            {   
                                "jdbcUrl": ["%s"],
                                %s%s
                            }
                        ],
                        "where": "%s"
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "%s",
                        "path": "/user/hive/warehouse/%s.db/%s%s",
                        "column": %s,
                        "fileName": "%s",
                        "fileType": "text",
                        "fieldDelimiter": "\u0001",
                        "writeMode": "append"
                    }
                }
            }
        ]
    }
}
    """ % (row["m"], row["db_type"] + "reader", row["user"], row["password"],
           columns, jdbc_driver, query_sql, table, where_str, config.HDFS,
           row["hive_database"], row["hive_table"], hive_partition_str,
           str(hive_cols), row["hive_table"])
    with open(
            os.path.join(
                config.PROJECT_PATH,
                "conf/datax_json/{}.json".format(row["hive_full_name"])),
            "w") as f:
        f.write(template_json)
    Dlogger.info("generate datax json succeed")
Example #5
def sqoop_import(row, where_str, map_column_hive_str):
    host = row["host"]
    port = row["port"]
    db_name = row["db_name"]
    jdbc_extend = row["jdbc_extend"]
    username = "******".format(row["user"])
    password = "******".format(row["password"])
    table_name = "--table {} \t".format(
        row["table_name"] if row["db_type"].lower() != "oracle" else
        row["table_name"].upper())  # oracle表名大写
    hive_import = "--hive-import \t"
    hive_overwrite = "--hive-overwrite \t" if row["is_overwrite"] == "1" else ""
    hive_table = "--hive-table {} \t".format(row["hive_database"] + "." +
                                             row["hive_table"])
    # create_hcatalog_table = "--create-hcatalog-table \t" if is_exists != 1 else ""
    # hcatalog_database = "--hcatalog-database {} \t".format(row["hive_database"])
    # hcatalog_table = "--hcatalog-table {} \t".format(row["hive_table"])
    # hcatalog_storage_stanza = "--hcatalog-storage-stanza 'stored as orc' \t"
    # columns = "--columns '{}' \t".format(row["columns"]) if is_valid(row["columns"]) else ""
    # map_column_hive = " --map-column-hive " + row["map_column_hive"] if is_valid(row["map_column_hive"]) else ""
    map_column_hive = "--map-column-hive '{}' \t".format(
        map_column_hive_str) if len(map_column_hive_str) > 0 else ""
    hive_partition_key = "--hive-partition-key '{}' \t".format(
        row["hive_partition_key"]) if is_valid(
            row["hive_partition_key"]) else ""
    hive_partition_value = "--hive-partition-value '{}' \t".format(
        row["hive_partition_value"]) if is_valid(
            row["hive_partition_value"]) else ""
    fields_terminated_by = "--fields-terminated-by '{}' \t".format(
        row["fields_terminated_by"]) if is_valid(
            row["fields_terminated_by"]) else ""
    line_terminated_by = "--lines-terminated-by '{}' \t".format(
        row["line_terminated_by"]) if is_valid(
            row["line_terminated_by"]) else ""
    hive_drop_import_delims = "--hive-drop-import-delims \t"
    use_raw_null = "--null-string '\\\\N' --null-non-string '\\\\N' \t" if row[
        "use_raw_null"] == "1" else ""
    use_local_mode = " -jt local \t" if row["use_local_mode"] == "1" else " \t"
    warehouse_dir = "--warehouse-dir /tmp/`whoami`/{} \t".format(
        row["warehouse_dir"]) if row["warehouse_dir"] else ""
    class_name = "–-class-name '{}' \t".format(row["class_name"]) if is_valid(
        row["class_name"]) else ""
    outdir = "--outdir '{}' \t".format(row["outdir"]) if is_valid(
        row["outdir"]) else ""
    split_by = "--split-by '{}' \t" + row["split_by"] if is_valid(
        row["split_by"]) else ""
    m = "-m {}".format(row["m"]) if is_valid(row["m"]) else "-m 1"
    # use_direct = " --direct" if is_valid(row["use_direct"]) else ""
    use_direct = ""
    is_drop = row["is_drop"]
    columns = ""  # row["columns"]

    db_type = row["db_type"].lower()
    if db_type == "mysql":
        connect = "--connect 'jdbc:mysql://%s:%s/%s?%s' \t" % (
            host, port, db_name, jdbc_extend)
        # jdbc:mysql://10.15.1.11:3306/pms?useUnicode=true&characterEncoding=utf-8
    elif db_type == "sqlserver":
        connect = "--connect 'jdbc:sqlserver://%s:%s;database=%s' \t" % (
            host, port, db_name)
        # jdbc:sqlserver://10.15.1.11:2121;database=PMS
    elif db_type == "oracle":
        connect = "--connect 'jdbc:oracle:thin:@%s:%s:%s' \t" % (host, port,
                                                                 db_name)
        # jdbc:oracle:thin:@192.168.0.147:1521:ORCL
    else:
        raise Exception(
            "ERROR: THE DATABASE TYPE IS NULL OR %s NOT SUPPORT ." %
            row["db_type"])

    # 3. Execute the sqoop command
    sqoop_cmd = "\n        sqoop import" + \
                use_local_mode + \
                connect + \
                username + \
                password + \
                table_name + \
                use_direct + \
                hive_import + \
                hive_overwrite + \
                hive_table + \
                columns + \
                where_str + \
                hive_partition_key + \
                hive_partition_value + \
                map_column_hive + \
                fields_terminated_by + \
                line_terminated_by + \
                hive_drop_import_delims + \
                use_raw_null + \
                warehouse_dir + \
                class_name + \
                outdir + \
                split_by + \
                m
    # sqoop_cmd = "\n        sqoop import" + use_local_mode + connect + username + password + table_name + use_direct + hcatalog_database + hcatalog_table + columns + where + hive_partition_key + hive_partition_value + map_column_hive + fields_terminated_by + line_terminated_by + hive_drop_import_delims + use_raw_null + class_name + outdir + split_by + create_hcatalog_table + hcatalog_storage_stanza + m
    sqoop_cmd = '\\\n'.join(["        " + x for x in sqoop_cmd.split("\t")])
    Dlogger.info("Shell Command = " + sqoop_cmd)
    subprocess.check_output(sqoop_cmd, shell=True)
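The command assembly boils down to concatenating tab-terminated flag fragments and then re-joining them with backslash-newlines for logging. A reduced, standalone version (connection details are placeholders and the command is only printed, never run):

fragments = [
    "sqoop import \t",
    "--connect 'jdbc:mysql://127.0.0.1:3306/pms?useUnicode=true' \t",
    "--username 'user' \t",
    "--password 'pass' \t",
    "--table ppets \t",
    "--hive-import \t",
    "--hive-table pet_medical.src_pms_ppets \t",
    "--hive-drop-import-delims \t",
    "-m 1",
]
sqoop_cmd = "".join(fragments)
# Split on the tab markers and re-join with line continuations, as in sqoop_import.
sqoop_cmd = "\\\n".join("        " + x for x in sqoop_cmd.split("\t"))
print(sqoop_cmd)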
Example #6
def pre_args():
    parse = argparse.ArgumentParser(prog="DataImport",
                                    description="I am help message...")
    parse.add_argument("-w",
                       "--wizard",
                       required=True,
                       help="wizard,选择已经添加的数据库配置名称. example: -w warmsoft")
    parse.add_argument("--db",
                       default="",
                       help="<database> meta_import中的db_name库名,不区分大小写")
    parse.add_argument("--tb",
                       default="",
                       help="<table_name> meta_import中的table_name表名,不区分大小写")
    parse.add_argument("--exec_engine",
                       choices=["sqoop", "datax"],
                       default="sqoop",
                       help="执行引擎, sqoop或者datax")
    parse.add_argument("-s",
                       "--source_table",
                       default="",
                       help="source table. example: pms.ppets")
    parse.add_argument("-t",
                       "--target_table",
                       default="",
                       help="target table. example: pet_medical.src_pms_ppets")
    parse.add_argument("-m", "--num_pappers", default="1", help="map并行数,默认1个")
    parse.add_argument("--hive_overwrite", action="store_true", help="sqoop覆盖")
    parse.add_argument("--hive_partition_key", help="分区键")
    parse.add_argument("--hive_partition_value", help="分区值")
    parse.add_argument(
        "--use_local_mode",
        action="store_true",
        help=
        "本地模式执行. 集群只有一台机器有外网, 如果分布的任务到没有外网的机器上就不能执行, 就需要指定本地模式。外网的任务建议使用datax引擎"
    )

    args = parse.parse_args()
    # print parse
    print(args)

    args_dict = {
        "connection_id": "",
        "connection_name": "",
        "db_type": "",
        "host": "",
        "user": "",
        "password ": "",
        "port": 0,
        "jdbc_extend": "",
        "default_db": "",
        "db_name": "",
        "table_name": "",
        "hive_database": "",
        "hive_table": "",
        "is_overwrite": "",
        "query_sql": "",
        "columns": "",
        "filter": None,
        "max_value": None,
        "map_column_hive": "",
        "hive_partition_key": "",
        "hive_partition_value": "",
        "fields_terminated_by": None,
        "line_terminated_by": None,
        "use_raw_null": "1",
        "use_local_mode": "0",
        "warehouse_dir": "",
        "class_name": None,
        "outdir": None,
        "split_by": None,
        "m": None,
        "is_drop": "0",
        "exec_engine": "sqoop"
    }
    # table_names = []
    # conn_names = []

    wizard_name = args.wizard
    if args.source_table and len(args.source_table.split(".")) != 2:
        print("-s的参数必须是库名加表名,例如:pms.ppets")
        sys.exit(1)
    if args.target_table and len(args.target_table.split(".")) != 2:
        print("-t的参数必须是库名加表名,例如:pet_medical.ods_pmsweb_ppets")
        sys.exit(1)

    db = args.db.lower()
    tb = args.tb.lower()
    args_dict["exec_engine"] = args.exec_engine
    args_dict["db_name"] = args.source_table.split(
        ".")[0].lower() if args.source_table else ""
    args_dict["table_name"] = args.source_table.split(
        ".")[1].lower() if args.source_table else ""
    args_dict["hive_database"] = args.target_table.split(
        ".")[0].lower() if args.target_table else ""
    args_dict["hive_table"] = args.target_table.split(
        ".")[1].lower() if args.target_table else ""
    args_dict["m"] = args.num_pappers
    args_dict["is_overwrite"] = "1" if args.hive_overwrite else "0"
    args_dict["hive_partition_key"] = args.hive_partition_key
    args_dict["hive_partition_value"] = args.hive_partition_value
    args_dict["use_local_mode"] = "1" if args.use_local_mode else "0"

    sql = """
    SELECT t.connection_id,
           t.connection_name,
           t.db_type,
           t.host,
           t.user,
           t.password ,
           t.port,
           t.jdbc_extend,
           t.default_db,
           s.db_name,
           s.table_name,
           s.hive_database,
           s.hive_table,
           s.is_overwrite,
           s.query_sql,
           s.columns,
           s.filter,
           s.max_value,
           s.map_column_hive,
           s.hive_partition_key,
           s.hive_partition_value,
           s.fields_terminated_by,
           s.line_terminated_by,
           s.use_raw_null,
           s.use_local_mode,
           s.warehouse_dir,
           s.class_name,
           s.outdir,
           s.split_by,
           s.m,
           s.is_drop,
           s.exec_engine
      FROM meta_connections t
      LEFT JOIN meta_import s ON t.connection_id = s.connection_id
     ORDER BY t.connection_id
    """
    con = create_engine(get_engine_str("mysql").format(**config.DB_CONF),
                        poolclass=pool.NullPool)
    df = pd.read_sql(sql=sql, con=con)
    df["db_name"] = df["db_name"].map(lambda x: str(x).lower())
    df["table_name"] = df["table_name"].map(lambda x: str(x).lower())
    df["hive_database"] = df["hive_database"].map(lambda x: str(x).lower())
    df["hive_table"] = df["hive_table"].map(lambda x: str(x).lower())

    conn_names = df["connection_name"].drop_duplicates(keep="first").tolist()
    db_names = df[(df["db_name"].notna())
                  & (df["db_name"] != "")].drop_duplicates(
                      keep="first")["db_name"].tolist()
    table_names = df[(df["table_name"].notna())
                     & (df["table_name"] != "")].drop_duplicates(
                         keep="first")["table_name"].tolist()
    # print([x.lower() for x in table_names])
    if wizard_name not in conn_names:
        print("Error Message: -w 数据库链接名称不存在")
        sys.exit(1)
    if db != "" and db.lower() not in [x.lower() for x in db_names]:
        print("Error Message: --db 库名不存在", db.lower(), db_names)
        sys.exit(1)
    if tb != "" and tb.lower() not in [x.lower() for x in table_names]:
        print("Error Message: --tb 表名不存在", tb.lower(), table_names)
        sys.exit(1)
    if tb != "" and args_dict["table_name"] != "":
        print("Error Message: --tb -s 不能同时指定")
        sys.exit(1)

    # print(conn_names, table_names)
    # tmp_row = [row for row in rows if wizard_name == row["connection_name"]][0]
    tmp_row = df[df["connection_name"] == wizard_name].to_dict("records")[0]
    args_dict["connection_id"] = tmp_row["connection_id"]
    args_dict["connection_name"] = tmp_row["connection_name"]
    args_dict["db_type"] = tmp_row["db_type"]
    args_dict["host"] = tmp_row["host"]
    args_dict["user"] = tmp_row["user"]
    args_dict["password"] = tmp_row["password"]
    args_dict["port"] = tmp_row["port"]
    args_dict["jdbc_extend"] = tmp_row["jdbc_extend"]
    args_dict["default_db"] = tmp_row["default_db"]
    if db != "" and tb != "":
        # args_dict = df[(df["connection_name"] == wizard_name) & (df["table_name"] == table_name_meta.lower())].head(1).to_dict("records")[0]
        args_dict = df[(df["connection_name"] == wizard_name)
                       & (df["db_name"] == db) & (df["table_name"] == tb)]
        # Convert pandas NaN values to native None
        args_dict = args_dict.where(args_dict.notna(), None)
        # Convert the DataFrame to a dict
        args_dict = args_dict.to_dict("records")[0]
        # print(df.dtypes)
        # print(args_dict)

    if (db != "" and tb != "") or (args_dict["db_name"] != ""
                                   and args_dict["table_name"] != ""
                                   and args_dict["hive_database"] != ""
                                   and args_dict["hive_table"] != ""):
        pass
    else:
        print("Error Message: 必须指定 -w --db --tb 或者 -w -d -s -t 的参数值")
        sys.exit(1)
    args_dict["hive_database"] = args_dict["hive_database"]
    args_dict["hive_table"] = args_dict["hive_table"]
    args_dict["hive_full_name"] = args_dict["hive_database"] + "." + args_dict[
        "hive_table"]
    args_dict["db_conf"] = {
        "host": args_dict["host"],
        "port": args_dict["port"],
        "user": args_dict["user"],
        "password": args_dict["password"],
        "database": args_dict["db_name"],
        "charset": "utf8"
    }
    if not is_valid(args_dict["db_name"]):
        args_dict["db_name"] = args_dict["default_db"]
    if args_dict["hive_partition_value"] == "$yesterday":
        args_dict["hive_partition_value"] = get_yesterday()
    if not is_valid(args_dict["m"]):
        args_dict["m"] = 1
    if args.num_pappers != '1':
        args_dict["m"] = args.num_pappers
    args_dict["m"] = int(args_dict["m"])
    print(args_dict)
    return args_dict
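A trimmed-down version of the argument handling above, parsed against a sample argv instead of the real command line (connection and table names are examples only):

import argparse

parse = argparse.ArgumentParser(prog="DataImport")
parse.add_argument("-w", "--wizard", required=True)
parse.add_argument("--db", default="")
parse.add_argument("--tb", default="")
parse.add_argument("--exec_engine", choices=["sqoop", "datax"], default="sqoop")
parse.add_argument("-s", "--source_table", default="")
parse.add_argument("-t", "--target_table", default="")
parse.add_argument("-m", "--num_pappers", default="1")
parse.add_argument("--hive_overwrite", action="store_true")

args = parse.parse_args(["-w", "warmsoft", "-s", "pms.ppets",
                         "-t", "pet_medical.src_pms_ppets", "-m", "4"])
args_dict = {
    "exec_engine": args.exec_engine,
    "db_name": args.source_table.split(".")[0].lower() if args.source_table else "",
    "table_name": args.source_table.split(".")[1].lower() if args.source_table else "",
    "hive_database": args.target_table.split(".")[0].lower() if args.target_table else "",
    "hive_table": args.target_table.split(".")[1].lower() if args.target_table else "",
    "is_overwrite": "1" if args.hive_overwrite else "0",
    "m": int(args.num_pappers),
}
print(args_dict)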
Example #7
def pre_args():
    parse = argparse.ArgumentParser(
        prog="DataExport",
        description=
        "I am help message...默认模式是把数据导入到临时表,然后rename为正式表。Example1: python3 data_export.py -w xiaonuan_ddl --db xiaonuan --tb syscategory   Example2: python3 data_export.py -w xiaonuan_ddl --s data_xiaonuan_final.syscategory --t syscategory --mode=overwrite"
    )
    parse.add_argument("-w",
                       "--wizard",
                       required=True,
                       help="wizard,选择已经添加的数据库配置名称. example: -w xiaonuan_ddl")
    parse.add_argument("--db",
                       default="",
                       help="<database> meta_export中的db_name库名,不区分大小写")
    parse.add_argument("--tb",
                       default="",
                       help="<table_name> meta_export中的table_name表名,不区分大小写")
    parse.add_argument("--mode",
                       choices=["rename", "overwrite", "append"],
                       default="rename",
                       help="导入模式")
    parse.add_argument("--exec_engine",
                       choices=["sqoop", "datax"],
                       default="sqoop",
                       help="执行引擎, sqoop或者datax")
    parse.add_argument(
        "-s",
        "--source_table",
        default="",
        help="source table. example: pet_medical.ods_pmsweb_ppets")
    parse.add_argument("-t",
                       "--target_table",
                       default="",
                       help="target table. example: xiaonuan.ppets")
    parse.add_argument("-m", "--num_pappers", default="1", help="map并行数,默认1个")
    parse.add_argument(
        "--use_local_mode",
        action="store_true",
        help=
        "本地模式执行. 集群只有一台机器有外网, 如果分布的任务到没有外网的机器上就不能执行, 就需要指定本地模式。外网的任务建议使用datax引擎"
    )

    args = parse.parse_args()
    print(args)

    args_dict = {
        "connection_id": "",
        "connection_name": "",
        "db_type": "",
        "host": "",
        "user": "",
        "password ": "",
        "port": 0,
        "jdbc_extend": "",
        "default_db": "",
        "hive_database": "",
        "hive_table": "",
        "db_name": "",
        "table_name": "",
        "m": 1,
        "is_overwrite": "",
        "is_drop": "0",
        "mode": "rename",
        "exec_engine": "sqoop"
    }

    wizard_name = args.wizard
    if args.source_table and len(args.source_table.split(".")) != 2:
        print("-s的参数必须是库名加表名,例如:pet_medical.ods_pmsweb_ppets")
        sys.exit(1)
    if args.target_table and len(args.target_table.split(".")) != 2:
        print("-t的参数必须是库名加表名,例如:xiaonuan.ppets")
        sys.exit(1)

    db = args.db.lower()
    tb = args.tb.lower()
    args_dict["exec_engine"] = args.exec_engine
    args_dict["hive_database"] = args.source_table.split(
        ".")[0].lower() if args.source_table else ""
    args_dict["hive_table"] = args.source_table.split(
        ".")[1].lower() if args.source_table else ""
    args_dict["db_name"] = args.target_table.split(
        ".")[0].lower() if args.target_table else ""
    args_dict["table_name"] = args.target_table.split(
        ".")[1].lower() if args.target_table else ""
    args_dict["m"] = args.num_pappers
    args_dict["mode"] = args.mode
    args_dict["use_local_mode"] = "1" if args.use_local_mode else "0"
    # args_dict["is_overwrite"] = "1" if args.hive_overwrite else "0"

    sql = """
    SELECT t.connection_id,
           t.connection_name,
           t.db_type,
           t.host,
           t.user,
           t.password ,
           t.port,
           t.jdbc_extend,
           t.default_db,
           s.hive_database,
           s.hive_table,
           s.db_name,
           s.table_name,
           s.exec_engine,
           s.m,
           s.is_overwrite,
           s.is_drop,
           s.mode
      FROM meta_connections t
      LEFT JOIN meta_export s ON t.connection_id = s.connection_id
     ORDER BY t.connection_id
    """
    con = create_engine(get_engine_str("mysql").format(**config.DB_CONF),
                        poolclass=pool.NullPool)
    df = pd.read_sql(sql=sql, con=con)
    df["db_name"] = df["db_name"].map(lambda x: str(x).lower())
    df["table_name"] = df["table_name"].map(lambda x: str(x).lower())
    df["hive_database"] = df["hive_database"].map(lambda x: str(x).lower())
    df["hive_table"] = df["hive_table"].map(lambda x: str(x).lower())

    conn_names = df["connection_name"].drop_duplicates(keep="first").tolist()
    db_names = df[(df["db_name"].notna())
                  & (df["db_name"] != "")].drop_duplicates(
                      keep="first")["db_name"].tolist()
    table_names = df[(df["table_name"].notna())
                     & (df["table_name"] != "")].drop_duplicates(
                         keep="first")["table_name"].tolist()
    # print([x.lower() for x in table_names])
    if wizard_name not in conn_names:
        print("Error Message: -w 数据库链接名称不存在")
        sys.exit(1)
    if db != "" and db.lower() not in [x.lower() for x in db_names]:
        print("Error Message: --db 库名不存在")
        sys.exit(1)
    if tb != "" and tb.lower() not in [x.lower() for x in table_names]:
        print("Error Message: --tb 表名不存在")
        sys.exit(1)
    if tb != "" and args_dict["table_name"] != "":
        print("Error Message: --tb -s 不能同时指定")
        sys.exit(1)

    # print(conn_names, table_names)
    # tmp_row = [row for row in rows if wizard_name == row["connection_name"]][0]
    tmp_row = df[df["connection_name"] == wizard_name].to_dict("records")[0]
    args_dict["connection_id"] = tmp_row["connection_id"]
    args_dict["connection_name"] = tmp_row["connection_name"]
    args_dict["db_type"] = tmp_row["db_type"]
    args_dict["host"] = tmp_row["host"]
    args_dict["user"] = tmp_row["user"]
    args_dict["password"] = tmp_row["password"]
    args_dict["port"] = tmp_row["port"]
    args_dict["jdbc_extend"] = tmp_row["jdbc_extend"]
    args_dict["default_db"] = tmp_row["default_db"]
    if db != "" and tb != "":
        # args_dict = df[(df["connection_name"] == wizard_name) & (df["table_name"] == table_name_meta.lower())].head(1).to_dict("records")[0]
        args_dict = df[(df["connection_name"] == wizard_name)
                       & (df["db_name"] == db) & (df["table_name"] == tb)]
        # Convert pandas NaN values to native None
        args_dict = args_dict.where(args_dict.notna(), None)
        # Convert the DataFrame to a dict
        args_dict = args_dict.to_dict("records")[0]
        # print(df.dtypes)
        # print(args_dict)

    if (db != "" and tb != "") or (args_dict["db_name"] != ""
                                   and args_dict["table_name"] != ""
                                   and args_dict["hive_database"] != ""
                                   and args_dict["hive_table"] != ""):
        pass
    else:
        print("Error Message: 必须指定 -w --db --tb 或者 -w -d -s -t 的参数值")
        sys.exit(1)
    # args_dict["hive_database"] = args_dict["hive_database"]
    # args_dict["hive_table"] = args_dict["hive_table"]
    args_dict["hive_full_name"] = args_dict["hive_database"] + "." + args_dict[
        "hive_table"]
    args_dict["db_conf"] = {
        "host": args_dict["host"],
        "port": args_dict["port"],
        "user": args_dict["user"],
        "password": args_dict["password"],
        "database": args_dict["db_name"],
        "charset": "utf8"
    }
    if not is_valid(args_dict["db_name"]):
        args_dict["db_name"] = args_dict["default_db"]
    if not is_valid(args_dict["mode"]):
        args_dict["mode"] = "rename"
    if not is_valid(args_dict["exec_engine"]):
        args_dict["exec_engine"] = "sqoop"
    if not is_valid(args_dict["m"]):
        args_dict["m"] = 1
    if args.num_pappers != '1':
        args_dict["m"] = args.num_pappers
    args_dict["m"] = int(args_dict["m"])
    print(args_dict)
    return args_dict
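Both pre_args variants rely on the same pandas idiom to turn NaN cells into native None before handing the record to the rest of the pipeline; a tiny made-up frame shows the effect:

import pandas as pd

df = pd.DataFrame([{"connection_name": "xiaonuan_ddl", "table_name": "syscategory"},
                   {"connection_name": "xiaonuan_ddl"}])   # table_name missing -> NaN
# Replace NaN with None, then flatten the selected row to a plain dict.
row = df.where(df.notna(), None).to_dict("records")[1]
print(row["table_name"] is None)  # True: no NaN leaks into the args dict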