Ejemplo n.º 1
0
def generate_sqoop_cmd(src_db, dst_db, dst_table, etl_mode, res_query,
                       mapper_nums):
    """Build the shell command string for a Sqoop import into Hive.

    The command is made of five fragments joined by single spaces, in this
    order: the base ``sqoop import`` options with the connection
    credentials, the ``--query`` clause, the Hive/driver options, the
    ``--target-dir`` option and the ``--num-mappers`` option.

    Args:
        src_db: Not read by this function; the configuration lookup uses
            the fixed key "oyo_dw".
            NOTE(review): confirm the hard-coded key is intentional.
        dst_db: Hive database name; also used in the target directory and
            in the MapReduce job name.
        dst_table: Hive table name; also used in the target directory and
            in the MapReduce job name.
        etl_mode: Passed to ``get_config`` and embedded in the job name.
        res_query: Text placed between single quotes after ``--query``.
        mapper_nums: Value for ``--num-mappers``.

    Returns:
        The assembled command as a single string.
    """
    connect, user_name, password = get_config("oyo_dw", etl_mode)
    job_name = "SQOOP_" + etl_mode + "_" + dst_db + "." + dst_table

    # Adjacent literals spell out the exact spacing of the command text.
    base_options = (
        "sqoop "
        "    import "
        "    --hive-import "
        "    --hive-overwrite "
        "    --null-string '\\\\N' "
        "    --null-non-string '\\\\N' "
        "    --connect %s "
        "    --username %s "
        '    --password "%s"  '
    ) % (connect, user_name, password)
    query_option = "--query '%s' " % res_query
    mapper_option = '--num-mappers %s' % mapper_nums

    # The target directory includes the module-level ``version`` so that a
    # rerun after an abnormal interruption does not fail with
    # "Output directory already exists".
    target_dir_option = '--target-dir /user/tmp/sqoop/%s/%s/%s ' % (
        dst_db, dst_table, version)

    # NOTE(review): this fragment begins and ends with a literal newline;
    # confirm that whatever executes the command tolerates embedded
    # newlines.
    hive_options = (
        " \n"
        "    --hive-database  %s "
        "    --hive-table %s "
        '    --hive-delims-replacement " " '
        "    --driver oracle.jdbc.OracleDriver "
        "    --connection-manager org.apache.sqoop.manager.GenericJdbcManager "
        "    --mapreduce-job-name %s \n"
        "    "
    ) % (dst_db, dst_table, job_name)

    fragments = [base_options, query_option, hive_options,
                 target_dir_option, mapper_option]
    return ' '.join(fragments)
Ejemplo n.º 2
0
def generate_sqoop_cmd(src_db, dst_db, dst_table, etl_mode, res_query,
                       partition_key, partition_value, mapper_nums):
    """Build the shell command string for a Sqoop import into a Hive partition.

    The command is made of five fragments joined by single spaces, in this
    order: the base ``sqoop import`` options with the connection
    credentials, the Hive options (including the partition key/value), the
    ``--target-dir`` option, the ``--query`` clause and the
    ``--num-mappers`` option.

    Args:
        src_db: First argument of the ``get_config`` lookup.
        dst_db: Hive database name; also used in the target directory.
        dst_table: Hive table name; also used in the target directory.
        etl_mode: Second argument of the ``get_config`` lookup.
        res_query: Text placed between single quotes after ``--query``.
        partition_key: Value for ``--hive-partition-key``.
        partition_value: Value for ``--hive-partition-value`` (emitted
            inside double quotes).
        mapper_nums: Value for ``--num-mappers``.

    Returns:
        The assembled command as a single string.
    """
    connect, user_name, password = get_config(src_db, etl_mode)

    # Adjacent literals spell out the exact spacing of the command text.
    base_options = (
        "sqoop   "
        "    import "
        "    --hive-import "
        "    --hive-overwrite "
        "    --null-string '\\\\N' "
        "    --null-non-string '\\\\N' "
        "    --connect %s "
        "    --username %s "
        "    --password '%s'  "
    ) % (connect, user_name, password)
    query_option = "--query '%s'" % res_query
    mapper_option = '--num-mappers %s' % mapper_nums

    # The target directory includes the module-level ``version`` so that a
    # rerun after an abnormal interruption does not fail with
    # "Output directory already exists".
    target_dir_option = '--target-dir /user/tmp/sqoop/%s/%s/%s ' % (
        dst_db, dst_table, version)

    hive_options = (
        "--hive-database  %s "
        "    --hive-table %s "
        '    --hive-delims-replacement " " '
        "    --hive-partition-key %s "
        '    --hive-partition-value "%s" '
    ) % (dst_db, dst_table, partition_key, partition_value)

    fragments = [base_options, hive_options, target_dir_option,
                 query_option, mapper_option]
    return ' '.join(fragments)
Ejemplo n.º 3
0
def hive2mysql(src_db, src_tb, dst_db, dst_table, dst_columns, etl_mode,
               res_query, mapper_nums, pre_sql):
    """Stage a Hive query result in a directory and run a Sqoop command on it.

    Steps, in order:
      1. Run ``INSERT OVERWRITE DIRECTORY`` with ``res_query`` through
         ``hive_command`` / ``hive_exce_command``.
      2. Open a connection from the ``get_config(dst_db, etl_mode)``
         settings and run ``pre_sql`` on it via ``db_query_commit``.
      3. Build a command with ``generate_sqoop_cmd`` and execute it.
      4. Remove the staging directory with ``hdfs dfs -rm -r``.

    Args:
        src_db: Used only to build the staging directory path.
        src_tb: Used only to build the staging directory path.
        dst_db: Key for the configuration lookup; forwarded to
            ``generate_sqoop_cmd``.
        dst_table: Forwarded to ``generate_sqoop_cmd``.
        dst_columns: Forwarded to ``generate_sqoop_cmd``.
        etl_mode: Forwarded to ``get_config`` and ``generate_sqoop_cmd``.
        res_query: Query text appended to the INSERT OVERWRITE statement.
        mapper_nums: Forwarded to ``generate_sqoop_cmd``.
        pre_sql: SQL run before the export to clear the target table data.
    """
    # The staging path includes the module-level ``version``.
    export_dir = "/user/tmp/sqoop/export/mysql/%s/%s/%s" % (
        src_db, src_tb, version)

    # Write the data to the target path.
    overwrite_sql = (
        " INSERT OVERWRITE  DIRECTORY '%s' "
        "    %s \n"
        "    "
    ) % (export_dir, res_query)
    hive_exce_command(hive_command(overwrite_sql))

    # Clear the target table data first.
    connect, user_name, password = get_config(dst_db, etl_mode)
    target_conn = get_db(connect, user_name, password)
    db_query_commit(target_conn, pre_sql)

    export_cmd = generate_sqoop_cmd(dst_db, dst_table, dst_columns, etl_mode,
                                    export_dir, mapper_nums)
    shell_exce_command(export_cmd)

    # Delete the temporary files.
    shell_exce_command("hdfs dfs -rm -r " + export_dir)
Ejemplo n.º 4
0
def hive2mysqlupsert(src_db, src_tb, dst_db, dst_table, etl_mode, res_query,
                     mapper_nums, update_key):
    """Stage a Hive query result in a directory and run a Sqoop command on it.

    Steps, in order:
      1. Run ``INSERT OVERWRITE DIRECTORY`` (row format delimited, fields
         terminated by the ``\\001`` escape) with ``res_query`` through
         ``hive_command`` / ``hive_exce_command``.
      2. Open a connection from the ``get_config(dst_db, etl_mode)``
         settings.
      3. Build a command with ``generate_sqoop_cmd`` (passing
         ``update_key``) and execute it.
      4. Remove the staging directory with ``hdfs dfs -rm -r``.

    Args:
        src_db: Used only to build the staging directory path.
        src_tb: Used only to build the staging directory path.
        dst_db: Key for the configuration lookup; forwarded to
            ``generate_sqoop_cmd``.
        dst_table: Forwarded to ``generate_sqoop_cmd``.
        etl_mode: Forwarded to ``get_config`` and ``generate_sqoop_cmd``.
        res_query: Query text appended to the INSERT OVERWRITE statement.
        mapper_nums: Forwarded to ``generate_sqoop_cmd``.
        update_key: Forwarded to ``generate_sqoop_cmd``.
    """
    # The staging path includes the module-level ``version``.
    export_dir = "/user/tmp/sqoop/export/mysql/%s/%s/%s" % (
        src_db, src_tb, version)

    # Write the data to the target path.
    overwrite_sql = (
        " INSERT OVERWRITE  DIRECTORY '%s' "
        "        row format delimited\n"
        "        fields terminated by '\\001' \n"
        "    %s \n"
        "    "
    ) % (export_dir, res_query)
    hive_exce_command(hive_command(overwrite_sql))

    connect, user_name, password = get_config(dst_db, etl_mode)
    # NOTE(review): this connection is opened but never used or closed in
    # this function; confirm whether the call is still needed.
    db_mysql = get_db(connect, user_name, password)

    upsert_cmd = generate_sqoop_cmd(dst_db, dst_table, etl_mode, export_dir,
                                    mapper_nums, update_key)
    shell_exce_command(upsert_cmd)

    # Delete the temporary files.
    shell_exce_command("hdfs dfs -rm -r " + export_dir)
Ejemplo n.º 5
0
def generate_sqoop_cmd(dst_db, dst_table, dst_columns, etl_mode, export_dir,
                       mapper_nums):
    """Build the shell command string for a Sqoop export.

    Args:
        dst_db: Key for the ``get_config`` lookup; also part of the
            MapReduce job name.
        dst_table: Value for ``--table``; also part of the job name.
        dst_columns: Value for ``--columns``.
        etl_mode: Passed to ``get_config`` and embedded in the job name.
        export_dir: Value for ``--export-dir``.
        mapper_nums: Value for ``--num-mappers``.

    Returns:
        The assembled command as a single string. The text ends with a
        newline followed by spaces, exactly as laid out in the template.
    """
    job_name = "SQOOP_" + etl_mode + "_" + dst_db + "." + dst_table
    connect, user_name, password = get_config(dst_db, etl_mode)

    # Adjacent literals spell out the exact spacing of the command text.
    export_template = (
        "sqoop   "
        "    export  "
        "    --table %s "
        "    --connect %s "
        "    --username %s "
        '    --password "%s" '
        "    --columns %s "
        "    --input-fields-terminated-by '\\001' "
        "    --input-lines-terminated-by '\\n' "
        "    --input-null-string '\\\\N' "
        "    --input-null-non-string '\\\\N' "
        "    --num-mappers %s "
        "    --export-dir %s "
        "    --mapreduce-job-name %s \n"
        "     "
    )
    values = (dst_table, connect, user_name, password, dst_columns,
              mapper_nums, export_dir, job_name)
    return export_template % values
Ejemplo n.º 6
0
    port = int(matchObj.group(2))
    db_name = matchObj.group(3)

    db_mysql = pymysql.connect(host=host,
                               user=user_name,
                               passwd=password,
                               port=port,
                               db=db_name,
                               charset='utf8',
                               cursorclass=pymysql.cursors.DictCursor)
    return db_mysql


def db_query_commit(db_mysql, pre_sql):
    """Execute ``pre_sql`` on ``db_mysql``, commit it, and close the connection.

    The cursor and the connection are always released, whether or not the
    statement succeeds, so a failing statement no longer leaves the
    connection open.

    Args:
        db_mysql: Open database connection providing ``cursor()``,
            ``commit()``, ``rollback()`` and ``close()``.
        pre_sql: SQL statement to execute.

    Raises:
        MysqlDatabaseError: If executing or committing the statement fails.
            The transaction is rolled back before the error is raised.
    """
    try:
        cursor = db_mysql.cursor()
        try:
            # Execute the SQL statement.
            cursor.execute(pre_sql)
            # Commit so the change is applied in the database.
            db_mysql.commit()
        except Exception as e:
            # Roll back if anything went wrong.
            db_mysql.rollback()
            raise MysqlDatabaseError("执行: mysql语句 %s 时出错:%s" % (pre_sql, e))
        finally:
            cursor.close()
    finally:
        # Close the connection on both the success and the failure path.
        db_mysql.close()


if __name__ == '__main__':
    # Script entry: look up the 'hera' / 'mysql-full' settings and open a
    # connection with them.
    settings = get_config('hera', 'mysql-full')
    connect, user_name, password = settings
    db_mysql = get_db(connect, user_name, password)