def getconfs(dbConfig): jsonobj = etl_utils.parseconfs(dbConfig) dic = {} dic["--connect"] = jsonobj["db.url"] # 使用的用户名 dic["--username"] = jsonobj["db.username"] # 使用的密码 dic["--password"] = '******' % (jsonobj["db.password"]) print "=================================" + qp_dt dic["-m"] = "1" dic["--hive-import"] = " " partitionname = "biz_end_date" # partitionvalue ="30001231" partitionvalue = qp_dt partitionstr = " --hive-partition-key %s --hive-partition-value %s " % (partitionname, partitionvalue) dic["--hive-table"] = jsonobj["hive_db"] + "." + jsonobj["hive_table"] + partitionstr tablename = jsonobj["hive_db"] + "." + jsonobj["hive_table"] truncateTable(tablename, qp_dt) dic["--split-by"] = jsonobj["sqoop.split-by"] querySql = buildQuerySql(jsonobj) dic["--query"] = '" %s "' % (querySql,) print "sqoop import sql:" + dic["--query"] current_time = datetime.datetime.now() date_str = current_time.strftime("%Y%m%d") date_str = etlDate time_str = current_time.strftime("%Y%m%d%H%M") dic["--fields-terminated-by"] = " '\\001' " dic["--null-string"] = " '\\\N' " dic["--null-non-string"] = " '\\\N' " # dic["--as-parquetfile"] = " " dic["--verbose"] = " " dic["--inline-lob-limit"] = "16777216" tableBasePath = "%s/%s/%s/%s%s/%s/" % ( jsonobj["hdfs.root"], jsonobj["hdfs.category.input"], "init", jsonobj["hdfs.db_name"], jsonobj["hdfs.table_name"], jsonobj["hdfs.schema_version"], ) dic["--target-dir"] = tableBasePath + "%s/%s" % (date_str, time_str) print "temp dir:%s" % (dic["--target-dir"],) jsonConfigList.append(dic) return dic
def getconfs(dbConfig): jsonobj = etl_utils.parseconfs(dbConfig) dic = {} dic["--connect"] = jsonobj["db.url"] # 使用的用户名 dic["--username"] = jsonobj["db.username"] # 使用的密码 dic["--password"] = '******' % (jsonobj["db.password"]) print "=================================" + qp_dt dic["-m"] = "1" dic["--hive-import"] = " " partitionname = "biz_end_date" #partitionvalue ="30001231" partitionvalue = qp_dt partitionstr = " --hive-partition-key %s --hive-partition-value %s " % ( partitionname, partitionvalue) dic["--hive-table"] = jsonobj["hive_db"] + "." + jsonobj[ "hive_table"] + partitionstr tablename = jsonobj["hive_db"] + "." + jsonobj["hive_table"] truncateTable(tablename, qp_dt) dic["--split-by"] = jsonobj["sqoop.split-by"] querySql = buildQuerySql(jsonobj) dic["--query"] = '" %s "' % (querySql, ) print "sqoop import sql:" + dic["--query"] current_time = datetime.datetime.now() date_str = current_time.strftime("%Y%m%d") date_str = etlDate time_str = current_time.strftime("%Y%m%d%H%M") dic["--fields-terminated-by"] = " '\\001' " dic["--null-string"] = " '\\\N' " dic["--null-non-string"] = " '\\\N' " #dic["--as-parquetfile"] = " " dic["--verbose"] = " " dic["--inline-lob-limit"] = "16777216" tableBasePath = "%s/%s/%s/%s%s/%s/" % ( jsonobj["hdfs.root"], jsonobj["hdfs.category.input"], "init", jsonobj["hdfs.db_name"], jsonobj["hdfs.table_name"], jsonobj["hdfs.schema_version"]) dic["--target-dir"] = tableBasePath + "%s/%s" % (date_str, time_str) print "temp dir:%s" % (dic["--target-dir"], ) jsonConfigList.append(dic) return dic
def check_data(dbConfig, qp_dt, sys_dt, g_where_condition, fromdb, round_time):
    """Count rows on one side of an ETL transfer and record the count.

    When *fromdb* is 'source', counts rows in the source RDBMS table and
    inserts (round 1) or updates (round 2) the ETL log entry; otherwise
    counts rows in the target hive table and updates the log entry.
    Always returns 0.

    :param dbConfig: path/name of the table JSON config file.
    :param qp_dt: business data date being checked.
    :param sys_dt: run timestamp recorded in the log.
    :param g_where_condition: extra WHERE filter for the count queries.
    :param fromdb: 'source' to count the source side, anything else for target.
    :param round_time: check round number (1 = insert, 2 = update) on the source side.
    """
    cfg = etl_utils.parseconfs(dbConfig)
    src_table = cfg["db.table_name"]
    src_user = cfg["db.username"]
    src_pass = cfg["db.password"]
    hive_db = cfg["hive_db"]
    hive_table = cfg["hive_table"]
    src_db = cfg["db.database"]

    # Pick apart the JDBC-style URL: "<scheme>:<type>:...//host:port/tns"
    url = cfg["db.url"]
    src_type = url.split(":")[1]  # source DB type: oracle / mysql
    remainder = url[url.find("//") + 2:]
    pieces = remainder.split("/")
    src_tns = pieces[1]
    host_port = pieces[0].split(":")
    src_host = host_port[0]
    src_port = host_port[1]

    run_time = sys_dt
    data_date = qp_dt
    check_round = round_time
    run_type = fromdb

    if run_type == 'source':
        # row count on the source database side
        row_count = get_source_count(src_db, src_type, src_host, src_port,
                                     src_tns, src_user, src_pass, src_table,
                                     g_where_condition)
        # persist the count into the ETL log database
        log_conn = get_mysql_conn(ETL_LOG_HOST, ETL_LOG_PORT, ETL_LOG_DB,
                                  ETL_LOG_USER, ETL_LOG_PASS)
        if check_round == 1:
            insert_etl_log(log_conn, src_tns, src_table, data_date, run_time,
                           row_count)
        elif check_round == 2:
            update_source_etl_log(log_conn, src_tns, src_table, data_date,
                                  run_time, row_count)
    else:
        # row count on the target (hive) side
        row_count = get_target_count(hive_db, hive_table, g_where_condition)
        # persist the count into the ETL log database
        log_conn = get_mysql_conn(ETL_LOG_HOST, ETL_LOG_PORT, ETL_LOG_DB,
                                  ETL_LOG_USER, ETL_LOG_PASS)
        update_target_etl_log(log_conn, src_tns, src_table, data_date,
                              run_time, row_count)
    return 0
def getconfs(dbConfig): jsonobj = etl_utils.parseconfs(dbConfig) dic = {} dic["--connect"] = jsonobj["db.url"] # 使用的用户名 dic["--username"] = jsonobj["db.username"] # 使用的密码 dic["--password"] = '******' % (jsonobj["db.password"]) dic["-m"] = "1" dic["--split-by"] = jsonobj["sqoop.split-by"] querySql = buildQuerySql(jsonobj) dic["--query"] = '"%sand $CONDITIONS"' % (querySql, ) print "sqoop import sql:" + dic["--query"] current_time = datetime.datetime.now() date_str = current_time.strftime("%Y%m%d") date_str = etlDate time_str = current_time.strftime("%Y%m%d%H%M") tableBasePath = "%s/%s/%s/%s/%s/" % ( jsonobj["hdfs.root"], jsonobj["hdfs.category.input"], jsonobj["hdfs.db_name"], jsonobj["hdfs.table_name"], jsonobj["hdfs.schema_version"]) dic["--target-dir"] = tableBasePath + "%s/%s" % (date_str, time_str) print "target dir: " + dic["--target-dir"] dic["--fields-terminated-by"] = " '\001' " dic["--hive-drop-import-delims"] = " " #dic["--fields-terminated-by"] = " '\\001' " dic["--null-string"] = " '\\\N' " dic["--null-non-string"] = " '\\\N' " #dic["--as-parquetfile"] = " " dic["--verbose"] = " " dic["--inline-lob-limit"] = "16777216" jsonConfigList.append(dic) return dic
def getconfs(dbConfig): jsonobj = etl_utils.parseconfs(dbConfig) dic = {} dic["--connect"] = jsonobj["db.url"] # 使用的用户名 dic["--username"] = jsonobj["db.username"] # 使用的密码 dic["--password"] = '******'%(jsonobj["db.password"]) dic["-m"]= "1" dic["--split-by"]= jsonobj["sqoop.split-by"] querySql = buildQuerySql(jsonobj) dic["--query"]= '"%sand $CONDITIONS"'%(querySql,) print "sqoop import sql:" + dic["--query"] current_time = datetime.datetime.now() date_str = current_time.strftime("%Y%m%d") date_str = etlDate time_str = current_time.strftime("%Y%m%d%H%M") tableBasePath = "%s/%s/%s/%s/%s/"%(jsonobj["hdfs.root"],jsonobj["hdfs.category.input"],jsonobj["hdfs.db_name"], jsonobj["hdfs.table_name"],jsonobj["hdfs.schema_version"]) dic["--target-dir"] = tableBasePath + "%s/%s"%(date_str,time_str) print "target dir: " + dic["--target-dir"] dic["--fields-terminated-by"] = " '\001' " dic["--hive-drop-import-delims"] = " " #dic["--fields-terminated-by"] = " '\\001' " dic["--null-string"] = " '\\\N' " dic["--null-non-string"] = " '\\\N' " #dic["--as-parquetfile"] = " " dic["--verbose"] = " " dic["--inline-lob-limit"] = "16777216" jsonConfigList.append(dic) return dic
def parseconfs(dbConfig): jsonobj = etl_utils.parseconfs(dbConfig) dbConfigFileName = dbConfig print "table json Schema file: " + dbConfigFileName dic = {} dic["hive_table"] = jsonobj["hive_table"] dic["hive_db"] = jsonobj["hive_db"] dic["table_name"] = jsonobj["db.table_name"] colsList = jsonobj["columns"] srcTbKeys = jsonobj["db.table_keys"] srcTbKeysList = srcTbKeys.split(",") srcTbKeysList = etl_utils.formatList(srcTbKeysList) colsStr = "" count = 0 joincount = 0 colStr_md5 = "" colStr_as_h = "" colStr_as_m = "" colStr_h = "" colStr_coalesce = "" colStr_m = "" joinColStr = "" p_k = "" for col in colsList: #if col["primary_key"]=="true" or (col["name"] in srcTbKeysList): if (col["name"].upper() in srcTbKeysList): joinCol = "h.h_" + col["name"] + "=" + "m.m_" + col["name"] p_k = col["name"] if joincount == 0: joinColStr = joinColStr + joinCol else: joinColStr = joinColStr + " and " + joinCol joincount = joincount + 1 colName = col["name"] colType = col["type"] if count == 0: #用于拼H表字段串 colStr_as_h = colStr_as_h + "%s" % (colName) + " as " + "h_%s" % ( colName) #用于拼m表字段串 colStr_as_m = colStr_as_m + "%s" % (colName) + " as " + "m_%s" % ( colName) #用于拼第一个插入字段串 colStr_h = colStr_h + "h_%s" % (colName) #用于拼第二个字段串 colStr_coalesce = colStr_coalesce + "coalesce(h_%s" % ( colName) + " , " + "m_%s)" % (colName) #用于拼第二个字段串 colStr_m = colStr_m + "m_%s" % (colName) #用于拼md5串 if colType[0:7] == "decimal": colName = "cast(" + colName + " as string)" colStr_md5 = colStr_md5 + "%s" % (colName) else: colStr_md5 = colStr_md5 + "%s" % (colName) else: #用于拼H表字段串 colStr_as_h = colStr_as_h + ",%s" % (colName) + " as " + "h_%s" % ( colName) #用于拼m表字段串 colStr_as_m = colStr_as_m + ",%s" % (colName) + " as " + "m_%s" % ( colName) #用于拼第一个插入字段串 colStr_h = colStr_h + ",h_%s" % (colName) #用于拼第二个字段串 colStr_coalesce = colStr_coalesce + ",coalesce(h_%s" % ( colName) + " , " + "m_%s)" % (colName) #用于拼第三个字段串 colStr_m = colStr_m + ",m_%s" % (colName) #用于拼md5串 if colType[0:7] == "decimal": colName = 
"cast(" + colName + " as string)" colStr_md5 = colStr_md5 + ",%s" % (colName) else: colStr_md5 = colStr_md5 + ",%s" % (colName) count = count + 1 print "joinColStr: " + joinColStr dic["joinColStr"] = joinColStr print "p_k: " + p_k dic["p_k"] = p_k print "colStr_md5:%s" % (colStr_md5, ) dic["colStr_md5"] = colStr_md5 print "colStr_as_h:%s" % (colStr_as_h, ) dic["colStr_as_h"] = colStr_as_h print "colStr_as_m:%s" % (colStr_as_m, ) dic["colStr_as_m"] = colStr_as_m print "colStr_h:%s" % (colStr_h, ) dic["colStr_h"] = colStr_h print "colStr_coalesce:%s" % (colStr_coalesce, ) dic["colStr_coalesce"] = colStr_coalesce print "colStr_m:%s" % (colStr_m, ) dic["colStr_m"] = colStr_m jsonConfigList.append(dic) return dic
def parseconfs(dbConfig): jsonobj = etl_utils.parseconfs(dbConfig) dbConfigFileName = dbConfig print "table json Schema file: " + dbConfigFileName dic = {} dic["hive_table"]=jsonobj["hive_table"] dic["hive_db"] = jsonobj["hive_db"] dic["table_name"] = jsonobj["db.table_name"] colsList = jsonobj["columns"] srcTbKeys = jsonobj["db.table_keys"] srcTbKeysList = srcTbKeys.split(",") srcTbKeysList = etl_utils.formatList(srcTbKeysList) colsStr = "" count = 0 joincount = 0 colStr_md5 = "" colStr_as_h = "" colStr_as_m = "" colStr_h = "" colStr_coalesce = "" colStr_m = "" joinColStr = "" p_k = "" for col in colsList: #if col["primary_key"]=="true" or (col["name"] in srcTbKeysList): if (col["name"].upper() in srcTbKeysList): joinCol = "h.h_" + col["name"] + "=" + "m.m_" + col["name"] p_k = col["name"] if joincount == 0: joinColStr = joinColStr + joinCol else: joinColStr = joinColStr + " and " + joinCol joincount = joincount +1 colName = col["name"] colType = col["type"] if count == 0: #用于拼H表字段串 colStr_as_h = colStr_as_h + "%s"%(colName) + " as " + "h_%s"%(colName) #用于拼m表字段串 colStr_as_m = colStr_as_m + "%s"%(colName) + " as " + "m_%s"%(colName) #用于拼第一个插入字段串 colStr_h = colStr_h + "h_%s"%(colName) #用于拼第二个字段串 colStr_coalesce = colStr_coalesce + "coalesce(h_%s"%(colName) + " , " + "m_%s)"%(colName) #用于拼第二个字段串 colStr_m = colStr_m + "m_%s"%(colName) #用于拼md5串 if colType[0:7]=="decimal": colName = "cast(" + colName + " as string)" colStr_md5 = colStr_md5 + "%s"%(colName) else: colStr_md5 = colStr_md5 + "%s"%(colName) else: #用于拼H表字段串 colStr_as_h = colStr_as_h + ",%s"%(colName) + " as " + "h_%s"%(colName) #用于拼m表字段串 colStr_as_m = colStr_as_m + ",%s"%(colName) + " as " + "m_%s"%(colName) #用于拼第一个插入字段串 colStr_h = colStr_h + ",h_%s"%(colName) #用于拼第二个字段串 colStr_coalesce = colStr_coalesce + ",coalesce(h_%s"%(colName) + " , " + "m_%s)"%(colName) #用于拼第三个字段串 colStr_m = colStr_m + ",m_%s"%(colName) #用于拼md5串 if colType[0:7]=="decimal": colName = "cast(" + colName + " as string)" colStr_md5 = 
colStr_md5 + ",%s"%(colName) else: colStr_md5 = colStr_md5 + ",%s"%(colName) count = count + 1 print "joinColStr: " + joinColStr dic["joinColStr"] = joinColStr print "p_k: " + p_k dic["p_k"] = p_k print "colStr_md5:%s"%(colStr_md5,) dic["colStr_md5"] = colStr_md5 print "colStr_as_h:%s"%(colStr_as_h,) dic["colStr_as_h"] = colStr_as_h print "colStr_as_m:%s"%(colStr_as_m,) dic["colStr_as_m"] = colStr_as_m print "colStr_h:%s"%(colStr_h,) dic["colStr_h"] = colStr_h print "colStr_coalesce:%s"%(colStr_coalesce,) dic["colStr_coalesce"] = colStr_coalesce print "colStr_m:%s"%(colStr_m,) dic["colStr_m"] = colStr_m jsonConfigList.append(dic) return dic