def deleteT(db, date):
    """Stamp ``Del_Dt`` on ``data_tbl`` rows whose table is absent from ``gethiveT(db)``.

    Args:
        db: Database name, passed to ``gethiveT`` and ``getCT``.
        date: Value written to ``Create_Dt`` (initial inserts) or ``Del_Dt``.

    When ``getCT(db)`` returns no rows, every row from ``gethiveT(db)`` is
    inserted into ``data_tbl`` instead.  If an update fails, the error is
    logged, the traceback is printed and the process exits with status 1.
    """
    tables1 = gethiveT(db)  # pass the database name
    tables2 = getCT(db)  # pass the database name
    conn = MySQL(config.washmeta)
    tables1 = getDBs(tables1)
    if len(tables2) == 0:
        for table1 in tables1:
            table1['Create_Dt'] = date
            table1['Data_Tbl_UUID'] = uuid.uuid1()
            logging.debug('table1:' + table1['Data_Tbl_Phys_Nm'])
            conn.insert("data_tbl", table1)

    # Build the name set once so each recorded table needs a single lookup.
    live_names = set(t.get('Data_Tbl_Phys_Nm') for t in tables1)
    for table2 in tables2:
        name = table2.get('Data_Tbl_Phys_Nm')
        if name in live_names:
            continue
        # Log the row that is actually being marked.  The previous code logged
        # a leftover loop variable from the hive list, which named the wrong
        # table and was unbound when that list was empty.
        logging.debug('table2:%s' % name)
        try:
            # NOTE(review): values are interpolated directly into the SQL
            # text; switch to bound parameters if MySQL.execute supports them.
            conn.execute(
                "update data_tbl set Del_Dt='{}' where Data_Tbl_Phys_Nm='{}'"
                .format(date, name))
        except Exception:
            logging.error('删除表元数据失败,数据为:' + str(table2))
            print(traceback.format_exc())
            sys.exit(1)
    del conn
def insertNewP(db, date):
    """Bring the ``dp`` table in line with the rows returned by ``gethiveP(db)``.

    Rows are matched on the pair ``(Data_Tblid, Dp_Path)``.  Rows present in
    ``gethiveP`` but not in ``getCP`` are inserted; rows present in ``getCP``
    but not in ``gethiveP`` are deleted.

    Args:
        db: Database name, passed to ``gethiveP`` and ``getCP``.
        date: Not used by this function; kept for signature compatibility.

    Any failed insert or delete is logged, the traceback is printed and the
    process exits with status 1.
    """
    partitions1 = gethiveP(db)  # pass the database name
    partitions2 = getCP(db)  # pass the database name
    conn = MySQL(config.washmeta)
    partitions1 = getTableID(partitions1)

    def _key(partition):
        # Identity of a partition row for matching purposes.
        return (partition.get('Data_Tblid'), partition.get('Dp_Path'))

    if len(partitions2) == 0:
        for p1 in partitions1:
            logging.debug('partition:' + str(p1['Data_Tblid']) + ':' +
                          p1['Dp_Path'])
            try:
                conn.insert("dp", p1)
            except Exception:
                logging.error('第一次插入分区数据失败,数据为:' + str(p1))
                print(traceback.format_exc())
                sys.exit(1)
    else:
        # Set lookups replace the nested scans over both lists.
        recorded = set(_key(p2) for p2 in partitions2)
        for p1 in partitions1:
            if _key(p1) in recorded:
                continue
            logging.debug('插入分区partition:' + str(p1['Data_Tblid']) + ':' +
                          p1['Dp_Path'])
            try:
                conn.insert("dp", p1)
            except Exception:
                logging.error('插入分区数据失败,数据为:' + str(p1))
                print(traceback.format_exc())
                sys.exit(1)

        live = set(_key(p1) for p1 in partitions1)
        for p2 in partitions2:
            if _key(p2) in live:
                continue
            logging.debug('删除分区partition:' + str(p2['Data_Tblid']) + ':' +
                          p2['Dp_Path'])
            try:
                # NOTE(review): values are interpolated directly into the SQL
                # text; switch to bound parameters if MySQL.execute allows it.
                conn.execute(
                    "delete from dp where Data_Tblid='{}' and Dp_Path='{}'"
                    .format(p2.get('Data_Tblid'), p2.get('Dp_Path')))
            except Exception:
                # Report the row whose delete failed (p2); the previous code
                # logged an unrelated, possibly unbound, hive-side row.
                logging.error('删除分区数据失败,数据为:' + str(p2))
                print(traceback.format_exc())
                sys.exit(1)

    del conn
Exemple #3
0
def get_comment(table_id):
    """Fetch the table comment and the list of field comments for ``table_id``.

    The table comment is the ``Data_Tbl_Cn_Nm`` value of the first row of
    ``const.TABLE_COMMENT``; the field comments are the ``Fld_Cn_Nm`` values
    of every row of ``const.FIELD_COMMENT``.  Each value is converted to a
    string (empty when falsy), stripped of surrounding ``\\n`` then ``\\r``
    characters, and has every ``;`` removed.

    Returns:
        A ``(table_comment, field_comment)`` tuple: a string and a list of
        strings.
    """
    def _clean(raw):
        return str(raw or '').strip('\n').strip('\r').replace(';', '')

    conn = MySQL(config.dqc_mysql)
    table_rows = conn.execute(const.TABLE_COMMENT, (table_id, ))
    table_comment = _clean(table_rows[0].get("Data_Tbl_Cn_Nm"))
    field_rows = conn.execute(const.FIELD_COMMENT, (table_id, ))
    field_comment = []
    for row in field_rows:
        field_comment.append(_clean(row.get("Fld_Cn_Nm")))
    del conn
    return table_comment, field_comment
def insertCByT(db, tb):
    """Insert the columns of one table into ``data_fld``.

    Columns are read through ``config.hivemeta`` with a query filtered on the
    database name ``db`` and on ``tb``'s ``Data_Tbl_Phys_Nm``, passed through
    ``getTableID``, stamped with ``tb['Create_Dt']`` and inserted one by one
    through ``config.washmeta``.  A failed insert is logged and its traceback
    printed; processing then continues with the next column.

    Args:
        db: Database name used in the query filter.
        tb: Mapping providing ``Data_Tbl_Phys_Nm`` and ``Create_Dt``.
    """
    hive_conn = MySQL(config.hivemeta)
    wash_conn = MySQL(config.washmeta)
    hivesql = """select 
'{}' AS Data_Tblid,
t1.COLUMN_NAME as Fld_Phys_Nm,
t1.COMMENT as Fld_Cn_Nm,
t1.TYPE_NAME as Fld_Data_Type,
t1.INTEGER_IDX as Fld_Ord
from columns_v2 t1
left join sds t2
on t1.cd_id = t2.cd_id
left join tbls t3
on t2.sd_id = t3.sd_id
left join dbs t4
on t3.db_id=t4.db_id
where t4.name='{}' and t3.tbl_name='{}'"""
    # The table name fills both the Data_Tblid placeholder and the filter.
    query = hivesql.format(tb.get('Data_Tbl_Phys_Nm'), db,
                           tb.get('Data_Tbl_Phys_Nm'))
    columns = getTableID(hive_conn.execute(query))
    for column in columns:
        column['Create_Dt'] = tb['Create_Dt']
        try:
            logging.debug("插入新增表字段:" + str(column['Fld_Phys_Nm']))
            wash_conn.insert('data_fld', column)
        except Exception:
            logging.error("插入新增表字段失败:" + str(column['Fld_Phys_Nm']))
            print(traceback.format_exc())
    del hive_conn
    del wash_conn
Exemple #5
0
def sanhuangua_join_profile(data, ds):
    """Join the given rows with profile data and return one row per role.

    The staging table ``nsh_sanhuangua_tmp`` is emptied and refilled with
    ``data`` (columns ``role_id``, ``suspect_score``).  A join query for
    partition ``ds`` is then posted to ``SHUYUAN_URL`` and the ``data`` entry
    of the JSON response is reshaped.

    Args:
        data: Rows handed to ``batch_insert`` for the staging table.
        ds: Value substituted into the ``b.ds`` filter of the query.

    Returns:
        A list of ``[role_id, <one value per PROFILE_COLS entry>]`` lists.
        A role id that appears more than once keeps only its last profile.
    """
    conn = MySQL()

    # Empty the staging table, then load the new rows.
    conn.execute('delete from nsh_sanhuangua_tmp')
    conn.batch_insert('nsh_sanhuangua_tmp', ['role_id', 'suspect_score'], data)

    query_template = """
    select a.suspect_score, b.*
    from anti_plugin.nsh_sanhuangua_tmp a 
    join luoge_nsh_mid.mid_role_portrait_all_d b on a.role_id = b.role_id
    where b.ds = '{ds}'
    """
    query = query_template.format(ds=ds)
    logging.info(query)

    # Run the join remotely and ask for the result set in the response.
    response = requests.post(
        SHUYUAN_URL,
        timeout=6000,
        json={'sql': query, 'needReturn': 'true'},
    )

    profiles_by_role = {}
    for record in response.json()['data']:
        # Drop any "table." prefix from column names; None becomes '0'.
        fields = {
            column.split('.')[-1]: ('0' if value is None else value)
            for column, value in record.items()
        }
        ordered = [fields.get(name, '') for name in PROFILE_COLS]
        profiles_by_role[fields['role_id']] = ordered

    return [[role_id] + ordered
            for role_id, ordered in profiles_by_role.items()]
Exemple #6
0
def get_target_database(database, usage):
    """Return ``db_phys_nm`` from the first row of ``const.TARGET_DATABASE``.

    Args:
        database: First query parameter; also quoted in the error message.
        usage: Second query parameter.

    Raises:
        exception.DQCException: If the query returns no rows.
    """
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(const.TARGET_DATABASE, (database, usage))
    del conn
    if len(rows) > 0:
        return rows[0]["db_phys_nm"]
    raise exception.DQCException(
        "target database is non-exist. [database:%s]" % database)
Exemple #7
0
def get_change_ddl(table_id):
    """Return True when ``mask_const.TABLE_CHANGE`` yields any row for ``table_id``."""
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(mask_const.TABLE_CHANGE, (table_id, ))
    changed = len(rows) != 0
    del conn
    return changed
def compareC(db, table1, table2):
    """Report whether two column result sets differ.

    ``const.getTCSql`` is run through ``config.hivemeta`` for ``table1`` and
    ``const.getCTCSql`` through ``config.washmeta`` for ``table2``; both
    results are sorted in place and compared.

    Args:
        db: Database name formatted into both queries.
        table1: Mapping whose ``Data_Tbl_Phys_Nm`` fills the first query.
        table2: Mapping whose ``Data_Tbl_Phys_Nm`` fills the second query.

    Returns:
        True when the sorted results are not equal, otherwise False.
    """
    hive_conn = MySQL(config.hivemeta)
    wash_conn = MySQL(config.washmeta)
    hive_sql = const.getTCSql.format(db, table1.get('Data_Tbl_Phys_Nm'))
    hive_columns = hive_conn.execute(hive_sql)
    wash_sql = const.getCTCSql.format(db, table2.get('Data_Tbl_Phys_Nm'))
    wash_columns = wash_conn.execute(wash_sql)
    del hive_conn
    del wash_conn
    # Sort both sides so row order does not affect the comparison.
    hive_columns.sort()
    wash_columns.sort()
    return cmp(hive_columns, wash_columns) != 0
Exemple #9
0
def get_database_table(table_id):
    """Return ``(db_phys_nm, data_tbl_phys_nm)`` from the first ``const.DB_TABLE`` row.

    Raises:
        exception.DQCException: If the query returns no rows for ``table_id``.
    """
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(const.DB_TABLE, (table_id, ))
    del conn
    if not len(rows):
        raise exception.DQCException(
            "database table is non-exist. [table_id:%s]" % table_id)
    return rows[0]["db_phys_nm"], rows[0]["data_tbl_phys_nm"]
Exemple #10
0
def get_check_item():
    """Return a dict mapping ``Chk_Proj_Cd`` to ``Chk_Projid`` for every ``const.CHECK_ITEM`` row."""
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(const.CHECK_ITEM)
    del conn
    return {row['Chk_Proj_Cd']: row['Chk_Projid'] for row in rows}
Exemple #11
0
def get_partition_path(table_id, partition_date):
    """Return ``dp_path`` from the first ``const.PARTITION_PATH`` row, or None.

    Args:
        table_id: First query parameter.
        partition_date: Second query parameter.

    Returns:
        The ``dp_path`` value of the first row, or None when the query
        returns no rows.
    """
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(const.PARTITION_PATH, (table_id, partition_date))
    del conn
    return rows[0]["dp_path"] if len(rows) != 0 else None
def get_label_id(table_id):
    """Return ``Labelid`` from the first ``mask_const.MASK_LABLE`` row.

    Raises:
        exception.MaskException: If the query returns no rows for ``table_id``.
    """
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(mask_const.MASK_LABLE, (table_id, ))
    del conn
    if len(rows) > 0:
        return rows[0]["Labelid"]
    raise exception.MaskException(
        "table lable is non-exist. [table_id:%s]" % table_id)
Exemple #13
0
def get_field(table_id):
    """Return a dict mapping ``Fld_Phys_Nm`` to ``Fldid`` for every ``const.FIELD_TABLE`` row."""
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(const.FIELD_TABLE, (table_id, ))
    del conn
    return {row['Fld_Phys_Nm']: row['Fldid'] for row in rows}
Exemple #14
0
def get_partition_latest(table_id):
    """Return the ``latest`` value from the first ``const.MAX_PARTITION_DATE`` row.

    Raises:
        exception.DQCException: If that value is None.
    """
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(const.MAX_PARTITION_DATE, (table_id, ))
    del conn
    if rows[0]["latest"] is not None:
        return rows[0]["latest"]
    raise exception.DQCException(
        "table partition is non-exist. [table_id:%s]" % table_id)
Exemple #15
0
def get_mask_cmpu():
    """Return a dict mapping ``Data_Wash_Cmpu_Cd`` to ``Data_Wash_Cmpuid`` for every ``mask_const.MASK_CMPU`` row."""
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(mask_const.MASK_CMPU)
    del conn
    id_by_code = {}
    for row in rows:
        id_by_code[row["Data_Wash_Cmpu_Cd"]] = row["Data_Wash_Cmpuid"]
    return id_by_code
def getCC(db):
    """Run ``const.getCCSql`` (formatted with ``db``) via ``config.washmeta`` and return the rows.

    On any exception the error is logged, the traceback is printed and the
    process exits with status 1.
    """
    try:
        wash_conn = MySQL(config.washmeta)
        rows = wash_conn.execute(const.getCCSql.format(db))
    except Exception:
        logging.error('获取清洗库 字段数据 失败!')
        print(traceback.format_exc())
        sys.exit(1)
    else:
        del wash_conn
        return rows
def gethiveC(db):
    """Run ``const.getCSql`` (formatted with ``db``) via ``config.hivemeta`` and return the rows.

    On any exception the error is logged, the traceback is printed and the
    process exits with status 1.
    """
    try:
        hive_conn = MySQL(config.hivemeta)
        rows = hive_conn.execute(const.getCSql.format(db))
    except Exception:
        logging.error('获取hive 字段元数据失败')
        print(traceback.format_exc())
        sys.exit(1)
    else:
        del hive_conn
        return rows
def getCT(db):
    """Run ``const.getCTSql`` (formatted with ``db``) via ``config.washmeta`` and return the rows.

    On any exception the error is logged, the traceback is printed and the
    process exits with status 1.
    """
    try:
        wash_conn = MySQL(config.washmeta)
        rows = wash_conn.execute(const.getCTSql.format(db))
    except Exception:
        logging.error('获取清洗库 表元数据 失败!')
        print(traceback.format_exc())
        sys.exit(1)
    else:
        del wash_conn
        return rows
def gethiveP(db):
    """Run ``const.getPSql`` (formatted with ``db``) via ``config.hivemeta`` and return the rows.

    On any exception the error is logged, the traceback is printed and the
    process exits with status 1.
    """
    try:
        hive_conn = MySQL(config.hivemeta)
        rows = hive_conn.execute(const.getPSql.format(db))
    except Exception:
        logging.error('获取hive 分区元数据失败!')
        print(traceback.format_exc())
        sys.exit(1)
    else:
        del hive_conn
        return rows
def compareP(db, table1, table2):
    """Report whether any ``const.getTPSql`` row lacks a counterpart in ``const.getCTPSql``.

    Each row of the first result is matched against the remaining rows of the
    second; a matched row is removed so it cannot be matched twice.  Rows that
    exist only in the second result are not detected.

    Args:
        db: Database name formatted into both queries.
        table1: Mapping whose ``Data_Tbl_Phys_Nm`` fills the first query.
        table2: Mapping whose ``Data_Tbl_Phys_Nm`` fills the second query.

    Returns:
        True as soon as a row of the first result has no equal row left in
        the second; False otherwise (including when the first is empty).
    """
    hive_conn = MySQL(config.hivemeta)
    wash_conn = MySQL(config.washmeta)
    hive_parts = hive_conn.execute(
        const.getTPSql.format(db, table1.get('Data_Tbl_Phys_Nm')))
    wash_parts = wash_conn.execute(
        const.getCTPSql.format(db, table2.get('Data_Tbl_Phys_Nm')))
    del hive_conn
    del wash_conn
    for hive_part in hive_parts:
        for index, wash_part in enumerate(wash_parts):
            if hive_part == wash_part:
                # Consume the match so duplicates need their own partner.
                del wash_parts[index]
                break
        else:
            return True
    return False
def updateC(db, date):
    """Refresh ``data_fld`` from the rows returned by ``gethiveC(db)``.

    When ``getCC(db)`` returns no rows, every ``gethiveC`` row is inserted
    with ``Create_Dt`` set to ``date``.  Otherwise each ``gethiveC`` row that
    has no ``getCC`` row equal on all of ``Data_Tblid``, ``Fld_Phys_Nm``,
    ``Fld_Cn_Nm``, ``Fld_Data_Type`` and ``Fld_Ord`` triggers an UPDATE of
    the ``data_fld`` row keyed by ``Data_Tblid`` and ``Fld_Phys_Nm``.

    Args:
        db: Database name, passed to ``gethiveC`` and ``getCC``.
        date: Value written to ``Create_Dt`` or ``Upd_Dt``.

    Any failed insert or update is logged, the traceback is printed and the
    process exits with status 1.
    """
    columns1 = gethiveC(db)  # pass the database name
    columns2 = getCC(db)  # pass the database name
    conn = MySQL(config.washmeta)
    columns1 = getTableID(columns1)

    if len(columns2) == 0:
        for c1 in columns1:
            c1['Create_Dt'] = date
            logging.debug('column:' + str(c1['Data_Tblid']) + ':' +
                          c1['Fld_Phys_Nm'])
            try:
                conn.insert("data_fld", c1)
            except Exception:
                logging.error('第一次插入字段数据失败,失败数据为:' + str(c1))
                print(traceback.format_exc())
                sys.exit(1)
    else:
        compared = ('Data_Tblid', 'Fld_Phys_Nm', 'Fld_Cn_Nm',
                    'Fld_Data_Type', 'Fld_Ord')
        # Index the recorded rows once so each hive row is a single lookup
        # instead of a scan over every recorded column.
        recorded = set(tuple(c2.get(key) for key in compared)
                       for c2 in columns2)
        for c1 in columns1:
            if tuple(c1.get(key) for key in compared) in recorded:
                continue
            c1['Upd_Dt'] = date
            logging.debug('column:' + str(c1['Data_Tblid']) + ':' +
                          c1['Fld_Phys_Nm'])
            try:
                # NOTE(review): values are interpolated directly into the SQL
                # text, so a quote inside Fld_Cn_Nm breaks the statement;
                # switch to bound parameters if MySQL.execute supports them.
                conn.execute(
                    "update data_fld set Fld_Cn_Nm='{}',Fld_Data_Type='{}',Fld_Ord='{}',Upd_Dt='{}' where Data_Tblid='{}' and Fld_Phys_Nm='{}'"
                    .format(c1['Fld_Cn_Nm'], c1['Fld_Data_Type'],
                            c1['Fld_Ord'], c1['Upd_Dt'], c1['Data_Tblid'],
                            c1['Fld_Phys_Nm']))
            except Exception:
                logging.error('插入新增字段数据失败,数据为:' + str(c1))
                print(traceback.format_exc())
                sys.exit(1)
    del conn
def getDBs(tables):
    """Replace each table's ``Dbid`` (a database name) with the matching database id.

    The rows of ``const.getDBs`` are read through ``config.washmeta``.  For
    every entry of ``tables`` whose ``Dbid`` equals a row's ``Db_Phys_Nm``,
    ``Dbid`` is overwritten with that row's ``Dbid``; entries without a match
    are left unchanged.  When several rows share a name, the first one wins.

    Args:
        tables: List of dicts, modified in place.

    Returns:
        The same ``tables`` list.

    If the query fails, the error is logged, the traceback is printed and the
    process exits with status 1.
    """
    try:
        conn = MySQL(config.washmeta)
        dbs = conn.execute(const.getDBs)
    except Exception:
        logging.error('获取库ID 失败!')
        print(traceback.format_exc())
        sys.exit(1)
    # Drop the connection once the rows are fetched, as the sibling getters do.
    del conn

    # setdefault keeps the first row per name, matching the original scan order.
    db_by_name = {}
    for db in dbs:
        db_by_name.setdefault(db.get('Db_Phys_Nm'), db)

    for table in tables:
        match = db_by_name.get(table.get('Dbid'))
        if match is not None:
            table['Dbid'] = match['Dbid']
    return tables
def getTableID(tables1):
    """Replace each row's ``Data_Tblid`` (a table name) with the id from ``data_tbl``.

    ``Data_Tblid`` and ``Data_Tbl_Phys_Nm`` are read from ``data_tbl`` through
    ``config.washmeta``.  For every entry of ``tables1`` whose ``Data_Tblid``
    equals a row's ``Data_Tbl_Phys_Nm``, ``Data_Tblid`` is overwritten with
    that row's ``Data_Tblid``; entries without a match are left unchanged.
    When several rows share a name, the first one wins.

    Args:
        tables1: List of dicts, modified in place.

    Returns:
        The same ``tables1`` list.

    If the query fails, the error is logged, the traceback is printed and the
    process exits with status 1.
    """
    try:
        conn = MySQL(config.washmeta)
        tables2 = conn.execute(
            'select Data_Tblid,Data_Tbl_Phys_Nm from data_tbl')
    except Exception:
        logging.error('获取表ID 失败!')
        print(traceback.format_exc())
        sys.exit(1)
    # Drop the connection once the rows are fetched, as the sibling getters do.
    del conn

    # NOTE(review): the query is not filtered by database, so equal table
    # names from different databases would collide here - confirm intent.
    # setdefault keeps the first row per name, matching the original scan order.
    row_by_name = {}
    for t2 in tables2:
        row_by_name.setdefault(t2.get('Data_Tbl_Phys_Nm'), t2)

    for t1 in tables1:
        match = row_by_name.get(t1.get('Data_Tblid'))
        if match is not None:
            t1['Data_Tblid'] = match.get('Data_Tblid')
    return tables1
def updateT(db, date):
    """Insert or touch ``data_tbl`` rows based on ``gethiveT(db)`` and ``getCT(db)``.

    When ``getCT(db)`` returns no rows, every ``gethiveT`` row is inserted
    with ``Create_Dt`` set to ``date`` and a fresh ``uuid1`` value.  Then, for
    every pair of rows sharing ``Data_Tbl_Phys_Nm``, ``Upd_Dt`` is set to
    ``date`` when ``compareP`` or ``compareC`` returns a true value.

    Args:
        db: Database name, passed to the getters and the compare helpers.
        date: Value written to ``Create_Dt`` or ``Upd_Dt``.

    Any failed insert or update is logged, the traceback is printed and the
    process exits with status 1.
    """
    hive_tables = gethiveT(db)  # pass the database name
    wash_tables = getCT(db)  # pass the database name
    conn = MySQL(config.washmeta)
    hive_tables = getDBs(hive_tables)

    if len(wash_tables) == 0:
        for hive_tbl in hive_tables:
            hive_tbl['Create_Dt'] = date
            hive_tbl['Data_Tbl_UUID'] = uuid.uuid1()
            logging.debug('table1:' + hive_tbl['Data_Tbl_Phys_Nm'])
            try:
                conn.insert("data_tbl", hive_tbl)
            except Exception:
                logging.error('第一次插入表数据失败,插入数据是:' + str(hive_tbl))
                print(traceback.format_exc())
                sys.exit(1)

    for hive_tbl in hive_tables:
        for wash_tbl in wash_tables:
            if hive_tbl.get('Data_Tbl_Phys_Nm') != wash_tbl.get(
                    'Data_Tbl_Phys_Nm'):
                continue
            # compareC only runs when compareP found no difference.
            changed = (compareP(db, hive_tbl, wash_tbl)
                       or compareC(db, hive_tbl, wash_tbl))
            if not changed:
                continue
            logging.debug('对比表table1:' + hive_tbl['Data_Tbl_Phys_Nm'])
            try:
                conn.execute(
                    "update data_tbl set Upd_Dt='{}' where Data_Tbl_Phys_Nm='{}'"
                    .format(date, hive_tbl.get('Data_Tbl_Phys_Nm')))
            except Exception:
                logging.error('更新表元数据失败,数据为:' + str(hive_tbl))
                print(traceback.format_exc())
                sys.exit(1)
    del conn
def mask_job_type(jobid):
    """Return the ``job_type`` of a masking job after validating it.

    Job type (Job_Type):
          2  run data masking immediately
          3  run data masking periodically

    Raises:
        exception.MaskException: If ``mask_const.JOB_TYPE`` returns no rows
            for ``jobid``, or if the job type is neither
            ``mask_const.MASK_IMM_MODE`` nor ``mask_const.MASK_FREQ_MODE``.
    """
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(mask_const.JOB_TYPE, (jobid, ))
    del conn
    if not len(rows):
        raise exception.MaskException("jobid is non-exist. [jobid:%s]" % jobid)
    job_type = rows[0]["job_type"]
    is_immediate = job_type == mask_const.MASK_IMM_MODE
    if not is_immediate and job_type != mask_const.MASK_FREQ_MODE:
        raise exception.MaskException("job type is invalid. [jobid:%s]" %
                                      jobid)
    return job_type
Exemple #26
0
def get_metadata_field(table_id):
    """Collect field metadata from the rows of ``const.TABLE_FIELD``.

    Returns:
        A 4-tuple of lists, all in row order:
        every ``fld_phys_nm``; every ``fld_data_type``; the names of rows
        with ``if_pk == 1``; the names of rows with ``if_can_null == 0``.
    """
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(const.TABLE_FIELD, (table_id, ))
    del conn
    names, data_types, primary_keys, not_nullable = [], [], [], []
    for row in rows:
        names.append(row["fld_phys_nm"])
        data_types.append(row["fld_data_type"])
        if row["if_pk"] == 1:
            primary_keys.append(row["fld_phys_nm"])
        if row["if_can_null"] == 0:
            not_nullable.append(row["fld_phys_nm"])
    return names, data_types, primary_keys, not_nullable
Exemple #27
0
def get_field_check(table_id):
    """Map each field to the distinct check codes listed for it.

    Result structure::

        {
            "field1": [rule1, rule2, ...],
            "field2": [rule1],
            ...
        }

    Keys are ``fld_phys_nm`` values in order of first appearance in the
    ``const.FIELD_CHECK`` rows; each value lists that field's distinct
    ``chk_proj_cd`` values in order of first appearance.

    Returns:
        A ``collections.OrderedDict`` of field name -> list of check codes.
    """
    db = MySQL(config.dqc_mysql)
    result = db.execute(const.FIELD_CHECK, (table_id, ))
    del db
    check = collections.OrderedDict()
    for rs in result:
        # Each field owns its list, so rows of one field that are not
        # contiguous can no longer pick up codes from another field.
        codes = check.setdefault(rs["fld_phys_nm"], [])
        if rs["chk_proj_cd"] not in codes:
            codes.append(rs["chk_proj_cd"])
    return check
def access():
    """Run ``ACCESS_SQL`` through ``config.dqc_mysql`` and return the result."""
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(ACCESS_SQL)
    del conn
    return rows
Exemple #29
0
class Tables:
    """Create, drop, clean and fill tables using SQL statements read from ``sqls.xml``.

    Statements are looked up by section id (``createSql``, ``dropSql``,
    ``cleanSql``, ``insertSql``) and, within a section, by item id.
    """

    def __init__(self):
        """Load ``sqls.xml`` from the working directory and open the database.

        Raises:
            IOError: If ``sqls.xml`` cannot be opened (logged first).
        """
        self._logger = Logger(__file__)
        try:
            fsock = open("sqls.xml", "r")
        except IOError:
            self._logger.error("The file don't exist, Please double check!")
            # Nothing useful can be done without the SQL definitions; the
            # previous code carried on and failed on the unbound file handle.
            raise
        # ``with`` closes the handle, which used to be left open.
        with fsock:
            self.sqls = BeautifulSoup(fsock.read())
        # NOTE(review): connection settings, including the password, are
        # hard-coded here; consider moving them to configuration.
        dbconfig = {'host':'127.0.0.1', 
                'port': 3306, 
                'user':'******', 
                'passwd':'123456', 
                'db':'scenic', 
                'charset':'utf8'}
        self.db = MySQL(dbconfig)

    def _find_sql(self, section_id, name):
        """Return the SQL text of item ``name`` in section ``section_id``, or None if absent."""
        section = self.sqls.find(id=section_id)
        if section is None:
            return None
        item = section.find(id=name)
        if item is None:
            return None
        return item.string

    def initDB(self):
        """Execute every item of the ``createSql`` section."""
        createSqls = self.sqls.find(id="createSql")
        for item in createSqls.select("item"):
            sql = item.string
            self._logger.info("create the table "+item.attrs["id"])
            self.db.execute(sql)
        # The cursor must be reopened here; per the original author it
        # otherwise raises an exception with error code 1024.
        self.db.reopenCursor()

    def createTable(self,name):
        """Execute the ``createSql`` item whose id is ``name``; log an error if none is found."""
        create = self._find_sql("createSql", name)
        if create:
            self._logger.info(" create table "+name)
            self.db.execute(create)
        else:
            self._logger.error("error occured when create table "+name)

    def dropAll(self):
        """Execute every item of the ``dropSql`` section."""
        dropSqls= self.sqls.find(id="dropSql")
        for item in dropSqls.select("item"):
            sql = item.string
            self._logger.info("drop the table "+item.attrs["id"])
            self.db.execute(sql)

    def dropTable(self,name):
        """Execute the ``dropSql`` item whose id is ``name``; log a warning if none is found."""
        # Look the item up by id like the other per-table methods and run its
        # own SQL; the previous code searched by tag name and then executed
        # an undefined variable.
        drop = self._find_sql("dropSql", name)
        if drop:
            self._logger.info("drop the table "+name)
            self.db.execute(drop)
        else:
            self._logger.warn("Don't have the table "+name)

    def cleanAll(self):
        """Execute every item of the ``cleanSql`` section (removes data, keeps the tables)."""
        cleanSqls= self.sqls.find(id="cleanSql")
        for item in cleanSqls.select("item"):
            sql = item.string
            self._logger.info("clean the table "+item.attrs["id"])
            self.db.execute(sql)

    def cleanTable(self,name):
        """Clean the data of the specified table (not implemented; does nothing)."""
        pass

    def insertTable(self,name,params):
        """Insert values into the specified table.

        # Parameters:
        name: the name of the table
        params: the values to insert; a tuple for one row, or a list for several rows
        # Return:
        None. An error is logged when no ``insertSql`` item has id ``name``.
        """
        insert = self._find_sql("insertSql", name)
        if insert:
            self._logger.info(" insert into table "+name)
            self.db.insert(insert,params)
        else:
            self._logger.error("did not find the table "+name+" when insert")

    def insertData(self,data):
        """Insert one ``Scenic`` object; this is the entry point for outside callers.

        # Parameters:
        data: a ``Scenic`` instance; it is encoded, then written to the
              ``scenery`` table and, when it has images, to ``sceneryImages``
        # Return:
        False when ``data`` is not a ``Scenic`` instance, otherwise None.
        """
        if not isinstance(data,Scenic):
            self._logger.error("the parameter is not the instance of Scenic")
            return False
        data.encode()
        types = self.joint(data.types)
        seasons = self.joint(data.fits)
        sceneryParams = (data.id,data.name,data.province,data.city,data.area,data.level,data.quality,data.description,data.website,data.symbol,data.opentime,data.closetime,data.price,data.suggest,seasons,types,data.longitude,data.latitude,data.precise,data.confidence)
        imageParams = [
            (data.id,str(uuid.uuid1()),item,data.name,data.name)
            for item in data.images
        ]
        self.insertTable("scenery",sceneryParams)
        # Only insert images when there are some; an empty list causes an error.
        if imageParams:
            self.insertTable("sceneryImages",imageParams)

    def joint(self,data,split=","):
        """Join the items of list ``data`` with ``split`` (default ``,``).

        Returns an empty string when ``data`` is not a list or is empty.
        """
        if isinstance(data,list) and data:
            return split.join(data)
        return ""

    def initTables(self):
        """Fill the basic tables ``sceneryType`` and ``season`` from ``SearchParams``."""
        basic = SearchParams()
        # insert basic data into the sceneryType table
        params = [(basic.scenicType[item],item,item)
                  for item in basic.scenicType.keys()]
        self.insertTable("sceneryType",params)
        # insert basic data into the season table
        params = [(basic.scenicFit[item],item)
                  for item in basic.scenicFit.keys()]
        self.insertTable("season",params)
def share():
    """Run ``SHARE_SQL`` through ``config.dqc_mysql`` and return the result."""
    conn = MySQL(config.dqc_mysql)
    rows = conn.execute(SHARE_SQL)
    del conn
    return rows