Example #1
 def write_sh(self,group_id=0): # if a group_id is specified, only that group's script is updated
     engine=conn.meta('etl_data')
     sshcon=ssh_con()
     ssh_uat=ssh_cmd(sshcon.ssh_uat)
     ssh_sc=ssh_cmd(sshcon.ssh_sc)
     sql_txt="""
                 SELECT group_id,sql_file,cmds
                 FROM  job_group_set where del_flag=0 and freq_type='{0}'
                 order by group_id,rank_id
               """
     job_group=pd.read_sql(sql_txt.format(self.frency),engine)
     #if group_id<1 or group_id>self.group_num: 
     gp_map,gp_sql=self.group_sh() # clears the group shell files
     for i in gp_map.keys():
         filepath=confs.main_path_bin+gp_map[i]
         f=open(filepath, 'a',encoding='utf-8') # open the file for appending
         tp=list(job_group[job_group['group_id']==i]['cmds'])
         for sqls in tp:
             f.write(sqls)
             f.write("\n")
         f.close()
         ssh_uat.upload(filepath,confs.remote_path_bin+gp_map[i])
         ssh_sc.upload(filepath,confs.remote_path_bin+gp_map[i])
     ssh_uat.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
     ssh_sc.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
     ssh_uat.close()
     ssh_sc.close()
     return 1
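A minimal, self-contained sketch of the grouping logic write_sh applies: commands are grouped by group_id and appended, one per line, to that group's shell script before it is uploaded. The file names and rows below are illustrative, not taken from job_group_set.

import pandas as pd

# illustrative stand-ins for the job_group query result and for gp_map from group_sh()
job_group = pd.DataFrame({
    'group_id': [1, 1, 2],
    'cmds': ['sh etl_a.sh', 'sh etl_b.sh', 'sh etl_c.sh'],
})
gp_map = {1: 'd_run_group_1.sh', 2: 'd_run_group_2.sh'}  # group_id -> group script name

for i in gp_map:
    with open(gp_map[i], 'a', encoding='utf-8') as f:  # append, as in write_sh above
        for cmd in job_group[job_group['group_id'] == i]['cmds']:
            f.write(cmd + '\n')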
Example #2
 def sdd_table(self,db,tb_list): # create the SDD tables on both UAT and production
     sshcon=ssh_con()
     ssh=ssh_cmd(sshcon.ssh_uat)
     is_success=ssh.hive_ddl(db,tb_list)
     if is_success>0:
         ssh.close() # close the UAT connection before switching to production
         ssh=ssh_cmd(sshcon.ssh_sc)
         ssh.hive_ddl(db,tb_list)
     ssh.close()
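The method above follows a UAT-first rollout: the DDL runs on UAT, and production (ssh_sc) is only touched if UAT reports success. A standalone sketch of that pattern, where run_ddl is a stand-in for ssh_cmd(...).hive_ddl(...), which is not shown in these examples:

def deploy_ddl(run_ddl, db, tb_list):
    if run_ddl('uat', db, tb_list) > 0:    # build on UAT first
        return run_ddl('sc', db, tb_list)  # production only after UAT succeeds
    return 0

# dummy runner that always reports success, just to exercise the flow
print(deploy_ddl(lambda env, db, tbs: 1, 'cdi', ['customer_info']))   # -> 1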
Example #3
    def auto_deploy(self,tar_ssh='ssh_uat'): 
        tb_list=self.read_deploy()
        print(tb_list)
        sshcon=ssh_con()
        #ssh=ssh_cmd(sshcon.ssh_uat)
        if tar_ssh=='ssh_sc':
            self.ssh=ssh_cmd(sshcon.ssh_sc)
        ssh=self.ssh
        for tb in tb_list:
            heads=tb[0:4]
            if heads in confs.db_map.keys():
                print('\n  sqoop sync configuration:',tb)
                tp_tb=tb[5:]
                tar_cmd=heads+' '+tp_tb+' auto'
                tb_size=conn.sljr_tb_size(db=heads,tb=tp_tb)
                if conn.etl_set_exists(tb)>0:
                    print(tb,'target table is already scheduled; edit manually if it needs rescheduling')
                    break
                if tb_size<0:
                    print(tp_tb,'table does not exist or the name is wrong; cannot sync')
                    break
                if tb_size>10000000:
                    print(tp_tb,'more than 10 million rows, incremental sync required:',tb_size)
                    tar_cmd=tar_cmd+' inc'
                if conn.hive_tb_exists(tb)==0:
                    self.sdd_table(db=heads,tb_list=[tp_tb]) # sync the table structure
                group_sh=confs.local_path+'bin/sqoop_'+heads+'.sh'
                tar_cmd=confs.sqoop_sh+tar_cmd
                if self.append_sh(group_sh,tar_cmd)>0:  
                    if ssh.cmd_run([tar_cmd])>0:
                        ssh.upload(group_sh,confs.remote_path+'bin/sqoop_'+heads+'.sh')
                else:
                    print(heads,'shell file configuration error')
                    break
            else:
                # hive sql configuration
                print('\n  hive sql sync configuration check:',tb)
                flag,tar_tb,depd_list=self.check_deploy(tb)
                if flag==0:
                    print('\033[1;37;45m ERROR:',tb,'  configuration file check failed        \033[0m')
                    break
                else:
                    print('check passed:',tb)
                    ssh.upload(confs.main_path+'cfg/'+tb+'.properties',confs.remote_path+'cfg/'+tb+'.properties')
                    ssh.upload(confs.main_path+'sql/'+tb+'.sql',confs.remote_path+'sql/'+tb+'.sql')
                    #ssh.upload(confs.main_path+'bin/'+tb+'.sh',confs.remote_path+'bin/'+tb+'.sh')
                    tar_cmd=confs.hive_sh+tb+'.sql'
                    #print('data sync finished')
                    if ssh.cmd_run([tar_cmd])>0:
                        if self.add_job(tb+'.sql',tar_tb,depd_list)>0:
                            self.write_sh()
                    else:
                        #self.write_sh()
                        print('\033[1;37;45m ERROR:',tb,' sql execution failed, please fix        \033[0m')

        ssh.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
        ssh.close()
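auto_deploy relies on a naming convention for each deploy entry: the first four characters select the source database (a key of confs.db_map) and everything after the underscore is the table name; entries whose prefix is not in db_map are treated as hive sql deployments. A small illustrative sketch of that branching (the entry and db_map contents below are made up):

db_map = {'odsx': 'source_db_x'}   # stand-in for confs.db_map

tb = 'odsx_customer_info'
heads, tp_tb = tb[0:4], tb[5:]     # 'odsx', 'customer_info'
if heads in db_map:
    tar_cmd = heads + ' ' + tp_tb + ' auto'   # arguments passed to the sqoop sync script
    print('sqoop sync:', tar_cmd)
else:
    print('treated as a hive sql deployment:', tb)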
Example #4
 def __init__(self,group_num=10,frency='d',tar_ssh='ssh_uat'):
      self.group_num=group_num
      if frency in ['d','w','m']: # d = daily, w = weekly, m = monthly
          self.frency=frency
      else:
          print('the frency parameter must be d (daily), w (weekly) or m (monthly)')
          raise Exception("the frency parameter must be d (daily), w (weekly) or m (monthly)")
      self.group_name=frency+'_run_group'
      sshcon=ssh_con()
      self.ssh=ssh_cmd(sshcon.ssh_uat)
      if tar_ssh=='ssh_sc':
          self.ssh=ssh_cmd(sshcon.ssh_sc)
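A standalone sketch of the frequency validation the constructor performs (only 'd', 'w' and 'm' are accepted, and the value also prefixes the group script name kept in self.group_name):

def validate_frency(frency):
    if frency not in ('d', 'w', 'm'):   # d = daily, w = weekly, m = monthly
        raise ValueError('frency must be d (daily), w (weekly) or m (monthly)')
    return frency + '_run_group'        # e.g. 'd_run_group', as in self.group_name

print(validate_frency('d'))   # -> d_run_group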
Example #5
 def run_sql(self,tb,tar_ssh='ssh_uat'):  
     sshcon=ssh_con()
     ssh=ssh_cmd(sshcon.ssh_uat)
     if tar_ssh=='ssh_sc':
         ssh=ssh_cmd(sshcon.ssh_sc)
     flag,tar_tb,depd_list=self.check_deploy(tb)
     if flag==0:
         print('\033[1;37;45m ERROR:',tb,'  configuration file check failed        \033[0m')
     else:
         print('check passed:',tb)
         ssh.upload(confs.main_path+'cfg/'+tb+'.properties',confs.remote_path+'cfg/'+tb+'.properties')
         ssh.upload(confs.main_path+'sql/'+tb+'.sql',confs.remote_path+'sql/'+tb+'.sql')
         tar_cmd=confs.hive_sh+tb+'.sql'
         #print('data sync finished')
         if ssh.cmd_run([tar_cmd])>0:
             print('execution succeeded')
         else:
             print('\033[1;37;45m ERROR:',tb,' sql execution failed, please fix        \033[0m')
     ssh.close()        
Example #6
def auto_deploy(etl_group, tar_ssh='ssh_uat'):
    tb_list = read_deploy()
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_uat)
    if tar_ssh == 'ssh_sc':
        ssh = ssh_cmd(sshcon.ssh_sc)
    for tb in tb_list:
        heads = tb[0:4]
        if heads in confs.db_map.keys():
            print('sqoop sync configuration:', tb)
            tp_tb = tb[5:]
            tar_cmd = heads + ' ' + tp_tb + ' auto'
            tb_size = conn.sljr_tb_size(db=heads, tb=tp_tb)
            if conn.etl_set_exists(tb) > 0:
                print(tb, 'target table is already scheduled; edit manually if it needs rescheduling')
                break
            if tb_size < 0:
                print(tp_tb, 'table does not exist or the name is wrong; cannot sync')
                break
            if tb_size > 10000000:
                print(tp_tb, 'more than 10 million rows, incremental sync required:', tb_size)
                tar_cmd = tar_cmd + ' inc'
            if conn.hive_tb_exists(tb) == 0:
                sdd_table(db=heads, tb_list=[tp_tb])  # sync the table structure
            group_sh = confs.local_path + 'bin/sqoop_' + heads + '.sh'
            if append_sh(group_sh, tar_cmd) > 0:
                ssh.upload(group_sh,
                           confs.remote_path + 'bin/sqoop_' + heads + '.sh')
            else:
                print(heads, 'shell file configuration error')
                break
        else:
            # hive sql configuration
            print('hive sql sync configuration check:', tb)
            flag, tar_tb = check_deploy(tb)
            if flag == 0:
                print(tb, 'configuration file check failed')
                break
            else:
                print('check passed:', tb)
                if tb in etl_group.keys():
                    if conn.etl_set_exists(tar_tb) > 0:
                        print(tar_tb, 'target table is already scheduled; edit manually if it needs rescheduling')
                    else:
                        group_sh = confs.local_path + 'bin/' + etl_group[tb]
                        if append_sh(group_sh, tb + '.sql') > 0:
                            ssh.upload(
                                group_sh,
                                confs.remote_path + 'bin/' + etl_group[tb])
                        else:
                            print(etl_group[tb], 'shell file configuration error')
                            break
                    ssh.upload(confs.main_path + 'cfg/' + tb + '.properties',
                               confs.remote_path + 'cfg/' + tb + '.properties')
                    ssh.upload(confs.main_path + 'sql/' + tb + '.sql',
                               confs.remote_path + 'sql/' + tb + '.sql')
                    ssh.upload(confs.main_path + 'bin/' + tb + '.sh',
                               confs.remote_path + 'bin/' + tb + '.sh')
                    ssh.cmd_run(['chmod 755 -R /home/bigdata/bin'])
                else:
                    print('the script has no scheduling group assigned')
                    break
    ssh.close()
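In this module-level variant, etl_group maps each hive script name to the group shell file it should be appended to under bin/. An illustrative call (the dictionary entries are made up; the calls are commented out because they need the confs/conn/ssh environment):

etl_group = {
    'dwd_customer_info': 'd_run_group_1.sh',
    'dws_order_summary': 'd_run_group_2.sh',
}
# auto_deploy(etl_group)                    # deploy to UAT (the default)
# auto_deploy(etl_group, tar_ssh='ssh_sc')  # deploy to production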
Example #7
def get_sc_hive_dml():
        etl_data=conn.meta()
        tbs_sql="""
          select -- d.`NAME` db_name,
                concat( d.`NAME`,'.', t.TBL_NAME) tb_name,
                tp.tb_com tb_name_cn,
                v.COLUMN_NAME col_name, 
                v.`COMMENT` col_comment,
                v.TYPE_NAME  col_data_type,CURRENT_DATE() check_date
            from hive.columns_v2 v 
            inner join hive.sds s on v.CD_ID=s.CD_ID 
            inner join hive.tbls t on s.sd_id=t.sd_id 
            inner join hive.dbs d on d.db_id=t.db_id 
            LEFT JOIN(select s.tbl_id tb_id,
                    max(if(PARAM_KEY='comment',PARAM_VALUE,null)) tb_com,
                    FROM_UNIXTIME(max(if(PARAM_KEY='transient_lastDdlTime',PARAM_VALUE,null))) last_ddl_time,
                    FROM_UNIXTIME(max(if(PARAM_KEY='last_modified_time',PARAM_VALUE,null))) last_modified_time,
                    max(if(PARAM_KEY='last_modified_by',PARAM_VALUE,'')) last_modified_by
            from hive.TABLE_PARAMS s GROUP BY s.TBL_ID) tp on t.TBL_ID=tp.tb_id
            where d.`NAME` in( 'cdi','app') 
        """
        part_sql="""        SELECT       
                concat(d.name,'.',t.TBL_NAME) tb_name,
                p.PKEY_NAME col_name, 
                p.PKEY_COMMENT col_comment,
                p.PKEY_TYPE  col_data_type
            FROM hive.partition_keys p
            inner join hive.tbls t on p.tbl_id=t.tbl_id 
            inner join hive.dbs d on d.db_id=t.db_id 
            where d.`NAME` in( 'cdi','app') """
        sc=pd.read_sql(tbs_sql,etl_data)
        parts=pd.read_sql(part_sql,etl_data)
        ddl_file = open(confs.main_path_py+'hive/sc_hive_tbs.sql', 'w+',encoding='utf-8')
        tb_list=sc[['tb_name','tb_name_cn']].drop_duplicates()
        tb_list=tb_list.set_index('tb_name').to_dict()['tb_name_cn']
        for tb in tb_list.keys():
            ddls="\ndrop table if exists {0};\ncreate table if not exists {0} (".format(tb)
            tb_com=sc[sc['tb_name']==tb]
            if tb_com.shape[0]>0:
                for i in tb_com.index:
                    tb_sql=tb_com.loc[i,'col_name'].ljust(30)+tb_com.loc[i,'col_data_type']+' COMMENT \''+tb_com.loc[i,'col_comment'].replace(';','').replace('\'','')+'\','#
                    ddls=ddls+'\n'+tb_sql
            ddls=ddls[:-1]+")\n comment '{0}'".format(tb_list[tb])
            tp_parts=parts[parts['tb_name']==tb]
            if tp_parts.shape[0]>0:
                #print('dsssss',tp_parts)
                p_str="\npartitioned by (" 
                for kp in tp_parts.index:
                    tb_sql=tp_parts.loc[kp,'col_name'].ljust(10)+tp_parts.loc[kp,'col_data_type']+' COMMENT \''+str(tp_parts.loc[kp,'col_comment'])+'\','#
                    p_str=p_str+'\n'+tb_sql
                p_str=(p_str[:-1])+')'
                ddls=ddls+p_str
            ddls=ddls+'\n STORED AS ORCfile;'
            ddl_file.write(ddls)
            ddl_file.write('\n\n')
            #print(ddls)
        ddl_file.close()
        sshcon=ssh_con()
        ssh=ssh_cmd(sshcon.ssh_uat)
        ssh.upload(confs.main_path_py+'hive/sc_hive_tbs.sql',confs.remote_path_py+'hive/sc_hive_tbs.sql')
        ssh.cmd_run(["hive -f '{0}'".format(confs.remote_path_py+'hive/sc_hive_tbs.sql')])
        ssh.close()
        return 1
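For reference, each table written to sc_hive_tbs.sql follows the shape the function assembles above. A self-contained sketch that builds one such DDL string from made-up columns (not read from the hive metastore):

tb = 'cdi.customer_info'
cols = [('customer_id', 'string', 'customer id'), ('reg_date', 'string', 'registration date')]
parts = [('dt', 'string', 'partition date')]

ddls = "\ndrop table if exists {0};\ncreate table if not exists {0} (".format(tb)
for name, dtype, comment in cols:
    ddls = ddls + '\n' + name.ljust(30) + dtype + " COMMENT '" + comment + "',"
ddls = ddls[:-1] + ")\n comment '{0}'".format('customer base table')

p_str = "\npartitioned by ("
for name, dtype, comment in parts:
    p_str = p_str + '\n' + name.ljust(10) + dtype + " COMMENT '" + comment + "',"
ddls = ddls + p_str[:-1] + ')'

ddls = ddls + '\n STORED AS ORCfile;'
print(ddls)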
Example #8
                        print(tar_tb, 'target table is already scheduled; edit manually if it needs rescheduling')
                    else:
                        group_sh = confs.local_path + 'bin/' + etl_group[tb]
                        tar_cmd = hive_sh + tb + '.sql'
                        print('command to run:', tar_cmd)
                        if append_sh(group_sh, tar_cmd) > 0:
                            if if_run:
                                if ssh.cmd_run([tar_cmd]) > 0:
                                    ssh.upload(
                                        group_sh, confs.remote_path + 'bin/' +
                                        etl_group[tb])
                        else:
                            print(etl_group[tb], 'shell file configuration error')
                            break
                else:

                    print('\033[1;37;45m ERROR:', tb,
                          '  the script has no scheduling group assigned        \033[0m')
                    break
    ssh.cmd_run(
        ['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
    ssh.close()


if __name__ == '__main__':
    cmd = sqoop_tp()
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_sc)
    ssh.cmd_run(cmd, if_print=0)
    ssh.close()