def write_sh(self, group_id=0):
    # If group_id is given, only that group's script should be refreshed
    engine = conn.meta('etl_data')
    sshcon = ssh_con()
    ssh_uat = ssh_cmd(sshcon.ssh_uat)
    ssh_sc = ssh_cmd(sshcon.ssh_sc)
    sql_txt = """
        SELECT group_id, sql_file, cmds
        FROM job_group_set
        WHERE del_flag = 0 AND freq_type = '{0}'
        ORDER BY group_id, rank_id
    """
    job_group = pd.read_sql(sql_txt.format(self.frency), engine)
    # if group_id < 1 or group_id > self.group_num:
    gp_map, gp_sql = self.group_sh()
    # Rebuild each group's shell file from scratch
    for i in gp_map.keys():
        filepath = confs.main_path_bin + gp_map[i]
        # open the group's shell file; 'w' truncates it so reruns do not append duplicates
        with open(filepath, 'w', encoding='utf-8') as f:
            tp = list(job_group[job_group['group_id'] == i]['cmds'])
            for sqls in tp:
                f.write(sqls)
                f.write("\n")
        ssh_uat.upload(filepath, confs.remote_path_bin + gp_map[i])
        ssh_sc.upload(filepath, confs.remote_path_bin + gp_map[i])
    ssh_uat.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
    ssh_sc.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
    ssh_uat.close()
    ssh_sc.close()
    return 1
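# --- Standalone sketch (illustration only; not part of the original module) ---
# Shows, with in-memory data, the same "rebuild one shell file per group" pattern that
# write_sh() applies above. group_sh() is not shown in this fragment, so the shape of
# gp_map and the command strings below are assumptions for illustration.
import pandas as pd

def _demo_write_group_files(out_dir='/tmp/'):
    job_group = pd.DataFrame({
        'group_id': [1, 1, 2],
        'cmds': ['sh run_a.sh', 'sh run_b.sh', 'sh run_c.sh'],  # made-up commands
    })
    gp_map = {1: 'd_run_group_1.sh', 2: 'd_run_group_2.sh'}  # assumed shape of group_sh() output
    for gid, fname in gp_map.items():
        # 'w' truncates the file so repeated runs do not duplicate lines
        with open(out_dir + fname, 'w', encoding='utf-8') as f:
            for cmd in job_group[job_group['group_id'] == gid]['cmds']:
                f.write(cmd + '\n')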
def sdd_table(self, db, tb_list):
    # Create the SDD tables on UAT first, then mirror them to production (sc)
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_uat)
    is_success = ssh.hive_ddl(db, tb_list)
    ssh.close()  # release the UAT connection before switching targets
    if is_success > 0:
        ssh = ssh_cmd(sshcon.ssh_sc)
        ssh.hive_ddl(db, tb_list)
        ssh.close()
def auto_deploy(self, tar_ssh='ssh_uat'):
    tb_list = self.read_deploy()
    print(tb_list)
    sshcon = ssh_con()
    # ssh = ssh_cmd(sshcon.ssh_uat)
    if tar_ssh == 'ssh_sc':
        self.ssh = ssh_cmd(sshcon.ssh_sc)
    ssh = self.ssh
    for tb in tb_list:
        heads = tb[0:4]
        if heads in confs.db_map.keys():
            # sqoop sync deployment
            print('\n sqoop同步配置:', tb)
            tp_tb = tb[5:]
            tar_cmd = heads + ' ' + tp_tb + ' auto'
            tb_size = conn.sljr_tb_size(db=heads, tb=tp_tb)
            if conn.etl_set_exists(tb) > 0:
                print(tb, '目标表已经加入了调度,如果需要重新调度请手动修改')
                break
            if tb_size < 0:
                print(tp_tb, '表不存在不能同步,或者检查表名')
                break
            if tb_size > 10000000:
                print(tp_tb, '大于1千万需要增量同步:', tb_size)
                tar_cmd = tar_cmd + ' inc'
            if conn.hive_tb_exists(tb) == 0:
                self.sdd_table(db=heads, tb_list=[tp_tb])  # sync the table structure first
            group_sh = confs.local_path + 'bin/sqoop_' + heads + '.sh'
            tar_cmd = confs.sqoop_sh + tar_cmd
            if self.append_sh(group_sh, tar_cmd) > 0:
                if ssh.cmd_run([tar_cmd]) > 0:
                    ssh.upload(group_sh, confs.remote_path + 'bin/sqoop_' + heads + '.sh')
            else:
                print(heads, 'shell文件配置错位')
                break
        else:
            # hive sql deployment check
            print('\n hive sql同步配置检测:', tb)
            flag, tar_tb, depd_list = self.check_deploy(tb)
            if flag == 0:
                print('\033[1;37;45m ERROR:', tb, ' 配置文件检查错误 \033[0m')
                break
            else:
                print('检测通过:', tb)
                ssh.upload(confs.main_path + 'cfg/' + tb + '.properties', confs.remote_path + 'cfg/' + tb + '.properties')
                ssh.upload(confs.main_path + 'sql/' + tb + '.sql', confs.remote_path + 'sql/' + tb + '.sql')
                # ssh.upload(confs.main_path + 'bin/' + tb + '.sh', confs.remote_path + 'bin/' + tb + '.sh')
                tar_cmd = confs.hive_sh + tb + '.sql'
                # print('执行数据同步完成')
                if ssh.cmd_run([tar_cmd]) > 0:
                    if self.add_job(tb + '.sql', tar_tb, depd_list) > 0:
                        self.write_sh()
                else:
                    # self.write_sh()
                    print('\033[1;37;45m ERROR:', tb, ' sql执行错误,请修改 \033[0m')
    ssh.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
    ssh.close()
def __init__(self, group_num=10, frency='d', tar_ssh='ssh_uat'):
    self.group_num = group_num
    if frency in ['d', 'w', 'm']:  # 'd' = daily, 'w' = weekly, 'm' = monthly
        self.frency = frency
    else:
        print('frency 参数只能是 d(天),w(周),m(月) ')
        raise Exception("frency 参数只能是 d(天),w(周),m(月) ")
    self.group_name = frency + '_run_group'
    sshcon = ssh_con()
    self.ssh = ssh_cmd(sshcon.ssh_uat)
    if tar_ssh == 'ssh_sc':
        self.ssh = ssh_cmd(sshcon.ssh_sc)
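# --- Hedged usage sketch (illustration only) ---
# The class these methods belong to is not named in this fragment, so `EtlDeploy`
# below is a placeholder; the calls only illustrate the intended order of use.
#
#     deployer = EtlDeploy(group_num=10, frency='d', tar_ssh='ssh_uat')  # 'd' = daily jobs
#     deployer.auto_deploy()             # deploy everything returned by read_deploy()
#     deployer.run_sql('cdi_example')    # or validate/upload/run a single sql deployment
#     deployer.write_sh()                # rebuild the per-group run scripts afterwards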
def run_sql(self, tb, tar_ssh='ssh_uat'):
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_uat)
    if tar_ssh == 'ssh_sc':
        ssh = ssh_cmd(sshcon.ssh_sc)
    flag, tar_tb, depd_list = self.check_deploy(tb)
    if flag == 0:
        print('\033[1;37;45m ERROR:', tb, ' 配置文件检查错误 \033[0m')
    else:
        print('检测通过:', tb)
        ssh.upload(confs.main_path + 'cfg/' + tb + '.properties', confs.remote_path + 'cfg/' + tb + '.properties')
        ssh.upload(confs.main_path + 'sql/' + tb + '.sql', confs.remote_path + 'sql/' + tb + '.sql')
        tar_cmd = confs.hive_sh + tb + '.sql'
        # print('执行数据同步完成')
        if ssh.cmd_run([tar_cmd]) > 0:
            print('执行成功')
        else:
            print('\033[1;37;45m ERROR:', tb, ' sql执行错误,请修改 \033[0m')
    ssh.close()
def auto_deploy(etl_group, tar_ssh='ssh_uat'):
    tb_list = read_deploy()
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_uat)
    if tar_ssh == 'ssh_sc':
        ssh = ssh_cmd(sshcon.ssh_sc)
    for tb in tb_list:
        heads = tb[0:4]
        if heads in confs.db_map.keys():
            print('sqoop同步配置:', tb)
            tp_tb = tb[5:]
            # fixed: the original built tar_cmd from the undefined name `db`
            tar_cmd = heads + ' ' + tp_tb + ' auto'
            tb_size = conn.sljr_tb_size(db=heads, tb=tp_tb)
            if conn.etl_set_exists(tb) > 0:
                print(tb, '目标表已经加入了调度,如果需要重新调度请手动修改')
                break
            if tb_size < 0:
                print(tp_tb, '表不存在不能同步,或者检查表名')
                break
            if tb_size > 10000000:
                print(tp_tb, '大于1千万需要增量同步:', tb_size)
                tar_cmd = tar_cmd + ' inc'
            if conn.hive_tb_exists(tb) == 0:
                sdd_table(db=heads, tb_list=[tp_tb])  # sync the table structure first
            group_sh = confs.local_path + 'bin/sqoop_' + heads + '.sh'
            if append_sh(group_sh, tar_cmd) > 0:
                ssh.upload(group_sh, confs.remote_path + 'bin/sqoop_' + heads + '.sh')
            else:
                print(heads, 'shell文件配置错位')
                break
        else:
            # hive sql deployment check
            print('hive sql同步配置检测:', tb)
            flag, tar_tb = check_deploy(tb)
            if flag == 0:
                print(tb, '配置文件检查错误')
                break
            else:
                print('检测通过:', tb)
                if tb in etl_group.keys():
                    if conn.etl_set_exists(tar_tb) > 0:
                        print(tar_tb, '目标表已经加入了调度,如果需要重新调度请手动修改')
                    else:
                        group_sh = confs.local_path + 'bin/' + etl_group[tb]
                        if append_sh(group_sh, tb + '.sql') > 0:
                            ssh.upload(group_sh, confs.remote_path + 'bin/' + etl_group[tb])
                        else:
                            print(etl_group[tb], 'shell文件配置错位')
                            break
                    ssh.upload(confs.main_path + 'cfg/' + tb + '.properties', confs.remote_path + 'cfg/' + tb + '.properties')
                    ssh.upload(confs.main_path + 'sql/' + tb + '.sql', confs.remote_path + 'sql/' + tb + '.sql')
                    ssh.upload(confs.main_path + 'bin/' + tb + '.sh', confs.remote_path + 'bin/' + tb + '.sh')
                    ssh.cmd_run(['chmod 755 -R /home/bigdata/bin'])
                else:
                    print('脚本没有指定分组调度')
                    break
    ssh.close()
def get_sc_hive_dml():
    etl_data = conn.meta()
    tbs_sql = """
        select
            -- d.`NAME` db_name,
            concat(d.`NAME`, '.', t.TBL_NAME) tb_name,
            tp.tb_com tb_name_cn,
            v.COLUMN_NAME col_name,
            v.`COMMENT` col_comment,
            v.TYPE_NAME col_data_type,
            CURRENT_DATE() check_date
        from hive.columns_v2 v
        inner join hive.sds s on v.CD_ID = s.CD_ID
        inner join hive.tbls t on s.sd_id = t.sd_id
        inner join hive.dbs d on d.db_id = t.db_id
        LEFT JOIN (
            select s.tbl_id tb_id,
                   max(if(PARAM_KEY='comment', PARAM_VALUE, null)) tb_com,
                   FROM_UNIXTIME(max(if(PARAM_KEY='transient_lastDdlTime', PARAM_VALUE, null))) last_ddl_time,
                   FROM_UNIXTIME(max(if(PARAM_KEY='last_modified_time', PARAM_VALUE, null))) last_modified_time,
                   max(if(PARAM_KEY='last_modified_by', PARAM_VALUE, '')) last_modified_by
            from hive.TABLE_PARAMS s
            GROUP BY s.TBL_ID
        ) tp on t.TBL_ID = tp.tb_id
        where d.`NAME` in ('cdi', 'app')
    """
    part_sql = """
        SELECT concat(d.name, '.', t.TBL_NAME) tb_name,
               p.PKEY_NAME col_name,
               p.PKEY_COMMENT col_comment,
               p.PKEY_TYPE col_data_type
        FROM hive.partition_keys p
        inner join hive.tbls t on p.tbl_id = t.tbl_id
        inner join hive.dbs d on d.db_id = t.db_id
        where d.`NAME` in ('cdi', 'app')
    """
    sc = pd.read_sql(tbs_sql, etl_data)
    parts = pd.read_sql(part_sql, etl_data)
    ddl_file = open(confs.main_path_py + 'hive/sc_hive_tbs.sql', 'w+', encoding='utf-8')
    tb_list = sc[['tb_name', 'tb_name_cn']].drop_duplicates()
    tb_list = tb_list.set_index('tb_name').to_dict()['tb_name_cn']
    for tb in tb_list.keys():
        ddls = "\ndrop table if exists {0};\ncreate table if not exists {0} (".format(tb)
        tb_com = sc[sc['tb_name'] == tb]
        if tb_com.shape[0] > 0:
            for i in tb_com.index:
                tb_sql = tb_com.loc[i, 'col_name'].ljust(30) + tb_com.loc[i, 'col_data_type'] \
                    + ' COMMENT \'' + tb_com.loc[i, 'col_comment'].replace(';', '').replace('\'', '') + '\','
                ddls = ddls + '\n' + tb_sql
            ddls = ddls[:-1] + ")\n comment '{0}'".format(tb_list[tb])
            tp_parts = parts[parts['tb_name'] == tb]
            if tp_parts.shape[0] > 0:
                # print('dsssss', tp_parts)
                p_str = "\npartitioned by ("
                for kp in tp_parts.index:
                    tb_sql = tp_parts.loc[kp, 'col_name'].ljust(10) + tp_parts.loc[kp, 'col_data_type'] \
                        + ' COMMENT \'' + str(tp_parts.loc[kp, 'col_comment']) + '\','
                    p_str = p_str + '\n' + tb_sql
                p_str = p_str[:-1] + ')'
                ddls = ddls + p_str
            ddls = ddls + '\n STORED AS ORCfile;'
            ddl_file.write(ddls)
            ddl_file.write('\n\n')
            # print(ddls)
    ddl_file.close()
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_uat)
    ssh.upload(confs.main_path_py + 'hive/sc_hive_tbs.sql', confs.remote_path_py + 'hive/sc_hive_tbs.sql')
    ssh.cmd_run(["hive -f '{0}'".format(confs.remote_path_py + 'hive/sc_hive_tbs.sql')])
    ssh.close()
    return 1
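# --- Output shape sketch (illustration only) ---
# For each table, get_sc_hive_dml() above appends a statement of roughly this form to
# hive/sc_hive_tbs.sql; the table and column names below are placeholders, only the
# layout follows the string-building code:
#
#     drop table if exists cdi.example_tb;
#     create table if not exists cdi.example_tb (
#     id                            bigint COMMENT 'primary key',
#     name                          string COMMENT 'customer name')
#      comment 'example table'
#     partitioned by (
#     dt        string COMMENT 'partition date')
#      STORED AS ORCfile;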
                        print(tar_tb, '目标表已经加入了调度,如果需要重新调度请手动修改')
                    else:
                        group_sh = confs.local_path + 'bin/' + etl_group[tb]
                        tar_cmd = hive_sh + tb + '.sql'
                        print('执行命令:', tar_cmd)
                        if append_sh(group_sh, tar_cmd) > 0:
                            if if_run:
                                if ssh.cmd_run([tar_cmd]) > 0:
                                    ssh.upload(group_sh, confs.remote_path + 'bin/' + etl_group[tb])
                        else:
                            print(etl_group[tb], 'shell文件配置错位')
                            break
                else:
                    print('\033[1;37;45m ERROR:', tb, ' 脚本没有指定分组调度 \033[0m')
                    break
    ssh.cmd_run(['chmod 755 -R /home/bigdata/bin /home/bigdata/sql /home/bigdata/cfg'])
    ssh.close()


if __name__ == '__main__':
    cmd = sqoop_tp()
    sshcon = ssh_con()
    ssh = ssh_cmd(sshcon.ssh_sc)
    ssh.cmd_run(cmd, if_print=0)
    ssh.close()