def getHdfsDirSizeForSSH(self, hdfsDir):
    sysConf = ConfModel.getSystemConf()
    nameNodeAccount = sysConf.get('hadoop').get('hadoop_namenode_account')
    nameNodeHost = sysConf.get('hadoop').get('hadoop_namenode_host')
    command = "hdfs dfs -du -s " + hdfsDir
    commandRs = Process.sshCommand(nameNodeAccount, nameNodeHost, command)
    rs = {}
    if (commandRs.get('code') == 0):
        strToList = commandRs.get('stdoutPut').split('\n')
        # Filter out empty lines and shell noise, keeping the last real output line
        line = ""
        for curLine in strToList:
            if (len(curLine) == 0):
                continue
            elif ("bash" in curLine):
                continue
            else:
                line = curLine
        filterList = line.split()
        rs['dir'] = filterList[2]
        rs['dataSize'] = int(filterList[0])
        rs['hdfsSize'] = int(filterList[1])
    else:
        print commandRs.get('erroutPut')
    return rs
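# Note (assumption about the environment): on recent Hadoop releases
# `hdfs dfs -du -s <dir>` prints one summary line of the form
#   <dataSize> <sizeWithReplication> <dir>
# e.g. "123456  370368  /user/hive/warehouse/dw_db.db/dw_cal", which is what the
# filterList indexing above relies on. Older releases print only two columns
# (size and path), in which case the indexes would need adjusting.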
def importMysqlToHive(self):
    # Currently selected database server
    dbServer = self.getDbServer()
    # Fetch the structure of the source MySQL table, used to create the Hive table
    mysqlFileds = self.dbServerModel.getFileds(self.getSourceDb(), self.getSourceTable())
    mysqlFiledsFormat = '=String,'.join(mysqlFileds) + "=String"
    # Script to execute
    script = self.systemCorePath['shellPath'] + '/sqoop_import_mysql.sh '
    # Arguments
    script += '--sqoop_home \"' + self.systemConf['sqoop']['sqoop_home'] + '\" '
    script += '--local_tmp_dir \"' + self.systemCorePath['tmpPath'] + '\" '
    script += '--hdfs_sqoop_tmp_dir \"/tmp/sqoop\" '
    script += '--mysql_host \"' + dbServer['mysql_host'] + '\" '
    script += '--mysql_port \"' + dbServer['mysql_port'] + '\" '
    script += '--mysql_user \"' + dbServer['mysql_user'] + '\" '
    script += '--mysql_password \"' + dbServer['mysql_password'] + '\" '
    script += '--mysql_database \"' + self.getSourceDb() + '\" '
    script += '--mysql_table \"' + self.getSourceTable() + '\" '
    script += '--hive_table \"' + self.getTargetDb() + '.' + self.getTargetTable() + '\" '
    script += '--fields_terminated_by \"' + self.getFieldsTerminated() + '\" '
    script += '--map_column_hive_fields \"' + mysqlFiledsFormat + '\" '
    script += '--mappers_num \"' + str(self.getMapReduceNum()) + '\"'
    result = Process.runScriptSync(script)
    return result
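# Note: mysqlFiledsFormat turns the column list into the form used by Sqoop's
# --map-column-hive option, e.g. ['id', 'name'] -> "id=String,name=String", so every
# column lands in Hive as STRING. How the wrapper sqoop_import_mysql.sh consumes
# --map_column_hive_fields is assumed here, since that script is not shown.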
def importMysqlToHbase(self, querySql=None):
    # Currently selected database server
    dbServer = self.getDbServer()
    tmpDir = self.systemCorePath['tmpPath'] + '/sqoop_outdir'
    targetDir = '/tmp/sqoop/' + self.getTargetTable()
    # Assemble the sqoop import command
    script = self.systemConf['sqoop']['sqoop_home'] + '/bin/sqoop import '
    script += '--connect \"jdbc:mysql://' + dbServer['mysql_host'] + ':' + dbServer['mysql_port'] + '/' + self.getSourceDb() + '?useUnicode=true&tinyInt1isBit=false&characterEncoding=utf-8\" '
    script += '--username \"' + dbServer['mysql_user'] + '\" '
    script += '--password \"' + dbServer['mysql_password'] + '\" '
    script += '--hbase-create-table '
    script += '--hbase-table \"' + self.getTargetTable() + '\" '
    script += '--column-family \"' + self.getbaseColumnFamily() + '\" '
    script += '--m \"' + str(self.getMapReduceNum()) + '\" '
    script += '--outdir \"' + tmpDir + '\" '
    script += '--target-dir \"' + targetDir + '\" '
    script += '--delete-target-dir '
    if (querySql == None):
        # Whole-table import: clean up the generated .java file, then pass --table
        rmTmp = 'rm ' + tmpDir + '/' + self.getSourceTable() + '.java'
        Process.runScriptSync(rmTmp)
        script += '--table \"' + self.getSourceTable() + '\" '
    else:
        # Query-based import: clean up the temporary QueryResult.java first
        rmTmp = 'rm ' + tmpDir + '/QueryResult.java'
        Process.runScriptSync(rmTmp)
        script += '--hbase-row-key \"' + self.getHbaseRowKey() + '\" '
        script += '--split-by \"' + self.getHbaseRowKey() + '\" '
        script += '--query \"' + querySql + '\"'
    #print script
    result = Process.runScriptSync(script)
    return result
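# Usage sketch (hypothetical query): with a free-form query Sqoop requires the
# literal token $CONDITIONS in the WHERE clause so the work can be split across
# mappers, e.g.
#   self.importMysqlToHbase(
#       querySql='select id, name from user where status = 1 and \\$CONDITIONS')
# The backslash keeps the shell from expanding $CONDITIONS, because the query is
# wrapped in double quotes above. Passing querySql=None imports the whole table
# named by getSourceTable() instead.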
def mysqlDumpFile(self, sql, file):
    # sed formatting rules
    # Strip newline characters inside a row
    rowRegexp = 's/[\\n|\\r\\n]//g;'
    # Replace the string NULL with a \N null marker
    rowRegexp += 's/NULL/\\\\\N/g;'
    # Column separator: rewrite tabs as \001
    rowRegexp += 's/\t/\001/g;'
    # Assemble the command
    script = self.getMysqlCommand() + " -N -s -e \"" + sql + "\" | sed -e \"" + rowRegexp + "\" > " + file
    result = Process.runScriptSync(script)
    return result
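# Usage sketch (hypothetical SQL and output path):
#   self.mysqlDumpFile('select * from user', '/tmp/user.dat')
# streams the query result through sed so that embedded newlines are stripped,
# the string NULL becomes Hive's \N null marker, and the tab column separator
# produced by `mysql -N -s` (no header, silent tab-separated output) is rewritten
# to the \001 delimiter Hive uses by default.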
def hiveBinInterface(self):
    #process = Process()
    #result = process.runScript('ls ~/develop/jason/dw_etl/dw_service ')
    #print result['stdoutPut']
    #list = ['ls ~/develop/jason/dw_etl/dw_service', 'ls ~/develop/jason/uba']
    #list = ['hadoop dfs -ls /user/hive', 'hadoop dfs -ls /user']
    list = [
        'hive -e "select count(*) from dw_db.dw_broker_summary_basis_info_daily;"',
        'hive -e "select count(*) from dw_db.dw_cal;"'
    ]
    result = Process.runThreadingScripts(list)
    # Polling loop used for manual testing; status is never cleared,
    # so this runs until interrupted
    status = True
    while (status):
        sleep(1)
        for item in result:
            print item
            # Check whether each worker thread is still alive
            print item.isAlive()
def runHiveScript(self, sql):
    hiveHome = self.hiveConf['hiveHome']
    runCommand = hiveHome + "/bin/hive -e " + '\"' + sql + '\"'
    result = Process.runScriptSync(runCommand)
    return result
def runMysqlCommand(self, sql=''):
    scriptBase = self.getMysqlCommand() + " -N -s -e "
    runScript = scriptBase + "\"" + sql + "\""
    return Process.runScriptSync(runScript)
def hadoopCommand(self, command):
    return Process.runScriptSync(command)