def postgresql_create_hiveTable(self, database, dst_table, src_table, pg, jobid):
    loger = logger.Logger("Method:postgresql_create_hiveTable")
    try:
        sqls_list = []
        sql1 = "create database if not exists `%s`" % (database)
        sql2 = "use `%s`" % (database)
        struTable = "select column_name, data_type from information_schema.columns where table_name = '%s' ORDER BY ordinal_position" % (src_table)
        fieldsName, fieldsValue = pg.getTableInfo(struTable)
        createtable1 = 'create table if not exists `%s`(' % (dst_table)
        createtable2 = ''
        for i in xrange(0, len(fieldsName)):
            # Map each PostgreSQL column type to its Hive equivalent.
            data_type_obj = postgres_hive_reader.PostgresqlHiveReader()
            fieldsValue[i] = data_type_obj.convert_data_type(fieldsValue[i])
            createtable2 += str(fieldsName[i]) + " " + str(fieldsValue[i])
            if i != len(fieldsName) - 1:
                createtable2 += ', '
            else:
                createtable2 += ')'
        sql3 = createtable1 + createtable2
        sqls_list.append(sql1)
        sqls_list.append(sql2)
        sqls_list.append(sql3)
        return sqls_list
    except Exception as error_message:
        loger.print_error("create hive table by postgresql schema error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def oracle_create_hiveTable(self, dst_database, dst_table, table, oc, jobid, table_owner):
    loger = logger.Logger("Method:oracle_create_hiveTable")
    try:
        sqls_list = []
        sql1 = "create database if not exists `%s`" % (dst_database)
        sql2 = "use %s" % (dst_database)
        struTable = "select column_name, data_type, data_length From dba_tab_columns where table_name='%s' and owner = '%s' ORDER BY COLUMN_ID" % (table.upper(), table_owner)
        loger.print_info(struTable)
        fieldsName, fieldsValue, field_Length = oc.getTableInfo(struTable)
        createtable1 = 'create table `%s`(' % (dst_table.lower())
        createtable2 = ''
        for i in xrange(0, len(fieldsName)):
            # Map each Oracle column type to its Hive equivalent.
            ohr = oracle_hive_reader.OracleHiveReader()
            fieldsValue[i] = ohr.convert_data_type(fieldsValue[i])
            createtable2 += "`" + str(fieldsName[i]) + "` " + str(fieldsValue[i])
            if i != len(fieldsName) - 1:
                createtable2 += ', '
            else:
                createtable2 += ')'
        sql3 = createtable1 + createtable2
        sqls_list.append(sql1)
        sqls_list.append(sql2)
        sqls_list.append(sql3)
        return sqls_list
    except Exception as error_message:
        loger.print_error("create hive table by oracle schema error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def oracle_pxf_write_table(obj, dst_table, src_table, host, database, operation, jobid, table_owner):
    loger = logger.Logger("Method:oracle_pxf_write_table")
    try:
        primary_keys1, serial_key, maxval, columns1 = obj.oracle_table_attributes(src_table.upper(), table_owner)
        table_sql = []
        port = 51200
        create_ext_table_sql = "CREATE EXTERNAL TABLE %s_ext (%s)\nLOCATION (\'pxf://%s:%s/%s.%s?PROFILE=Hive\') FORMAT \'custom\' (formatter=\'pxfwritable_import\');" % (
            dst_table.lower(), columns1, host, port, database, dst_table.lower())
        loger.print_info(create_ext_table_sql)
        loger.print_info("generated the PXF SQL statements")
        if operation == 2 or operation == '2':
            # Operation 2: append into an existing table, no DDL needed.
            table_sql.append('INSERT INTO "%s" SELECT * FROM "%s_ext";' % (dst_table.lower(), dst_table.lower()))
            return table_sql
        else:
            table_sql.append(create_ext_table_sql)
            table_sql.append('CREATE TABLE "%s" (\n%s\n)\nWITHOUT OIDS;' % (dst_table.lower(), columns1))
            table_sql.append('INSERT INTO "%s" SELECT * FROM "%s_ext";' % (dst_table.lower(), dst_table.lower()))
            loger.print_info(table_sql)
            return table_sql
    except Exception as error_message:
        loger.print_error("oracle pxf_service error!" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise error_message
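# Illustrative sketch (not part of the original module): how the SQL list built
# by oracle_pxf_write_table() is typically consumed. `hawq_conn` stands in for
# the psycopg2-backed HAWQ connection class defined elsewhere in this project
# (the one exposing execute_sql); its name and the call site are assumptions.
def _run_oracle_pxf_import(obj, dst_table, src_table, host, database, operation, jobid, table_owner, hawq_conn):
    for stmt in oracle_pxf_write_table(obj, dst_table, src_table, host, database,
                                       operation, jobid, table_owner):
        hawq_conn.execute_sql(stmt)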
def getTable_hbase_columns(self, table, column_family, statement_dt, statement):
    loger = logger.Logger("Method:getTable_hbase_columns")
    try:
        table_sql = "select column_name, data_type from information_schema.columns where table_name = '%s' ORDER BY ordinal_position" % (table)
        columns_name, data_type = self.getTableInfo(table_sql)
        # Column list for the PXF external table: the HBase row key plus one
        # "column_family:column" entry per source column.
        columns = "recordkey %s, " % (statement_dt)
        count = 0
        for i in range(len(columns_name)):
            columns = str(columns) + '"' + str(column_family) + ':' + str(columns_name[i]) + '"'
            columns += ' '
            columns += data_type[i]
            if count != (len(columns_name) - 1):
                columns += ', '
            count += 1
        # Projection list for the INSERT ... SELECT: the key column is read back
        # as recordkey, every other column keeps its family-qualified name.
        columns_new = ''
        count1 = 0
        for j in range(len(columns_name)):
            if columns_name[j] == statement:
                columns_new += 'recordkey'
            else:
                columns_new += '"' + str(column_family) + ':' + str(columns_name[j]) + '"'
            if count1 != (len(columns_name) - 1):
                columns_new += ', '
            count1 += 1
        return columns, columns_new
    except Exception as error_message:
        loger.print_error("get getTable_hbase_columns error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def __init__(self, conf=config, db_host=None, user=None, password=None,
             database=None, port=None, jobid=None):
    try:
        loger = logger.Logger("Method:init")
        # Record the job id first so the exception handler can always report it.
        self.jobid = jobid
        if conf:
            self.conn = psycopg2.connect(
                host=conf.gears_host,
                port=conf.gears_port,
                user=conf.gears_user,
                password=conf.gears_passwd,
                database=conf.gears_db,
            )
        else:
            self.test = db_host, port, user, password, database
            self.conn = psycopg2.connect(host=db_host,
                                         port=port,
                                         user=user,
                                         password=password,
                                         database=database)
        self.conn.set_client_encoding('utf-8')
    except Exception as error_message:
        loger.print_error("HDB connection error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def mysql_pxf_write_table(obj, dst_table, src_table, host, database, operation, jobid):
    loger = logger.Logger("Method:mysql_pxf_write_table")
    try:
        primary_keys1, columns1 = obj.mysql_table_attributes(src_table)
        table_sql = []
        port = 51200
        create_ext_table_sql = "CREATE EXTERNAL TABLE %s_ext (%s)\nLOCATION (\'pxf://%s:%s/%s.%s?PROFILE=Hive\') FORMAT \'custom\' (formatter=\'pxfwritable_import\');" % (
            dst_table, columns1, host, port, database, dst_table)
        loger.print_info(create_ext_table_sql)
        if operation == 2 or operation == '2':
            # Operation 2: append into an existing table, no DDL needed.
            table_sql.append("INSERT INTO %s SELECT * FROM %s_ext;" % (dst_table, dst_table))
            return table_sql
        else:
            table_sql.append(create_ext_table_sql)
            table_sql.append("CREATE TABLE %s (\n%s\n)\nWITHOUT OIDS;" % (dst_table, columns1))
            starttime = datetime.datetime.now()
            loger.print_info("[INFO]:PXF SQL generation started at %s" % starttime)
            table_sql.append("INSERT INTO %s SELECT * FROM %s_ext;" % (dst_table, dst_table))
            finishtime = datetime.datetime.now()
            loger.print_info("[INFO]:PXF SQL generation finished at %s" % finishtime)
            usedtime = finishtime - starttime
            loger.print_info("[INFO]:Total run time is %s" % (usedtime))
            loger.print_info(table_sql)
            return table_sql
    except Exception as error_message:
        loger.print_error("mysql pxf_service error" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise error_message
def mysql_hbase_pxf_write_table(obj, dst_table, src_table, host, database, jobid, statement_dt, statement):
    loger = logger.Logger("Method:mysql_hbase_pxf_write_table")
    try:
        columns_family = '%s' % (dst_table)
        primary_keys1, columns1, columns_new = obj.mysql_external_table_hbase_attributes(
            src_table, columns_family, statement_dt, statement)
        primary_keys1, columns2 = obj.mysql_table_attributes(src_table)
        table_sql = []
        port = 51200
        create_ext_table_sql = "CREATE EXTERNAL TABLE %s_ext (%s)\nLOCATION (\'pxf://%s:%s/%s?PROFILE=HBase\') FORMAT \'custom\' (formatter=\'pxfwritable_import\');" % (
            dst_table, columns1, host, port, dst_table)
        table_sql.append(create_ext_table_sql)
        table_sql.append("CREATE TABLE %s (\n%s\n)\nWITHOUT OIDS;" % (dst_table, columns2))
        table_sql.append("INSERT INTO %s SELECT %s FROM %s_ext;" % (dst_table, columns_new, dst_table))
        loger.print_info(table_sql)
        return table_sql
    except Exception as error_message:
        loger.print_error("mysql hbase pxf_service error" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise error_message
def create_table(self, db, table, token):
    loger = logger.Logger("create_table")
    url = self.baseUri + "/%s/%s" % (db, table)
    headers = {
        'Accept-Charset': 'utf-8',
        'Content-Type': 'application/json',
        'X-AUTH-TOKEN': '%s' % (token)
    }
    loger.print_info(config)
    try:
        columns = ModuleBase().get_mycolumns('base1.zetyun.com', config.hawq_port,
                                             config.hawq_user, config.hawq_passwd,
                                             db, "public", table)
    except Exception as error_message:
        loger.print_error(error_message)
        raise error_message
    loger.print_info(columns)
    ccc = [{"name": c[0], "col_type": c[1]} for c in columns]
    requestJson = {"tbl_type": "csvTable", "columns": ccc, "metas": {}}
    resp = requests.post(url, headers=headers, json=requestJson)
    if resp.status_code != 200:
        loger.print_info(resp.content)
        return None
    else:
        connResp = json.loads(resp.content)
        loger.print_info(connResp)
        loger.print_info(connResp["status"])
        if not connResp["status"]:
            return None
    return True
def pxf_write_table(sqc, dst_table, table, dst_hostname, dst_database, operation, jobid):
    loger = logger.Logger("SQLServerCourier.pxf_write_table")
    try:
        primary_keys1, columns1 = sqc.sqlserver_table_attributes(table)
        table_sql = []
        port = 51200
        create_ext_table_sql = "CREATE EXTERNAL TABLE %s_ext (%s)\nLOCATION (\'pxf://%s:%s/%s.%s?PROFILE=Hive\') FORMAT \'custom\' (formatter=\'pxfwritable_import\');" % (
            dst_table, columns1, dst_hostname, port, dst_database, dst_table)
        loger.print_info(create_ext_table_sql)
        loger.print_info("generated the PXF SQL statements")
        if operation == 2 or operation == '2':
            table_sql.append('INSERT INTO %s SELECT * FROM %s_ext;' % (dst_table, dst_table))
            return table_sql
        else:
            table_sql.append(create_ext_table_sql)
            table_sql.append('CREATE TABLE %s (\n%s\n)\nWITHOUT OIDS;' % (dst_table, columns1))
            table_sql.append('INSERT INTO %s SELECT * FROM %s_ext;' % (dst_table, dst_table))
            loger.print_info(table_sql)
            return table_sql
    except Exception as e:
        loger.print_error("sqlserver pxf_service error" + str(e))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise e
def mysql_create_hiveTable(self, database, dst_table, src_table, obj, jobid):
    loger = logger.Logger("Method:mysql_create_hiveTable")
    try:
        sqls_list = []
        sql1 = "create database if not exists `%s`" % (database)
        sql2 = "use `%s`" % (database)
        struTable = "select COLUMN_NAME, DATA_TYPE from INFORMATION_SCHEMA.Columns where table_name= '%s'" % (src_table)
        fieldsName, fieldsValue = obj.getTableInfo(struTable)
        createtable1 = "create table if not exists `%s`(" % (dst_table)
        createtable2 = ''
        for i in xrange(0, len(fieldsName)):
            # Map each MySQL column type to its Hive equivalent.
            data_type_obj = mysql_hive_reader.MysqlHiveReader()
            fieldsValue[i] = data_type_obj.convert_data_type(fieldsValue[i])
            createtable2 += "`" + str(fieldsName[i]) + "` " + str(fieldsValue[i])
            if i != len(fieldsName) - 1:
                createtable2 += ', '
            else:
                createtable2 += ')'
        sql3 = createtable1 + createtable2
        sqls_list.append(sql1)
        sqls_list.append(sql2)
        sqls_list.append(sql3)
        return sqls_list
    except Exception as error_message:
        loger.print_error("create hive table by mysql schema error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
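# Illustrative sketch (not part of the original module): executing the Hive DDL
# list built by mysql_create_hiveTable() through the pyhs2-backed Hive
# connection class in this project. `mysql_courier` (the object exposing
# mysql_create_hiveTable) and `hive_conn` are assumed names.
def _create_hive_table_from_mysql(mysql_courier, hive_conn, database, dst_table, src_table, obj, jobid):
    cur = hive_conn.conn.cursor()
    for ddl in mysql_courier.mysql_create_hiveTable(database, dst_table, src_table, obj, jobid):
        cur.execute(ddl)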
def __init__(self, json_params):
    try:
        loger = logger.Logger("Method:init")
        loger.print_info("Start parsing job parameters")
        self.jobid = json_params["jobid"]
        self.token = json_params["token"]
        self.task_name = json_params["name"]
        self.description = json_params["description"]
        self.files = json_params["params"]["src_conn"]["shell_helper"]
        if not isinstance(self.files, list):
            # The file list may arrive as a unicode string such as "['a.csv', 'b.csv']";
            # normalise it back into a plain list of file names.
            loger.print_info("convert filename format to list")
            files = unicodedata.normalize('NFKD', self.files).encode('utf-8', 'ignore')
            files = files.replace('[', '').replace(']', '')
            files = files.split(',')
            files_list = []
            for i in range(len(files)):
                files[i] = files[i].replace("'", '')
                files_list.append(files[i])
            self.files = files_list
        self.files_encoding = json_params["params"]["src_conn"]["files_encoding"]
        self.columns_name = json_params["params"]["src_conn"]["columns_name"]
        self.datacolumn_first = json_params["params"]["src_conn"]["DataColumn_first"]
        if self.datacolumn_first < 1:
            db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        self.datacolumn_last = json_params["params"]["src_conn"]["DataColumn_last"]
        self.dst_database = json_params["params"]["dst_conn"]["database"]
        self.dst_hostname = json_params["params"]["dst_conn"]["hostname"]
        if self.dst_database is None:
            db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        self.tableinfo = json_params["params"]["dst_conn"]["tableinfo"]
        self.t_type = json_params["params"]["dst_conn"]["tableinfo"]["Ttype"]
        self.separatorsign = json_params["params"]["dst_conn"]["tableinfo"]["separatorsign"]
        self.continue_with_error = json_params["params"]["dst_conn"]["tableinfo"]["continue_with_error"]
        self.tablename = json_params["params"]["dst_conn"]["tableinfo"]["tablename"]
        self.tablename = str(self.tablename).lower()
        if self.t_type == 1 or self.t_type == '1':
            self.columns = self.tableinfo["columns"]
        self.dst_ehc_id = json_params["params"]["dst_conn"]["ehc_id"]
        self.dst_clustername = json_params["clustername"]
        loger.print_info("Finish parsing job parameters")
    except Exception as error_message:
        loger.print_error("file import parameter parsing error!" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def rename(filename):
    loger = logger.Logger("Method:rename")
    loger.print_info("Start rename")
    file_new = str(filename).split(".")[0] + '.csv'
    rename_cmd = "hdfs dfs -mv %s %s" % (filename, file_new)
    os.system(rename_cmd)
    loger.print_info("Finish rename")
    return file_new
def default(self, obj):
    try:
        loger = logger.Logger("CustomJSONEncoder.default")
        if isinstance(obj, datetime):
            return obj.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    except TypeError as e:
        loger.print_error(str(e))
    return JSONEncoder.default(self, obj)
def upload_file(filename):
    loger = logger.Logger("Method:upload_file")
    loger.print_info("Start upload file")
    upload_cmd = "hdfs dfs -put %s %s" % (filename, config.load_catalog)
    os.system(upload_cmd)
    loger.print_info("Finish upload file")
    file_new = config.load_catalog + str(filename).split("/")[-1]
    return file_new
def uncompress_file(filename):
    loger = logger.Logger("Method:uncompress_file")
    loger.print_info("Start uncompress")
    uncompress_cmd = "tar -zxvf %s" % (filename)
    os.system(uncompress_cmd)
    file_new = str(filename).split(".tar")[0] + '.del'
    loger.print_info("Finish uncompress")
    return file_new
def download_compress(filename):
    loger = logger.Logger("Method:download_compress")
    loger.print_info("Start download compress file")
    download_cmd = "hdfs dfs -get %s %s" % (filename, config.local_cache)
    loger.print_info(download_cmd)
    os.system(download_cmd)
    filename = config.local_cache + str(filename).split("/")[-1]
    loger.print_info("Finish download compress file")
    return filename
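# Illustrative sketch (not part of the original module): one plausible way the
# file helpers above are chained for a compressed source file on HDFS; the
# ordering of the steps is an assumption based on what each helper does.
def _stage_compressed_file(hdfs_path):
    local_tar = download_compress(hdfs_path)   # copy the .tar.gz from HDFS to the local cache
    local_del = uncompress_file(local_tar)     # unpack it, yielding a .del data file
    staged = upload_file(local_del)            # push the unpacked file to the HDFS load catalog
    return rename(staged)                      # rename it with a .csv suffix for downstream loading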
def __init__(self, host, port, version="v1"):
    loger = logger.Logger("Method:init")
    try:
        self.webapp = ""
        self.baseUri = "http://%s:%s/%s/metatable/create" % (host, port, version)
        loger.print_info("ddl client init: " + self.baseUri)
    except Exception as error_message:
        loger.print_error(error_message)
        raise error_message
def sqoop_data_hbase(host, port, db, username, password, jobid, hbase_database, dst_table, src_table, statement, statement_value):
    loger = logger.Logger("Method:sqoop_data_hbase")
    try:
        sqoop_str = "sqoop import --connect jdbc:postgresql://%s:%s/%s --table %s --hbase-table %s --column-family %s --hbase-create-table -username %s -password %s --incremental append --check-column %s --last-value %s --null-string ' ' --null-non-string ' ' --hive-drop-import-delims" % (
            host, port, db, src_table, dst_table, dst_table, username, password, statement, statement_value)
        loger.print_info(sqoop_str)
        os.system(sqoop_str)
    except Exception as error_message:
        loger.print_error("postgresql sqoop into hbase failed" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise error_message
def execute_meod(self, sql):
    loger = logger.Logger("Method:execute_meod")
    try:
        cursor = self.conn.cursor()
        rt = cursor.execute(sql)
        return rt
    except Exception as error_message:
        loger.print_error("Execute SQL engine error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def remove_csv_header(filename, jobid):
    loger = logger.Logger("Method:remove_csv_header")
    try:
        loger.print_info("Start removing csv header")
        # Drop the first line (the header row) in place.
        remove_header_cmd = "sed -i '1d' %s" % (filename)
        os.system(remove_header_cmd)
        loger.print_info("Finish removing csv header")
    except Exception as error_message:
        loger.print_error("remove_csv_header error" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise error_message
def sqoop_data_hbase(host, db, username, password, jobid, hbase_database, dst_table, src_table, statement, statement_value):
    loger = logger.Logger("SQLServerCourier.SqoopDataHBase")
    try:
        sqoop_str = "sqoop import --connect 'jdbc:sqlserver://%s;username=%s;password=%s;database=%s' --table %s --hbase-table %s --column-family %s --hbase-create-table --incremental append --check-column %s --last-value %s --null-string ' ' --null-non-string ' ' --hive-drop-import-delims" % (
            host, username, password, db, src_table, dst_table, dst_table, statement, statement_value)
        loger.print_info(sqoop_str)
        os.system(sqoop_str)
    except Exception as e:
        loger.print_error("sqlserver sqoop into hbase failed" + str(e))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise e
def upload_csv(filename, jobid):
    loger = logger.Logger("Method:upload_csv")
    try:
        loger.print_info("Start upload csv file")
        upload_cmd = 'hdfs dfs -put %s %s' % (filename, config.load_catalog)
        loger.print_info("upload command is :%s" % (upload_cmd))
        os.system(upload_cmd)
        loger.print_info("Finish upload csv file")
    except Exception as error_message:
        loger.print_error("upload_csv error" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise error_message
def sqlserver_table_attributes(self, table):
    loger = logger.Logger("Method:sqlserver_table_attributes")
    try:
        primary_key_sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE WHERE TABLE_NAME='%s'" % (table)
        primary_keys = self.query_one(primary_key_sql)
        columns = self.getTable_columns(table)
        return primary_keys, columns
    except Exception as error_message:
        loger.print_error("sqlserver_table_attributes error!" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def execute_sql(self, sql):
    loger = logger.Logger("Method:execute_sql")
    try:
        loger.print_info(sql)
        cur = self.conn.cursor(cursor_factory=RealDictCursor)
        cur.execute(sql)
        self.conn.commit()
        return cur
    except Exception as error_message:
        loger.print_error("Execute SQL engine error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def restart_pxf_server():
    loger = logger.Logger("PXFServerTracker")
    restart_cmd = '/etc/init.d/pxf-server restart'
    # Restart the local PXF server, then restart it on the other cluster nodes.
    # Each os.system() call runs in its own shell, so the remote restarts are
    # issued as one-shot ssh commands rather than interactive sessions.
    os.system(restart_cmd)
    os.system("ssh base2.zetyun.com '%s'" % restart_cmd)
    os.system("ssh base3.zetyun.com '%s'" % restart_cmd)
    loger.print_info("Restart Successfully")
def sqoop_data(host, port, db, username, password, jobid, hive_database, dst_table, src_table, ml):
    loger = logger.Logger("Method:sqoop_data")
    try:
        primary_key, columns = ml.postgresql_table_attributes(src_table)
        if len(primary_key) != 0:
            # Split on the primary key so the import can run with 3 parallel mappers.
            primary_key = primary_key[0][0]
            sqoop_str = "sqoop import --hive-import --connect 'jdbc:postgresql://%s:%s/%s' --username '%s' --password '%s' --table '%s' --hive-table %s.%s --null-string ' ' --null-non-string ' ' --hive-drop-import-delims --split-by '%s' -m 3;" % (
                host, port, db, username, password, src_table, hive_database, dst_table, primary_key)
        else:
            # No primary key: fall back to a single mapper.
            sqoop_str = "sqoop import --hive-import --connect 'jdbc:postgresql://%s:%s/%s' --username '%s' --password '%s' --table '%s' --hive-table %s.%s --null-string ' ' --null-non-string ' ' --hive-drop-import-delims -m 1;" % (
                host, port, db, username, password, src_table, hive_database, dst_table)
        loger.print_info(sqoop_str)
        os.system(sqoop_str)
    except Exception as error_message:
        loger.print_error("postgres sqoop error" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise error_message
def __init__(self, jobid):
    loger = logger.Logger("Method:init")
    try:
        self.jobid = jobid
        self.host = config.hive_host
        self.port = int(config.hive_port)
        self.user = config.hive_user
        self.password = config.hive_passwd
        self.database = config.hive_database
        self.authMechanism = config.hive_authMechanism
        self.conn = pyhs2.connect(host=self.host,
                                  port=self.port,
                                  authMechanism=self.authMechanism,
                                  user=self.user,
                                  password=self.password,
                                  database=self.database)
    except Exception as error_message:
        loger.print_error("Hive DB connection error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def postgresql_table_attributes(self, table):
    loger = logger.Logger("Method:postgresql_table_attributes")
    try:
        # Look up the table's primary-key column from the system catalogs.
        primary_key_sql = ("select pg_attribute.attname as colname from pg_constraint "
                           "inner join pg_class on pg_constraint.conrelid = pg_class.oid "
                           "inner join pg_attribute on pg_attribute.attrelid = pg_class.oid "
                           "and pg_attribute.attnum = pg_constraint.conkey[1] "
                           "inner join pg_type on pg_type.oid = pg_attribute.atttypid "
                           "where pg_class.relname = '%s' and pg_constraint.contype='p' "
                           "and pg_table_is_visible(pg_class.oid)" % (table))
        primary_keys = self.query_one(primary_key_sql)
        columns = self.getTable_columns(table)
        return primary_keys, columns
    except Exception as error_message:
        loger.print_error("get postgresql_table_attributes error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(self.jobid, JobStatus.EXCEPTION)
        raise error_message
def download_excel(filename, jobid):
    loger = logger.Logger("Method:download_excel")
    try:
        loger.print_info("Start download excel file")
        download_cmd = 'hdfs dfs -get %s %s' % (filename, config.local_cache)
        os.system(download_cmd)
        temp_name = config.local_cache + filename.split('/')[-1]
        loger.print_info("Finish download excel file")
        return temp_name
    except Exception as error_message:
        loger.print_error("download_excel error" + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, 'exception')
        raise error_message
def hawq_pretreate(table, jobid):
    loger = logger.Logger("Method:hawq_pretreate")
    try:
        loger.print_info(table)
        # Drop the PXF external table and the target table (if they exist)
        # before the import recreates them.
        hawq_pretreate = []
        pretreate1 = 'drop external table if exists "%s"' % (table + '_ext')
        pretreate2 = 'drop table if exists "%s"' % (table)
        hawq_pretreate.append(pretreate1)
        hawq_pretreate.append(pretreate2)
        loger.print_info(hawq_pretreate)
        return hawq_pretreate
    except Exception as error_message:
        loger.print_error("HDB pretreatment error, reason : " + str(error_message))
        importjob = db_conns.importjob_conn.update_status(jobid, JobStatus.EXCEPTION)
        raise error_message
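# Illustrative sketch (not part of the original module): hawq_pretreate() is
# meant to run before the CREATE/INSERT statements produced by the
# *_pxf_write_table helpers, so the _ext external table and the target table
# are dropped first. `hawq_conn` (a connection exposing execute_sql) and the
# overall call order are assumptions.
def _reload_hawq_table(hawq_conn, table, jobid, write_sql):
    for stmt in hawq_pretreate(table, jobid):
        hawq_conn.execute_sql(stmt)
    for stmt in write_sql:
        hawq_conn.execute_sql(stmt)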