def create_body(table_header, table_body):
    """Build the column-definition section of a CREATE TABLE statement.

    Rows whose partition-key cell equals
    ``mtm.TABLE_BODY_PARTITION_KEY_LIST[0]`` are collected by name into the
    partition key list; every other row becomes one
    ``<name> <type> COMMENT '<comment>'`` line, joined with ``",\\n"``.

    :param table_header: header information handed to ``Table``.
    :param table_body: object whose ``iterrows()`` yields ``(index, row)``
        pairs, one row per column (presumably a pandas DataFrame - confirm
        against the caller).
    :return: tuple ``(body_script, partition_key_list)``.
    """
    column_lines = []
    partition_key_list = []
    _table_layer = Table(table_header).table_layer()

    for _, row in table_body.iterrows():
        _column_partition_key = str(row[mtm.TABLE_BODY_STRUCTURE[5]]).strip()
        _raw_name = str(row[mtm.TABLE_BODY_STRUCTURE[1]]).strip()

        if _column_partition_key == mtm.TABLE_BODY_PARTITION_KEY_LIST[0]:
            partition_key_list.append(_raw_name)
            continue

        _column_physical_name = f"`{_raw_name}`"
        # Columns of the first table layer are always emitted as String,
        # regardless of the type given in the body row.
        if _table_layer == cdhh.TABLE_LAYER_LIST[0]:
            _column_type = "String"
        else:
            _column_type = str(row[mtm.TABLE_BODY_STRUCTURE[2]]).strip()
        _column_comment = str(row[mtm.TABLE_BODY_STRUCTURE[8]]).strip()

        # Mandatory check. The raw name is checked rather than the
        # backtick-wrapped one: "`...`" is never empty, so a missing name
        # could not be detected through the quoted form.
        _check_key_list = [
            mtm.TABLE_BODY_STRUCTURE[1],
            mtm.TABLE_BODY_STRUCTURE[2],
            mtm.TABLE_BODY_STRUCTURE[8],
        ]
        _check_value_list = [_raw_name, _column_type, _column_comment]
        fe.table_info_miss(zip(_check_key_list, _check_value_list))

        column_lines.append(
            f"{_column_physical_name} {_column_type} "
            f"COMMENT '{_column_comment}'"
        )

    body_script = ",\n".join(column_lines)
    print(f"{util.timestamp()}: DDL body built successful.")
    return body_script, partition_key_list
def serialization_encoding(self):
    """Return the serialization encoding read from the table header.

    The value is looked up in ``self.info`` under
    ``mtm.TABLE_HEADER_STRUCTURE[9][1][0]``, converted to ``str``, stripped,
    and handed to ``fe.table_info_miss`` together with its header key.

    :param self: object exposing the header values as ``self.info``.
    :return: the stripped value as a string.
    """
    serialization_encoding = str(
        self.info[mtm.TABLE_HEADER_STRUCTURE[9][1][0]]).strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip(
        [mtm.TABLE_HEADER_STRUCTURE[9][0]], [serialization_encoding])
    fe.table_info_miss(check_zip)
    return serialization_encoding
def separator_char(self):
    """Return the separator character read from the table header.

    The value is looked up in ``self.info`` under
    ``mtm.TABLE_HEADER_STRUCTURE[8][1][0]``, converted to ``str``, stripped,
    and handed to ``fe.table_info_miss`` together with its header key.

    :param self: object exposing the header values as ``self.info``.
    :return: the stripped value as a string.
    """
    separator_char = str(
        self.info[mtm.TABLE_HEADER_STRUCTURE[8][1][0]]).strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip([mtm.TABLE_HEADER_STRUCTURE[8][0]], [separator_char])
    fe.table_info_miss(check_zip)
    return separator_char
def quote_char(self):
    """Return the quote character read from the table header.

    The value is looked up in ``self.info`` under
    ``mtm.TABLE_HEADER_STRUCTURE[7][1][0]``, converted to ``str``, stripped,
    and handed to ``fe.table_info_miss`` together with its header key.

    :param self: object exposing the header values as ``self.info``.
    :return: the stripped value as a string.
    """
    quote_char = str(self.info[mtm.TABLE_HEADER_STRUCTURE[7][1][0]]).strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip([mtm.TABLE_HEADER_STRUCTURE[7][0]], [quote_char])
    fe.table_info_miss(check_zip)
    return quote_char
def field_delim(self):
    """Return the field delimiter read from the table header.

    The value is looked up in ``self.info`` under
    ``mtm.TABLE_HEADER_STRUCTURE[6][1][0]``, converted to ``str``, stripped,
    and handed to ``fe.table_info_miss`` together with its header key.

    :param self: object exposing the header values as ``self.info``.
    :return: the stripped value as a string.
    """
    field_delim = str(self.info[mtm.TABLE_HEADER_STRUCTURE[6][1][0]]).strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip([mtm.TABLE_HEADER_STRUCTURE[6][0]], [field_delim])
    fe.table_info_miss(check_zip)
    return field_delim
def table_type(self):
    """Return the table type read from the table header.

    The value is looked up in ``self.info`` under
    ``mtm.TABLE_HEADER_STRUCTURE[4][1][0]``, converted to ``str``, stripped,
    and handed to ``fe.table_info_miss`` together with its header key.

    :param self: object exposing the header values as ``self.info``.
    :return: the stripped value as a string.
    """
    table_type = str(self.info[mtm.TABLE_HEADER_STRUCTURE[4][1][0]]).strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip([mtm.TABLE_HEADER_STRUCTURE[4][0]], [table_type])
    fe.table_info_miss(check_zip)
    return table_type
def database_name(self):
    """Return the database name read from the table header.

    The value is looked up in ``self.info`` under
    ``mtm.TABLE_HEADER_STRUCTURE[0][1][0]``, converted to ``str``, stripped,
    and handed to ``fe.table_info_miss`` together with its header key.

    :param self: object exposing the header values as ``self.info``.
    :return: the stripped value as a string.
    """
    database_name = str(
        self.info[mtm.TABLE_HEADER_STRUCTURE[0][1][0]]).strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip([mtm.TABLE_HEADER_STRUCTURE[0][0]], [database_name])
    fe.table_info_miss(check_zip)
    return database_name
def table_layer(self):
    """Return the table layer read from the table header.

    The value is looked up in ``self.info`` under
    ``cf.TABLE_HEADER_STRUCTURE[3][1][0]``, stripped, and handed to
    ``fe.table_info_miss`` together with its header key.

    :param self: object exposing the header values as ``self.info``.
    :return: the stripped value.
    """
    table_layer = self.info[cf.TABLE_HEADER_STRUCTURE[3][1][0]].strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip([cf.TABLE_HEADER_STRUCTURE[3][0]], [table_layer])
    fe.table_info_miss(check_zip)
    return table_layer
def table_comment(self):
    """Return the table comment read from the table header.

    The value is looked up in ``self.info`` under
    ``cf.TABLE_HEADER_STRUCTURE[2][1][0]``, stripped, and handed to
    ``fe.table_info_miss`` together with its header key.

    :param self: object exposing the header values as ``self.info``.
    :return: the stripped value.
    """
    table_comment = self.info[cf.TABLE_HEADER_STRUCTURE[2][1][0]].strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip([cf.TABLE_HEADER_STRUCTURE[2][0]], [table_comment])
    fe.table_info_miss(check_zip)
    return table_comment
def tail_landing_external(table, partition_key_list):
    """Assemble the header and tail of a CREATE EXTERNAL TABLE statement.

    :param table: header information handed to ``Table``.
    :param partition_key_list: partition key names, forwarded to
        ``Table.partitioned_by``.
    :return: tuple ``(header_script, tail_script)``; the column body is
        expected to be placed between the two by the caller.
    """
    tbl = Table(table)

    # Getters are called in a fixed order because each may run its own
    # mandatory check.
    db_name = tbl.database_name()
    tbl_name = tbl.table_name()
    comment = tbl.table_comment()
    serde = tbl.row_format_serde()
    location = tbl.data_file_location()
    partition_clause = tbl.partitioned_by(partition_key_list)
    stored_as = tbl.store_as()
    tbl_properties = tbl.table_properties()
    serde_properties = tbl.with_serdeproperties()

    # The data file location is required information for this table kind.
    fe.table_info_miss(zip([mtm.TABLE_HEADER_STRUCTURE[10][0]], [location]))

    header_script = f"CREATE EXTERNAL TABLE `{db_name}.{tbl_name}`("

    # Collect the tail fragments in emission order and join once at the end.
    fragments = [")", f"\nCOMMENT '{comment}'"]
    if partition_clause:
        fragments.append(f"\nPARTITIONED BY (\n{partition_clause}\n)")
    fragments.append(f"\nROW FORMAT SERDE '{serde}'")
    # SERDEPROPERTIES only accompany the first configured serde.
    if serde == cdhh.ROW_FORMAT_SERDE_LIST[0][1]:
        fragments.append(f"\n{serde_properties}")
    fragments.append(f"\n{stored_as}")
    fragments.append(f"\nLOCATION '{location}'")
    # The table-properties clause only accompanies the second configured serde.
    if serde == cdhh.ROW_FORMAT_SERDE_LIST[1][1]:
        fragments.append(f"\n{tbl_properties}")
    fragments.append("\nTBLPROPERTIES ( 'skip.header.line.count'='1')\n;")

    return header_script, "".join(fragments)
def row_format_serde(self):
    """Return the row-format serde for the table.

    The header value under ``mtm.TABLE_HEADER_STRUCTURE[5][1][0]`` is
    converted to ``str``, stripped, checked via ``fe.table_info_miss`` and
    then translated through ``cdhh.ROW_FORMAT_SERDE_LIST``: the first entry
    whose element ``[0]`` equals the value supplies element ``[1]`` as the
    result. A value with no matching entry is returned unchanged.

    :param self: object exposing the header values as ``self.info``.
    :return: the mapped serde, or the stripped header value if unmapped.
    """
    row_format_delimiter = str(
        self.info[mtm.TABLE_HEADER_STRUCTURE[5][1][0]]).strip()
    # Wrap key and value in one-element lists: zipping the bare string value
    # pairs it character by character and yields nothing when it is empty.
    check_zip = zip([mtm.TABLE_HEADER_STRUCTURE[5][0]], [row_format_delimiter])
    fe.table_info_miss(check_zip)
    for serde_entry in cdhh.ROW_FORMAT_SERDE_LIST:
        if row_format_delimiter == serde_entry[0]:
            return serde_entry[1]
    return row_format_delimiter
def create_body(table_header, table_body):
    """Build the column-definition section of a CREATE TABLE statement.

    Rows whose partition-key cell equals
    ``cf.TABLE_BODY_PARTITION_KEY_LIST[0]`` are collected by name into the
    partition key list; every other row becomes one
    ``<name> <type> COMMENT '<comment>'`` line, joined with ``",\\n"``.

    :param table_header: header information handed to ``Table``.
    :param table_body: object whose ``iterrows()`` yields ``(index, row)``
        pairs, one row per column (presumably a pandas DataFrame - confirm
        against the caller). Cells are used via ``.strip()`` and so must be
        strings.
    :return: tuple ``(body_script, partition_key_list)``.
    """
    column_lines = []
    partition_key_list = []
    _table_layer = Table(table_header).table_layer()

    for _, row in table_body.iterrows():
        _column_partition_key = row[cf.TABLE_BODY_STRUCTURE[4]].strip()
        _raw_name = row[cf.TABLE_BODY_STRUCTURE[1]].strip()

        if _column_partition_key == cf.TABLE_BODY_PARTITION_KEY_LIST[0]:
            partition_key_list.append(_raw_name)
            continue

        _column_physical_name = f"`{_raw_name}`"
        # Columns of the first table layer are always emitted as String,
        # regardless of the type given in the body row.
        if _table_layer == cf.TABLE_LAYER_LIST[0]:
            _column_type = "String"
        else:
            _column_type = row[cf.TABLE_BODY_STRUCTURE[2]].strip()
        _column_comment = row[cf.TABLE_BODY_STRUCTURE[7]].strip()

        # Mandatory check. The raw name is checked rather than the
        # backtick-wrapped one: "`...`" is never empty, so a missing name
        # could not be detected through the quoted form.
        _check_key_list = [
            cf.TABLE_BODY_STRUCTURE[1],
            cf.TABLE_BODY_STRUCTURE[2],
            cf.TABLE_BODY_STRUCTURE[7],
        ]
        _check_value_list = [_raw_name, _column_type, _column_comment]
        fe.table_info_miss(zip(_check_key_list, _check_value_list))

        column_lines.append(
            f"{_column_physical_name} {_column_type} "
            f"COMMENT '{_column_comment}'"
        )

    body_script = ",\n".join(column_lines)
    _date_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"{_date_time}: DDL body built successful.")
    return body_script, partition_key_list
def tail_landing_external(table, partition_key_list):
    """Assemble the header and tail of a CREATE EXTERNAL TABLE statement.

    :param table: header information handed to ``Table``.
    :param partition_key_list: partition key names, forwarded to
        ``Table.partitioned_by``.
    :return: tuple ``(header_script, tail_script)``; the column body is
        expected to be placed between the two by the caller.
    """
    tbl = Table(table)

    # Getters are called in a fixed order because each may run its own
    # mandatory check.
    db_name = tbl.database_name()
    tbl_name = tbl.table_name()
    comment = tbl.table_comment()
    serde = tbl.row_format_serde()
    location = tbl.data_file_location()
    partition_clause = tbl.partitioned_by(partition_key_list)
    stored_as = tbl.store_as()
    tbl_properties = tbl.table_properties()
    serde_properties = tbl.with_serdeproperties()

    # The data file location is required information for an external table.
    fe.table_info_miss(zip([cf.TABLE_HEADER_STRUCTURE[10][0]], [location]))

    header_script = f"CREATE EXTERNAL TABLE `{db_name}.{tbl_name}`("

    # Collect the tail fragments in emission order and join once at the end.
    fragments = [")", f"\nCOMMENT '{comment}'"]
    if partition_clause:
        fragments.append(f"\nPARTITIONED BY (\n{partition_clause}\n)")
    fragments.append(f"\nROW FORMAT SERDE '{serde}'")
    # SERDEPROPERTIES only accompany the first configured serde.
    if serde == cf.ROW_FORMAT_SERDE_LIST[0][1]:
        fragments.append(f"\n{serde_properties}")
    fragments.append(f"\n{stored_as}")
    fragments.append(f"\nLOCATION '{location}'")
    # The table-properties clause only accompanies the second configured serde.
    if serde == cf.ROW_FORMAT_SERDE_LIST[1][1]:
        fragments.append(f"\n{tbl_properties}")
    fragments.append("\n;")

    return header_script, "".join(fragments)