def _get_create_lakeout_statement(self):
    """Build the statement that creates the lake-out view.

    The view is defined as a select over ``self.db_table_lake`` limited to
    the projection computed by ``self.get_projection_columns`` from the lake
    column names and ``self.columns_lakeout``.

    Returns:
        The result of ``HQLGenerator.generate_create_view_as_select`` for
        ``self.db_view_lake_out`` and the generated select statement.
    """
    lake_column_names = self.get_lake_column_names()
    projected = self.get_projection_columns(
        lake_column_names, self.columns_lakeout)

    select_builder = HQLGenerator.SelectStatementBuilder(
        self.db_table_lake, projected)
    view_query = select_builder.build()

    return HQLGenerator.generate_create_view_as_select(
        self.db_view_lake_out, view_query)
def create_tables(self, lake_table_location_prefix):
    """Create the landing and lake databases/tables in a single Hive run.

    The generated script contains, in order: create-database-if-not-exists
    for ``self.db_landing`` and ``self.db_lake``, the landing table creation
    and repair, then the lake table creation and repair.

    Args:
        lake_table_location_prefix: when ``None`` the lake table is located
            at ``self.dir_lake_final``; otherwise the value is joined onto
            ``self.dir_lake_table`` to form the location.

    Raises:
        Exception: whatever ``self.emr_system.execute_hive`` raises is
            logged as an error and re-raised unchanged.
    """
    statements = []

    # Databases first, so the tables below have somewhere to live.
    statements.append(
        self._get_create_database_if_not_exists(self.db_landing))
    statements.append(
        self._get_create_database_if_not_exists(self.db_lake))

    # Landing table: create, then repair.
    landing_create = self._get_create_landing_statement(
        self.dir_landing_final)
    statements.append(landing_create.with_semicolon())
    landing_repair = HQLGenerator.generate_repair_table(
        self.db_table_landing)
    statements.append(landing_repair.with_semicolon())

    # Lake table: location depends on whether a prefix was supplied.
    if lake_table_location_prefix is None:
        lake_location = self.dir_lake_final
    else:
        lake_location = os.path.join(
            self.dir_lake_table, lake_table_location_prefix)
    lake_create = self._get_create_lake_statement(lake_location)
    statements.append(lake_create.with_semicolon())
    lake_repair = HQLGenerator.generate_repair_table(self.db_table_lake)
    statements.append(lake_repair.with_semicolon())

    hql = "\n".join(statements)

    try:
        self.emr_system.execute_hive(hql)
    except Exception:
        logging.error(
            "Failed to create {} and {} tables.".format(
                self.db_table_landing, self.db_table_lake))
        raise
    else:
        logging.info(
            "Successfully created {} and {} tables.".format(
                self.db_table_landing, self.db_table_lake))
def drop_table(db_table):
    """Drop ``db_table`` through Hive on a best-effort basis.

    NOTE(review): ``self`` is not a parameter of this function; presumably
    it is captured from an enclosing method's scope - confirm.

    Args:
        db_table: table identifier handed to
            ``HQLGenerator.generate_drop_table``.

    Returns:
        ``1`` when the drop statement executed without raising, ``0`` when
        any exception occurred (a warning is logged and the error is
        swallowed).
    """
    statement = HQLGenerator.generate_drop_table(db_table)
    hql = statement.with_semicolon()

    try:
        logging.info("Dropping table {} from Hive".format(db_table))
        self.emr_system.execute_hive(hql)
    except Exception:
        # Failure is tolerated here: report it and signal "nothing dropped".
        logging.warning("Unable to drop {} table".format(db_table))
        return 0
    else:
        return 1
def _get_create_landing_statement(self, table_location):
    """Build the create statement for the external landing table.

    The table is a delimiter-separated table named ``self.db_table_landing``
    using ``self.columns_lake`` and ``self.delimiter``. UTF-8 serialization
    encoding is always set; ``skip.header.line.count`` is added only when
    ``self.header_lines`` converts to a positive integer.

    Args:
        table_location: location passed to the table statement builder.

    Returns:
        The result of the builder's ``build(is_external=True)`` call.
    """
    properties = {"serialization.encoding": "UTF-8"}

    has_header = int(self.header_lines) > 0
    if has_header:
        # The property value is the string form of the configured value.
        properties["skip.header.line.count"] = str(self.header_lines)

    builder = HQLGenerator.CreateDSVTableStatementBuilder(
        self.db_table_landing,
        table_location,
        self.columns_lake,
        self.delimiter,
    )
    configured = builder.with_properties(properties)
    return configured.build(is_external=True)
def reset_table(db_table, create_table_hql, table_location,
                table_partitioned_flag):
    """Reset ``db_table`` in Hive.

    For a partitioned table the table is dropped, re-created with
    ``create_table_hql`` and repaired; ``table_location`` is not used in
    that case. Otherwise only the table location is altered to
    ``table_location`` and ``create_table_hql`` is not used.

    NOTE(review): ``self`` is not a parameter of this function; presumably
    it is captured from an enclosing method's scope - confirm.

    Args:
        db_table: table identifier to reset.
        create_table_hql: statement placed between the drop and repair
            statements when the table is partitioned.
        table_location: new location used when the table is not
            partitioned.
        table_partitioned_flag: truthy to drop/re-create/repair, falsy to
            alter the location only.

    Raises:
        M3DEMRStepException: re-raised when Hive execution fails for any
            reason other than a "Table not found" error.
    """
    if table_partitioned_flag:
        hql = "\n".join([
            HQLGenerator.generate_drop_table(db_table).with_semicolon(),
            create_table_hql,
            HQLGenerator.generate_repair_table(db_table).with_semicolon(),
        ])
    else:
        hql = HQLGenerator.generate_alter_table_location(
            db_table, table_location).with_semicolon()

    try:
        logging.info("Resetting '{}' table.".format(db_table))
        self.emr_system.execute_hive(hql)
    except M3DEMRStepException as e:
        if "Table not found" in str(e):
            # The table might already be absent; that is not a failure.
            return

        # A real failure that is about to propagate: report it at ERROR
        # level (not INFO) so it is not lost when filtering by severity.
        logging.error("Failed to reset '{}' table: {}".format(db_table, e))
        raise
def _get_drop_lakeout_statement(self):
    """Build the statement that drops the lake-out view if it exists.

    Returns:
        The result of ``HQLGenerator.generate_drop_view_if_exists`` for
        ``self.db_view_lake_out``.
    """
    lake_out_view = self.db_view_lake_out
    return HQLGenerator.generate_drop_view_if_exists(lake_out_view)
def create_statement(_columns, _target_partitions=None):
    """Build the create statement for the external Parquet lake table.

    NOTE(review): ``self`` and ``table_location`` are not parameters of this
    function; presumably both are captured from an enclosing scope -
    confirm.

    Args:
        _columns: columns passed to the Parquet table statement builder.
        _target_partitions: value passed to the builder's
            ``partitioned_by``; defaults to ``None``.

    Returns:
        The result of the builder's ``build(is_external=True)`` call.
    """
    table_properties = {"serialization.encoding": "UTF-8"}

    builder = HQLGenerator.CreateParquetTableStatementBuilder(
        self.db_table_lake, table_location, _columns)
    builder = builder.partitioned_by(_target_partitions)
    builder = builder.with_properties(table_properties)

    return builder.build(is_external=True)
def _get_create_database_if_not_exists(database_name):
    """Build a semicolon-terminated create-database-if-not-exists statement.

    NOTE(review): there is no ``self`` parameter although the function is
    invoked through ``self`` elsewhere; presumably it is declared as a
    static method outside the visible lines - confirm.

    Args:
        database_name: name of the database to create when missing.

    Returns:
        The generated statement after ``with_semicolon()`` is applied.
    """
    # The generator helper is spelled "..._if_not_exits" where it is
    # referenced here; the name must be used exactly as is.
    create_database = HQLGenerator.generate_create_database_if_not_exits(
        database_name)
    return create_database.with_semicolon()