def get_tables(self, database, table_names):
    """Issue a GetTables call and return its TABLE_NAME column.

    `database` is sent as the request's schemaName and `table_names` as
    its tableName.
    """
    request = TGetTablesReq(schemaName=database, tableName=table_names)
    response = self.call(self._client.GetTables, request)

    rows, row_schema = self.fetch_result(response.operationHandle)

    row_set = HiveServerTRowSet(rows.results, row_schema.schema)
    return row_set.cols(('TABLE_NAME',))
def get_table(self, database, table_name, partition_spec=None):
    """Fetch GetTables metadata and a DESCRIBE FORMATTED listing for a table.

    Args:
        database: sent as the GetTables schemaName and used to qualify the
            DESCRIBE statement.
        table_name: sent as the GetTables tableName and described.
        partition_spec: optional text interpolated verbatim into a
            ``PARTITION(...)`` clause of the DESCRIBE statement.

    Raises:
        Exception: any error from the DESCRIBE statement whose message does
            not contain 'cannot find field' is re-raised unchanged.

    NOTE(review): no return statement is visible for this method in this
    view, so as written it returns None -- confirm against the full file.
    """
    req = TGetTablesReq(schemaName=database, tableName=table_name)
    res = self.call(self._client.GetTables, req)

    table_results, table_schema = self.fetch_result(
        res.operationHandle, orientation=TFetchOrientation.FETCH_NEXT)
    self.close_operation(res.operationHandle)

    if partition_spec:
        query = 'DESCRIBE FORMATTED `%s`.`%s` PARTITION(%s)' % (database, table_name, partition_spec)
    else:
        query = 'DESCRIBE FORMATTED `%s`.`%s`' % (database, table_name)

    try:
        (desc_results, desc_schema), operation_handle = self.execute_statement(
            query, max_rows=10000, orientation=TFetchOrientation.FETCH_NEXT)
        self.close_operation(operation_handle)
    except Exception as e:
        if 'cannot find field' not in str(e):
            # Bare raise keeps the original traceback (``raise e`` would not).
            raise

        # Workaround until Hive 2.0 and HUE-3751: switch to the database
        # first, then describe the table without the database qualifier.
        (desc_results, desc_schema), operation_handle = self.execute_statement('USE `%s`' % database)
        self.close_operation(operation_handle)

        if partition_spec:
            query = 'DESCRIBE FORMATTED `%s` PARTITION(%s)' % (table_name, partition_spec)
        else:
            query = 'DESCRIBE FORMATTED `%s`' % table_name

        (desc_results, desc_schema), operation_handle = self.execute_statement(
            query, max_rows=10000, orientation=TFetchOrientation.FETCH_NEXT)
        self.close_operation(operation_handle)
def get_table(self, database, table_name, column_name=None, nested_tokens=None, partition_spec=None):
    """Return a HiveServerTable built from GetTables and DESCRIBE FORMATTED.

    The DESCRIBE statement targets, in order of precedence: a nested field
    (when both `column_name` and `nested_tokens` are given), a single
    column, a partition, or the whole table.
    """
    request = TGetTablesReq(schemaName=database, tableName=table_name)
    response = self.call(self._client.GetTables, request)

    table_results, table_schema = self.fetch_result(
        response.operationHandle, orientation=TFetchOrientation.FETCH_NEXT)
    self.close_operation(response.operationHandle)

    describes_nested_field = column_name and nested_tokens
    if not describes_nested_field:
        query = 'DESCRIBE FORMATTED `%s`.`%s`' % (database, table_name)
        if column_name:
            query += ' `%s`' % column_name
        elif partition_spec:
            query += ' PARTITION(%s)' % partition_spec
    else:
        # DESCRIBE on nested types cannot accept database name
        quoted_tokens = ['`%s`' % token for token in nested_tokens]
        nested_spec = '.'.join(quoted_tokens)
        query = 'DESCRIBE FORMATTED `%s`.`%s`.%s' % (table_name, column_name, nested_spec)

    described, handle = self.execute_statement(
        query, max_rows=5000, orientation=TFetchOrientation.FETCH_NEXT)
    desc_results, desc_schema = described
    self.close_operation(handle)

    return HiveServerTable(
        table_results.results,
        table_schema.schema,
        desc_results.results,
        desc_schema.schema,
    )
def get_tables(self, database, table_names):
    """Issue a GetTables call and return its TABLE_NAME column.

    Fetches up to 5000 rows with FETCH_NEXT orientation and closes the
    operation handle before building the row set.
    """
    request = TGetTablesReq(schemaName=database, tableName=table_names)
    response = self.call(self._client.GetTables, request)
    handle = response.operationHandle

    rows, row_schema = self.fetch_result(
        handle, orientation=TFetchOrientation.FETCH_NEXT, max_rows=5000)
    self.close_operation(handle)

    row_set = HiveServerTRowSet(rows.results, row_schema.schema)
    return row_set.cols(('TABLE_NAME',))
def get_table(self, database, table_name):
    """Return a HiveServerTable from GetTables plus DESCRIBE EXTENDED output.

    The DESCRIBE statement uses `table_name` alone; `database` is only
    sent in the GetTables request.
    """
    request = TGetTablesReq(schemaName=database, tableName=table_name)
    response = self.call(self._client.GetTables, request)

    table_results, table_schema = self.fetch_result(response.operationHandle)

    describe_query = 'DESCRIBE EXTENDED %s' % table_name
    desc_results, desc_schema = self.execute_statement(describe_query)

    return HiveServerTable(
        table_results.results,
        table_schema.schema,
        desc_results.results,
        desc_schema.schema,
    )
def get_tables_meta(self, database, table_names, table_types=None):
    """Return TABLE_NAME, TABLE_TYPE and REMARKS columns from GetTables.

    A falsy `table_types` is replaced by `self.DEFAULT_TABLE_TYPES`.
    Fetches up to 5000 rows and closes the operation handle afterwards.
    """
    table_types = table_types or self.DEFAULT_TABLE_TYPES

    request = TGetTablesReq(
        schemaName=database, tableName=table_names, tableTypes=table_types)
    response = self.call(self._client.GetTables, request)
    handle = response.operationHandle

    rows, row_schema = self.fetch_result(
        handle, orientation=TFetchOrientation.FETCH_NEXT, max_rows=5000)
    self.close_operation(handle)

    wanted_columns = ('TABLE_NAME', 'TABLE_TYPE', 'REMARKS')
    row_set = HiveServerTRowSet(rows.results, row_schema.schema)
    return row_set.cols(wanted_columns)
def get_table(self, database, table_name):
    """Return a HiveServerTable from GetTables plus DESCRIBE EXTENDED output.

    The DESCRIBE statement uses `table_name` alone; `database` is only
    sent in the GetTables request.
    """
    request = TGetTablesReq(schemaName=database, tableName=table_name)
    response = self.call(self._client.GetTables, request)

    table_results, table_schema = self.fetch_result(response.operationHandle)

    # Using 'SELECT * from table' does not show column comments in the metadata
    describe_query = 'DESCRIBE EXTENDED %s' % table_name
    desc_results, desc_schema = self.execute_statement(describe_query)

    return HiveServerTable(
        table_results.results,
        table_schema.schema,
        desc_results.results,
        desc_schema.schema,
    )
def get_table(self, database, table_name):
    """Return a HiveServerTable from GetTables plus DESCRIBE FORMATTED output.

    Both operation handles (GetTables and the DESCRIBE statement) are
    closed once their results have been fetched.
    """
    request = TGetTablesReq(schemaName=database, tableName=table_name)
    response = self.call(self._client.GetTables, request)
    tables_handle = response.operationHandle

    table_results, table_schema = self.fetch_result(
        tables_handle, orientation=TFetchOrientation.FETCH_NEXT)
    self.close_operation(tables_handle)

    describe_query = 'DESCRIBE FORMATTED `%s`.`%s`' % (database, table_name)
    described, describe_handle = self.execute_statement(
        describe_query, max_rows=5000, orientation=TFetchOrientation.FETCH_NEXT)
    desc_results, desc_schema = described
    self.close_operation(describe_handle)

    return HiveServerTable(
        table_results.results,
        table_schema.schema,
        desc_results.results,
        desc_schema.schema,
    )
def get_table(self, database, table_name, partition_spec=None):
    """Fetch GetTables metadata and a DESCRIBE listing for a table.

    DESCRIBE FORMATTED is tried first. Two error messages trigger a
    fallback instead of a failure:

    * 'cannot find field': issue ``USE <database>`` and describe the table
      without the database qualifier.
    * 'not have privileges for DESCTABLE' or 'AuthorizationException': run
      a plain DESCRIBE and patch the result columns in place (header rows
      inserted at the top, partition section cut off or a blank row
      appended at the end).

    Args:
        database: sent as the GetTables schemaName and used in the
            DESCRIBE / USE statements.
        table_name: sent as the GetTables tableName and described.
        partition_spec: optional text interpolated verbatim into a
            ``PARTITION(...)`` clause of the DESCRIBE FORMATTED statement.

    Raises:
        Exception: any other error from the DESCRIBE statement is re-raised
            unchanged.

    NOTE(review): no return statement is visible for this method in this
    view, so as written it returns None -- confirm against the full file.
    """
    req = TGetTablesReq(schemaName=database, tableName=table_name)
    res = self.call(self._client.GetTables, req)

    table_results, table_schema = self.fetch_result(
        res.operationHandle, orientation=TFetchOrientation.FETCH_NEXT)
    self.close_operation(res.operationHandle)

    if partition_spec:
        query = 'DESCRIBE FORMATTED `%s`.`%s` PARTITION(%s)' % (database, table_name, partition_spec)
    else:
        query = 'DESCRIBE FORMATTED `%s`.`%s`' % (database, table_name)

    try:
        (desc_results, desc_schema), operation_handle = self.execute_statement(
            query, max_rows=10000, orientation=TFetchOrientation.FETCH_NEXT)
        self.close_operation(operation_handle)
    except Exception as e:
        ex_string = str(e)
        if 'cannot find field' in ex_string:
            # Workaround until Hive 2.0 and HUE-3751
            (desc_results, desc_schema), operation_handle = self.execute_statement('USE `%s`' % database)
            self.close_operation(operation_handle)

            if partition_spec:
                query = 'DESCRIBE FORMATTED `%s` PARTITION(%s)' % (table_name, partition_spec)
            else:
                query = 'DESCRIBE FORMATTED `%s`' % table_name

            (desc_results, desc_schema), operation_handle = self.execute_statement(
                query, max_rows=10000, orientation=TFetchOrientation.FETCH_NEXT)
            self.close_operation(operation_handle)
        elif 'not have privileges for DESCTABLE' in ex_string or 'AuthorizationException' in ex_string:
            # HUE-5608: No table permission but some column permissions
            query = 'DESCRIBE `%s`.`%s`' % (database, table_name)
            (desc_results, desc_schema), operation_handle = self.execute_statement(
                query, max_rows=10000, orientation=TFetchOrientation.FETCH_NEXT)
            self.close_operation(operation_handle)

            columns = desc_results.results.columns
            name_col = columns[0].stringVal
            type_col = columns[1].stringVal
            comment_col = columns[2].stringVal

            # Prepend a header row and a blank row to each of the three columns.
            name_col.values.insert(0, '# col_name')
            name_col.values.insert(1, '')
            type_col.values.insert(0, 'data_type')
            type_col.values.insert(1, None)
            comment_col.values.insert(0, 'comment')
            comment_col.values.insert(1, None)

            try:
                part_index = name_col.values.index('# Partition Information')
            except ValueError:
                # No partition section: just terminate with a blank row.
                name_col.values.append('')
                type_col.values.append(None)
                comment_col.values.append(None)
            else:
                # Strip duplicate columns of partitioned tables
                name_col.values = name_col.values[:part_index]
                type_col.values = type_col.values[:part_index]
                comment_col.values = comment_col.values[:part_index]

                type_col.nulls = ''  # Important to not clear the last two types

                type_col.values[-1] = None
                comment_col.values[-1] = None
        else:
            # Bare raise keeps the original traceback (``raise e`` would not).
            raise
def get_table(self, database, table_name):
    """Return a HiveServerTable from GetTables plus DESCRIBE EXTENDED output.

    When the configured server name is 'beeswax', a
    ``SET hive.server2.blocking.query=true`` statement is executed before
    the DESCRIBE.
    """
    request = TGetTablesReq(schemaName=database, tableName=table_name)
    response = self.call(self._client.GetTables, request)

    table_results, table_schema = self.fetch_result(response.operationHandle)

    # Using 'SELECT * from table' does not show column comments in the metadata
    is_beeswax = self.query_server['server_name'] == 'beeswax'
    if is_beeswax:
        self.execute_statement(statement='SET hive.server2.blocking.query=true')

    describe_query = 'DESCRIBE EXTENDED %s' % table_name
    desc_results, desc_schema = self.execute_statement(describe_query)

    return HiveServerTable(
        table_results.results,
        table_schema.schema,
        desc_results.results,
        desc_schema.schema,
    )
def get_table(self, database, table_name):
    """Return a HiveServerTable from GetTables plus a DESCRIBE statement.

    Uses plain DESCRIBE when the configured server name is 'impala' and
    DESCRIBE EXTENDED otherwise.
    """
    request = TGetTablesReq(schemaName=database, tableName=table_name)
    response = self.call(self._client.GetTables, request)

    table_results, table_schema = self.fetch_result(response.operationHandle)

    if self.query_server['server_name'] != 'impala':
        describe_query = 'DESCRIBE EXTENDED %s' % table_name
    else:
        # Impala does not support EXTENDED
        describe_query = 'DESCRIBE %s' % table_name

    desc_results, desc_schema = self.execute_statement(describe_query)

    return HiveServerTable(
        table_results.results,
        table_schema.schema,
        desc_results.results,
        desc_schema.schema,
    )