def alter_table(self, database, table_name, new_table_name=None, comment=None, tblproperties=None):
    """Apply a single ALTER TABLE/VIEW change (rename, comment, or tblproperties) and return the refreshed table.

    Exactly one change is applied per call, in priority order:
    rename, then comment, then tblproperties.
    Raises PopupException if the table does not exist and
    QueryServerException if the statement fails to execute.
    """
    table = self.get_table(database, table_name)
    if table is None:
        raise PopupException(_("Failed to find the table: %s") % table_name)

    # Views and tables need different ALTER keywords.
    hql = ('ALTER VIEW `%s`.`%s`' if table.is_view else 'ALTER TABLE `%s`.`%s`') % (database, table_name)

    if new_table_name:
        table_name = new_table_name  # fetch the table under its new name below
        hql += ' RENAME TO `%s`' % table_name
    elif comment is not None:
        hql += " SET TBLPROPERTIES ('comment' = '%s')" % comment
    elif tblproperties:
        hql += " SET TBLPROPERTIES (%s)" % ' ,'.join("'%s' = '%s'" % (k, v) for k, v in tblproperties.items())

    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        raise QueryServerException(_("Failed to execute alter table statement: %s") % hql)
    self.close(handle)

    return self.client.get_table(database, table_name)
def _get_tables_via_sparksql(self, database, table_names='*'):
    """List tables in `database` through SparkSQL's SHOW TABLES output.

    Returns a list of {'name', 'type', 'comment'} dicts; 'type' is VIEW for
    temporary tables, TABLE otherwise. Empty list if execution fails.
    """
    hql = "SHOW TABLES IN %s" % database
    if table_names != '*':
        # Only add a LIKE clause when an actual pattern was requested.
        hql += " LIKE '%s'" % (self.to_matching_wildcard(table_names))

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        return []

    result = self.fetch(handle, rows=5000)
    self.close(handle)

    # We get back: database | tableName | isTemporary
    tables = []
    for row in result.rows():
        tables.append({
            'name': row[1],
            'type': 'VIEW' if row[2] else 'TABLE',
            'comment': ''
        })
    return tables
def alter_column(self, database, table_name, column_name, new_column_name, column_type, comment=None, partition_spec=None, cascade=False):
    """Rename/retype a column via ALTER TABLE ... CHANGE COLUMN and return the new column.

    Optionally targets a partition, attaches a comment, and cascades the
    change. Raises QueryServerException when the statement fails.
    """
    statement = 'ALTER TABLE `%s`.`%s`' % (database, table_name)
    if partition_spec:
        statement += ' PARTITION (%s)' % partition_spec
    statement += ' CHANGE COLUMN `%s` `%s` %s' % (column_name, new_column_name, column_type.upper())
    if comment:
        statement += " COMMENT '%s'" % comment
    if cascade:
        statement += ' CASCADE'

    handle = self.execute_and_wait(hql_query(statement), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        raise QueryServerException(_("Failed to execute alter column statement: %s") % statement)
    self.close(handle)

    return self.get_column(database, table_name, new_column_name)
def get_tables(self, database='default', table_names='*'):
    """Return the table names in `database` matching the `table_names` pattern."""
    # self.client.get_tables(database, table_names) is too slow
    hql = "SHOW TABLES IN `%s` '%s'" % (database, table_names)
    handle = self.execute_and_wait(hql_query(hql), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        return []

    result = self.fetch(handle, rows=5000)
    self.close(handle)
    # Flatten all columns of all rows into one list of names.
    names = []
    for row in result.rows():
        names.extend(row)
    return names
def get_databases(self, database_names='*'):
    """Return database names matching `database_names` (wildcard pattern)."""
    pattern = self.to_matching_wildcard(database_names)
    # self.client.get_databases() is too slow
    hql = "SHOW DATABASES LIKE '%s'" % (pattern)

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        return []

    result = self.fetch(handle, rows=5000)
    self.close(handle)
    return [db_name for row in result.rows() for db_name in row]
def get_tables(self, database='default', table_names='*'):
    """List the tables of `database` whose names match `table_names`."""
    statement = "SHOW TABLES IN `%s` '%s'" % (database, table_names)  # self.client.get_tables(database, table_names) is too slow
    wait_secs = SERVER_CONN_TIMEOUT.get()
    handle = self.execute_and_wait(hql_query(statement), timeout_sec=wait_secs)

    if handle:
        fetched = self.fetch(handle, rows=5000)
        self.close(handle)
        return [tbl_name for row in fetched.rows() for tbl_name in row]
    return []
def alter_database(self, database, properties):
    """Set DBPROPERTIES key/value pairs on `database` and return the refreshed database.

    Raises QueryServerException when the ALTER statement fails.
    """
    pairs = ', '.join(["'%s'='%s'" % (k, v) for k, v in properties.items()])
    hql = 'ALTER database `%s` SET DBPROPERTIES (' % database + pairs + ');'

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        raise QueryServerException(_("Failed to execute alter database statement: %s") % hql)
    self.close(handle)

    return self.client.get_database(database)
def get_databases(self, database_names='*'):
    """Return database names matching `database_names`, naturally sorted when small enough."""
    wildcard = self.to_matching_wildcard(database_names)
    hql = "SHOW DATABASES LIKE '%s'" % (wildcard)  # self.client.get_databases() is too slow

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        return []

    result = self.fetch(handle, rows=5000)
    self.close(handle)
    names = [db for row in result.rows() for db in row]
    # Natural sort is O(n log n) with string parsing; skip it for huge lists.
    if len(names) <= APPLY_NATURAL_SORT_MAX.get():
        names = apply_natural_sort(names)
    return names
def get_databases(self, database_names="*"):
    """List databases whose names match the `database_names` wildcard pattern."""
    pattern = self.to_matching_wildcard(database_names)
    statement = "SHOW DATABASES LIKE '%s'" % (pattern)  # self.client.get_databases() is too slow
    query = hql_query(statement)

    handle = self.execute_and_wait(query, timeout_sec=SERVER_CONN_TIMEOUT.get())
    if handle:
        fetched = self.fetch(handle, rows=5000)
        self.close(handle)
        db_names = []
        for row in fetched.rows():
            db_names.extend(row)
        # Only apply the (costlier) natural sort on reasonably small lists.
        if len(db_names) <= APPLY_NATURAL_SORT_MAX.get():
            db_names = apply_natural_sort(db_names)
        return db_names
    return []
def get_tables(self, database='default', table_names='*'):
    """Return table names in `database` matching `table_names`, naturally sorted when small enough."""
    pattern = self.to_matching_wildcard(table_names)
    hql = "SHOW TABLES IN `%s` '%s'" % (database, pattern)  # self.client.get_tables(database, table_names) is too slow

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        return []

    result = self.fetch(handle, rows=5000)
    self.close(handle)
    names = [tbl for row in result.rows() for tbl in row]
    # Skip natural sorting when the table list is very large.
    if len(names) <= APPLY_NATURAL_SORT_MAX.get():
        names = apply_natural_sort(names)
    return names
def get_tables(self, database='default', table_names='*'):
    """Return table names in `database` (SparkSQL), naturally sorted when small enough."""
    pattern = self.to_matching_wildcard(table_names)
    if pattern != '*':
        filter_clause = "'%s'" % pattern
    else:
        filter_clause = ''  # Filter not supported in SparkSql
    hql = "SHOW TABLES IN `%s` %s" % (database, filter_clause)  # self.client.get_tables(database, table_names) is too slow

    handle = self.execute_and_wait(hql_query(hql), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if not handle:
        return []

    result = self.fetch(handle, rows=5000)
    self.close(handle)
    # We only keep the first column as the name, SparkSql returns multiple columns
    names = [row[0] for row in result.rows()]
    if len(names) <= APPLY_NATURAL_SORT_MAX.get():
        names = apply_natural_sort(names)
    return names
def get_tables(self, database='default', table_names='*'):
    """List SparkSQL tables of `database` matching `table_names`."""
    pattern = self.to_matching_wildcard(table_names)
    # Filter not supported in SparkSql
    like_part = "'%s'" % pattern if pattern != '*' else ''
    statement = "SHOW TABLES IN `%s` %s" % (database, like_part)  # self.client.get_tables(database, table_names) is too slow

    handle = self.execute_and_wait(hql_query(statement), timeout_sec=SERVER_CONN_TIMEOUT.get())
    if handle:
        fetched = self.fetch(handle, rows=5000)
        self.close(handle)
        # We only keep the first column as the name, SparkSql returns multiple columns
        tbl_names = []
        for row in fetched.rows():
            tbl_names.append(row[0])
        if len(tbl_names) <= APPLY_NATURAL_SORT_MAX.get():
            tbl_names = apply_natural_sort(tbl_names)
        return tbl_names
    return []
def meta_client(self):
    """Get the Thrift client to talk to the metastore.

    Returns the raw thrift client wrapped in UnicodeMetastoreClient, which
    encodes outgoing strings and decodes incoming ones so callers can work
    purely in Unicode.
    """

    class UnicodeMetastoreClient(object):
        """Wrap the thrift client to take and return Unicode."""

        def __init__(self, client):
            self._client = client

        def __getattr__(self, attr):
            # Delegate everything not overridden here to the wrapped client.
            if attr in self.__dict__:
                return self.__dict__[attr]
            return getattr(self._client, attr)

        def _encode_storage_descriptor(self, sd):
            # Encode the unicode-bearing attributes of a StorageDescriptor
            # before handing it to thrift.
            _encode_struct_attr(sd, 'location')
            for col in sd.cols:
                _encode_struct_attr(col, 'comment')
            self._encode_map(sd.parameters)

        def _decode_storage_descriptor(self, sd):
            # Mirror of _encode_storage_descriptor for results coming back.
            _decode_struct_attr(sd, 'location')
            for col in sd.cols:
                _decode_struct_attr(col, 'comment')
            self._decode_map(sd.parameters)

        def _encode_map(self, mapp):
            # FIX: dict.iteritems() is Python 2 only and raises AttributeError
            # on Python 3 — use items(), matching the other metastore client
            # wrapper in this file. Rebinding values while iterating is safe
            # because the key set does not change.
            for key, value in mapp.items():
                mapp[key] = smart_str(value, strings_only=True)

        def _decode_map(self, mapp):
            # Same Python 3 fix as _encode_map.
            for key, value in mapp.items():
                mapp[key] = force_unicode(value, strings_only=True, errors='replace')

        def create_database(self, name, description):
            description = smart_str(description)
            return self._client.create_database(name, description)

        def get_database(self, *args, **kwargs):
            db = self._client.get_database(*args, **kwargs)
            return _decode_struct_attr(db, 'description')

        def get_fields(self, *args, **kwargs):
            res = self._client.get_fields(*args, **kwargs)
            for fschema in res:
                _decode_struct_attr(fschema, 'comment')
            return res

        def get_table(self, *args, **kwargs):
            res = self._client.get_table(*args, **kwargs)
            self._decode_storage_descriptor(res.sd)
            self._decode_map(res.parameters)
            return res

        def alter_table(self, dbname, tbl_name, new_tbl):
            self._encode_storage_descriptor(new_tbl.sd)
            self._encode_map(new_tbl.parameters)
            return self._client.alter_table(dbname, tbl_name, new_tbl)

        def _encode_partition(self, part):
            self._encode_storage_descriptor(part.sd)
            self._encode_map(part.parameters)
            return part

        def _decode_partition(self, part):
            self._decode_storage_descriptor(part.sd)
            self._decode_map(part.parameters)
            return part

        def add_partition(self, new_part):
            self._encode_partition(new_part)
            part = self._client.add_partition(new_part)
            return self._decode_partition(part)

        def get_partition(self, *args, **kwargs):
            part = self._client.get_partition(*args, **kwargs)
            return self._decode_partition(part)

        def get_partitions(self, *args, **kwargs):
            part_list = self._client.get_partitions(*args, **kwargs)
            for part in part_list:
                self._decode_partition(part)
            return part_list

        def alter_partition(self, db_name, tbl_name, new_part):
            self._encode_partition(new_part)
            return self._client.alter_partition(db_name, tbl_name, new_part)

    use_sasl, kerberos_principal_short_name = HiveMetastoreClient.get_security()  # TODO Reuse from HiveServer2 lib

    client = thrift_util.get_client(
        ThriftHiveMetastore.Client,
        host=self.query_server['server_host'],
        port=self.query_server['server_port'],
        service_name="Hive Metastore Server",
        kerberos_principal=kerberos_principal_short_name,
        use_sasl=use_sasl,
        timeout_seconds=SERVER_CONN_TIMEOUT.get()
    )
    return UnicodeMetastoreClient(client)
def meta_client(self):
    """Get the Thrift client to talk to the metastore"""

    class UnicodeMetastoreClient(object):
        """Wrap the thrift client to take and return Unicode."""

        def __init__(self, client):
            self._client = client

        def __getattr__(self, attr):
            # Delegate everything not overridden here to the wrapped client.
            if attr in self.__dict__:
                return self.__dict__[attr]
            return getattr(self._client, attr)

        def _encode_storage_descriptor(self, sd):
            # Encode the unicode-bearing attributes of a StorageDescriptor
            # (location, column comments, parameters) before sending to thrift.
            _encode_struct_attr(sd, 'location')
            for col in sd.cols:
                _encode_struct_attr(col, 'comment')
            self._encode_map(sd.parameters)

        def _decode_storage_descriptor(self, sd):
            # Mirror of _encode_storage_descriptor for results coming back.
            _decode_struct_attr(sd, 'location')
            for col in sd.cols:
                _decode_struct_attr(col, 'comment')
            self._decode_map(sd.parameters)

        def _encode_map(self, mapp):
            # In-place value rebinding is safe: the key set does not change.
            for key, value in mapp.items():
                mapp[key] = smart_str(value, strings_only=True)

        def _decode_map(self, mapp):
            # Undecodable bytes are replaced rather than raising.
            for key, value in mapp.items():
                mapp[key] = force_unicode(value, strings_only=True, errors='replace')

        def create_database(self, name, description):
            description = smart_str(description)
            return self._client.create_database(name, description)

        def get_database(self, *args, **kwargs):
            db = self._client.get_database(*args, **kwargs)
            return _decode_struct_attr(db, 'description')

        def get_fields(self, *args, **kwargs):
            res = self._client.get_fields(*args, **kwargs)
            for fschema in res:
                _decode_struct_attr(fschema, 'comment')
            return res

        def get_table(self, *args, **kwargs):
            res = self._client.get_table(*args, **kwargs)
            self._decode_storage_descriptor(res.sd)
            self._decode_map(res.parameters)
            return res

        def alter_table(self, dbname, tbl_name, new_tbl):
            # Outgoing table: encode before the thrift call.
            self._encode_storage_descriptor(new_tbl.sd)
            self._encode_map(new_tbl.parameters)
            return self._client.alter_table(dbname, tbl_name, new_tbl)

        def _encode_partition(self, part):
            self._encode_storage_descriptor(part.sd)
            self._encode_map(part.parameters)
            return part

        def _decode_partition(self, part):
            self._decode_storage_descriptor(part.sd)
            self._decode_map(part.parameters)
            return part

        def add_partition(self, new_part):
            self._encode_partition(new_part)
            part = self._client.add_partition(new_part)
            return self._decode_partition(part)

        def get_partition(self, *args, **kwargs):
            part = self._client.get_partition(*args, **kwargs)
            return self._decode_partition(part)

        def get_partitions(self, *args, **kwargs):
            part_list = self._client.get_partitions(*args, **kwargs)
            for part in part_list:
                self._decode_partition(part)
            return part_list

        def alter_partition(self, db_name, tbl_name, new_part):
            self._encode_partition(new_part)
            return self._client.alter_partition(db_name, tbl_name, new_part)

    use_sasl, kerberos_principal_short_name = HiveMetastoreClient.get_security()  # TODO Reuse from HiveServer2 lib

    client = thrift_util.get_client(
        ThriftHiveMetastore.Client,
        host=self.query_server['server_host'],
        port=self.query_server['server_port'],
        service_name="Hive Metastore Server",
        kerberos_principal=kerberos_principal_short_name,
        use_sasl=use_sasl,
        timeout_seconds=SERVER_CONN_TIMEOUT.get())
    return UnicodeMetastoreClient(client)