def describe_formatted(
    self,
    name: str,
    database: str | None = None,
) -> pd.DataFrame:
    """Run `DESCRIBE FORMATTED` on a table and return the parsed output.

    The statement is built with ``self._table_command`` and executed with
    ``self._exec_statement``. Every column of the raw result is stripped of
    surrounding whitespace before the frame is handed to ``parse_metadata``,
    whose return value is returned unchanged.

    See the Impala documentation for details on the command itself.

    Parameters
    ----------
    name
        Table name. Can be fully qualified (with database)
    database
        Database name
    """
    # Imported lazily, at call time rather than module import time.
    from ibis.backends.impala.metadata import parse_metadata

    statement = self._table_command(
        'DESCRIBE FORMATTED', name, database=database
    )
    raw = self._exec_statement(statement)

    # Let pandas do the whitespace cleanup, one column at a time.
    # NOTE(review): `.str` assumes every column holds strings -- confirm
    # against what `_exec_statement` returns.
    for column in raw.columns:
        stripped = raw[column].str.strip()
        raw[column] = stripped

    return parse_metadata(raw)
def parsed_unpart(unpart_metadata):
    """Return ``parse_metadata`` applied to ``unpart_metadata``."""
    parsed = parse_metadata(unpart_metadata)
    return parsed
def parsed_part(part_metadata):
    """Return ``parse_metadata`` applied to ``part_metadata``."""
    parsed = parse_metadata(part_metadata)
    return parsed
def setUpClass(cls):
    """Build sample `DESCRIBE FORMATTED` rows and parse them once.

    Four row sections (schema, partition information, table information
    and storage information) are defined as lists of 3-tuples and stored
    on the class. They are combined via ``_glue_lists_spacer`` into two
    DataFrames with columns ``name``, ``type`` and ``comment``:

    * ``part_metadata``   -- schema, partitions, info, storage info
    * ``unpart_metadata`` -- the same without the partitions section

    Both frames are then passed through ``parse_metadata`` and the results
    are stored as ``parsed_part`` and ``parsed_unpart``.
    """
    blank_row = ('', nan, nan)
    header_row = ('# col_name', 'data_type', 'comment')

    schema_rows = [
        header_row,
        blank_row,
        ('foo', 'int', nan),
        ('bar', 'tinyint', nan),
        ('baz', 'bigint', nan),
    ]

    partition_rows = [
        ('# Partition Information', nan, nan),
        header_row,
        blank_row,
        ('qux', 'bigint', nan),
    ]

    info_rows = [
        ('# Detailed Table Information', nan, nan),
        ('Database:', 'tpcds', nan),
        ('Owner:', 'wesm', nan),
        ('CreateTime:', '2015-11-08 01:09:42-08:00', nan),
        ('LastAccessTime:', 'UNKNOWN', nan),
        ('Protect Mode:', 'None', nan),
        ('Retention:', '0', nan),
        (
            'Location:',
            'hdfs://host-name:20500/my.db/dbname.table_name',
            nan,
        ),
        ('Table Type:', 'EXTERNAL_TABLE', nan),
        ('Table Parameters:', nan, nan),
        ('', 'EXTERNAL', 'TRUE'),
        ('', 'STATS_GENERATED_VIA_STATS_TASK', 'true'),
        ('', 'numRows', '183592'),
        ('', 'transient_lastDdlTime', '1447340941'),
    ]

    storage_rows = [
        ('# Storage Information', nan, nan),
        (
            'SerDe Library:',
            'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe',
            nan,
        ),
        ('InputFormat:', 'org.apache.hadoop.mapred.TextInputFormat', nan),
        (
            'OutputFormat:',
            'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
            nan,
        ),
        ('Compressed:', 'No', nan),
        ('Num Buckets:', '0', nan),
        ('Bucket Columns:', '[]', nan),
        ('Sort Columns:', '[]', nan),
        ('Storage Desc Params:', nan, nan),
        ('', 'field.delim', '|'),
        ('', 'serialization.format', '|'),
    ]

    # Publish the raw sections on the class.
    cls.spacer = blank_row
    cls.schema = schema_rows
    cls.partitions = partition_rows
    cls.info = info_rows
    cls.storage_info = storage_rows

    def _as_frame(sections):
        # Join the sections with the spacer row and load them into a frame.
        records = _glue_lists_spacer(cls.spacer, sections)
        return pd.DataFrame.from_records(
            records, columns=['name', 'type', 'comment']
        )

    cls.part_metadata = _as_frame(
        [cls.schema, cls.partitions, cls.info, cls.storage_info]
    )
    cls.unpart_metadata = _as_frame(
        [cls.schema, cls.info, cls.storage_info]
    )

    cls.parsed_part = parse_metadata(cls.part_metadata)
    cls.parsed_unpart = parse_metadata(cls.unpart_metadata)