예제 #1
0
    def describe_formatted(
        self,
        name: str,
        database: str | None = None,
    ) -> pd.DataFrame:
        """Run `DESCRIBE FORMATTED` on a table and parse what comes back.

        The statement is assembled by `self._table_command`, executed by
        `self._exec_statement`, every column of the result is stripped of
        surrounding whitespace, and the cleaned result is passed to
        `parse_metadata`.

        See Impala documentation for more.

        Parameters
        ----------
        name
            Table name. Can be fully qualified (with database)
        database
            Database name

        Returns
        -------
        Whatever `parse_metadata` produces for the stripped result.
        NOTE(review): the signature advertises `pd.DataFrame`; confirm this
        matches the actual return type of `parse_metadata`.
        """
        from ibis.backends.impala.metadata import parse_metadata

        # Presumably a pandas DataFrame (it exposes `.columns` and the
        # `.str` accessor below) -- verify against `_exec_statement`.
        raw = self._exec_statement(
            self._table_command('DESCRIBE FORMATTED', name, database=database)
        )

        # Trim each column in place through the pandas string accessor so
        # the parser sees values without leading/trailing whitespace.
        for column in raw.columns:
            raw[column] = raw[column].str.strip()

        return parse_metadata(raw)
예제 #2
0
def parsed_unpart(unpart_metadata):
    """Return the result of `parse_metadata` applied to `unpart_metadata`.

    Parameters
    ----------
    unpart_metadata
        Raw metadata forwarded unchanged to `parse_metadata`.
        Presumably the metadata of a table without partition columns --
        confirm against the provider of this argument.
    """
    parsed = parse_metadata(unpart_metadata)
    return parsed
예제 #3
0
def parsed_part(part_metadata):
    """Return the result of `parse_metadata` applied to `part_metadata`.

    Parameters
    ----------
    part_metadata
        Raw metadata forwarded unchanged to `parse_metadata`.
        Presumably the metadata of a partitioned table -- confirm against
        the provider of this argument.
    """
    parsed = parse_metadata(part_metadata)
    return parsed
예제 #4
0
    def setUpClass(cls):
        """Build the shared `DESCRIBE FORMATTED`-style fixtures on the class.

        Sets, in order, the row lists `spacer`, `schema`, `partitions`,
        `info` and `storage_info`; the frames `part_metadata` (all four
        sections) and `unpart_metadata` (no partition section); and the
        results of `parse_metadata` on each frame as `parsed_part` and
        `parsed_unpart`.
        """
        # Row shapes reused across several sections.
        blank_row = ('', nan, nan)
        column_header = ('# col_name', 'data_type', 'comment')

        def build_frame(sections):
            # Combine the sections via `_glue_lists_spacer` (presumably
            # placing the blank row between them -- verify in the helper)
            # and load the rows into a three-column frame.
            return pd.DataFrame.from_records(
                _glue_lists_spacer(blank_row, sections),
                columns=['name', 'type', 'comment'],
            )

        cls.spacer = blank_row

        # Column listing: header, blank line, then one row per column.
        schema_rows = [
            column_header,
            blank_row,
            ('foo', 'int', nan),
            ('bar', 'tinyint', nan),
            ('baz', 'bigint', nan),
        ]
        cls.schema = schema_rows

        # Partition-column listing, laid out like the schema section.
        partition_rows = [
            ('# Partition Information', nan, nan),
            column_header,
            blank_row,
            ('qux', 'bigint', nan),
        ]
        cls.partitions = partition_rows

        # Table-level details; parameter rows have an empty first cell.
        info_rows = [
            ('# Detailed Table Information', nan, nan),
            ('Database:', 'tpcds', nan),
            ('Owner:', 'wesm', nan),
            ('CreateTime:', '2015-11-08 01:09:42-08:00', nan),
            ('LastAccessTime:', 'UNKNOWN', nan),
            ('Protect Mode:', 'None', nan),
            ('Retention:', '0', nan),
            (
                'Location:',
                'hdfs://host-name:20500/my.db/dbname.table_name',
                nan,
            ),
            ('Table Type:', 'EXTERNAL_TABLE', nan),
            ('Table Parameters:', nan, nan),
            ('', 'EXTERNAL', 'TRUE'),
            ('', 'STATS_GENERATED_VIA_STATS_TASK', 'true'),
            ('', 'numRows', '183592'),
            ('', 'transient_lastDdlTime', '1447340941'),
        ]
        cls.info = info_rows

        # Storage details; descriptor params have an empty first cell.
        storage_rows = [
            ('# Storage Information', nan, nan),
            (
                'SerDe Library:',
                'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe',
                nan,
            ),
            ('InputFormat:', 'org.apache.hadoop.mapred.TextInputFormat', nan),
            (
                'OutputFormat:',
                'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
                nan,
            ),
            ('Compressed:', 'No', nan),
            ('Num Buckets:', '0', nan),
            ('Bucket Columns:', '[]', nan),
            ('Sort Columns:', '[]', nan),
            ('Storage Desc Params:', nan, nan),
            ('', 'field.delim', '|'),
            ('', 'serialization.format', '|'),
        ]
        cls.storage_info = storage_rows

        # Partitioned variant includes the partition section; the
        # unpartitioned variant omits it.
        cls.part_metadata = build_frame(
            [schema_rows, partition_rows, info_rows, storage_rows]
        )
        cls.unpart_metadata = build_frame(
            [schema_rows, info_rows, storage_rows]
        )

        cls.parsed_part = parse_metadata(cls.part_metadata)
        cls.parsed_unpart = parse_metadata(cls.unpart_metadata)