def setUpClass(cls):
    # Fake DESCRIBE FORMATTED output, broken into its sections, used to
    # exercise parse_metadata() with and without a partition section.
    cls.spacer = ('', nan, nan)

    cls.schema = [
        ('# col_name', 'data_type', 'comment'),
        cls.spacer,
        ('foo', 'int', nan),
        ('bar', 'tinyint', nan),
        ('baz', 'bigint', nan)
    ]

    cls.partitions = [
        ('# Partition Information', nan, nan),
        ('# col_name', 'data_type', 'comment'),
        cls.spacer,
        ('qux', 'bigint', nan)
    ]

    cls.info = [
        ('# Detailed Table Information', nan, nan),
        ('Database:', 'tpcds', nan),
        ('Owner:', 'wesm', nan),
        ('CreateTime:', 'Sun Nov 08 01:09:42 PST 2015', nan),
        ('LastAccessTime:', 'UNKNOWN', nan),
        ('Protect Mode:', 'None', nan),
        ('Retention:', '0', nan),
        ('Location:', ('hdfs://host-name:20500/my.db'
                       '/dbname.table_name'), nan),
        ('Table Type:', 'EXTERNAL_TABLE', nan),
        ('Table Parameters:', nan, nan),
        ('', 'EXTERNAL', 'TRUE'),
        ('', 'STATS_GENERATED_VIA_STATS_TASK', 'true'),
        ('', 'numRows', '183592'),
        ('', 'transient_lastDdlTime', '1447340941'),
    ]

    cls.storage_info = [
        ('# Storage Information', nan, nan),
        ('SerDe Library:', ('org.apache.hadoop'
                            '.hive.serde2.lazy.LazySimpleSerDe'), nan),
        ('InputFormat:', 'org.apache.hadoop.mapred.TextInputFormat', nan),
        ('OutputFormat:',
         'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', nan),
        ('Compressed:', 'No', nan),
        ('Num Buckets:', '0', nan),
        ('Bucket Columns:', '[]', nan),
        ('Sort Columns:', '[]', nan),
        ('Storage Desc Params:', nan, nan),
        ('', 'field.delim', '|'),
        ('', 'serialization.format', '|')
    ]

    cls.part_metadata = pd.DataFrame.from_records(
        _glue_lists_spacer(cls.spacer,
                           [cls.schema, cls.partitions,
                            cls.info, cls.storage_info]),
        columns=['name', 'type', 'comment'])

    cls.unpart_metadata = pd.DataFrame.from_records(
        _glue_lists_spacer(cls.spacer,
                           [cls.schema, cls.info, cls.storage_info]),
        columns=['name', 'type', 'comment'])

    cls.parsed_part = parse_metadata(cls.part_metadata)
    cls.parsed_unpart = parse_metadata(cls.unpart_metadata)
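# _glue_lists_spacer is not defined in this excerpt. A minimal sketch of what
# it presumably does, assuming it only concatenates the per-section record
# lists with the blank spacer row between consecutive sections; the actual
# helper used by these tests may differ in detail.
def _glue_lists_spacer(spacer, lists):
    # Start with the first section's rows, then append a spacer row before
    # each subsequent section, mirroring the blank lines that separate
    # sections in real DESCRIBE FORMATTED output.
    result = list(lists[0])
    for lst in lists[1:]:
        result.append(spacer)
        result.extend(lst)
    return result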