Example 1
    def test_set_column_orders(self, vector, unique_database, tmpdir):
        """Tests that the Parquet writers set FileMetaData::column_orders."""
        source_table = "functional_parquet.alltypessmall"
        target_table = "test_set_column_orders"
        qualified_target_table = "{0}.{1}".format(unique_database,
                                                  target_table)
        hdfs_path = get_fs_path("/test-warehouse/{0}.db/{1}/".format(
            unique_database, target_table))

        # Create table
        query = "create table {0} like {1} stored as parquet".format(
            qualified_target_table, source_table)
        self.execute_query(query)

        # Insert data
        query = (
            "insert into {0} partition(year, month) select * from {1}").format(
                qualified_target_table, source_table)
        self.execute_query(query)

        # Download hdfs files and verify column orders
        check_call(['hdfs', 'dfs', '-get', hdfs_path, tmpdir.strpath])

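        # alltypessmall has 11 non-partition columns; each should be written with TypeDefinedOrder.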
        expected_col_orders = [ColumnOrder(TYPE_ORDER=TypeDefinedOrder())] * 11

        for root, subdirs, files in os.walk(tmpdir.strpath):
            for f in files:
                parquet_file = os.path.join(root, str(f))
                file_meta_data = get_parquet_metadata(parquet_file)
                assert file_meta_data.column_orders == expected_col_orders
Example 2
    def test_set_column_orders(self, vector, unique_database, tmpdir):
        """Tests that the Parquet writers set FileMetaData::column_orders."""
        source_table = "functional_parquet.alltypessmall"
        target_table = "test_set_column_orders"
        qualified_target_table = "{0}.{1}".format(unique_database,
                                                  target_table)
        hdfs_path = get_fs_path("/test-warehouse/{0}.db/{1}/".format(
            unique_database, target_table))

        # Create table
        query = "create table {0} like {1} stored as parquet".format(
            qualified_target_table, source_table)
        self.execute_query(query)

        # Insert data
        query = (
            "insert into {0} partition(year, month) select * from {1}").format(
                qualified_target_table, source_table)
        self.execute_query(query)

        # Download hdfs files and verify column orders
        file_metadata_list = get_parquet_metadata_from_hdfs_folder(
            hdfs_path, tmpdir.strpath)

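        # alltypessmall has 11 non-partition columns; each should be written with TypeDefinedOrder.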
        expected_col_orders = [ColumnOrder(TYPE_ORDER=TypeDefinedOrder())] * 11

        for file_metadata in file_metadata_list:
            assert file_metadata.column_orders == expected_col_orders
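
The only change in Example 2 is that the HDFS download and the directory walk are folded into a get_parquet_metadata_from_hdfs_folder helper that returns the parsed FileMetaData objects directly. Below is a minimal sketch of what such a helper could look like, assuming it simply wraps the check_call/os.walk pattern from Example 1; the '.parq' filename filter is an added assumption and is not part of the original code.

    import os
    from subprocess import check_call


    def get_parquet_metadata_from_hdfs_folder(hdfs_path, tmp_dir):
        """Copies everything under 'hdfs_path' into the local directory 'tmp_dir' and
        returns a list with the Parquet FileMetaData of each downloaded data file."""
        check_call(['hdfs', 'dfs', '-get', hdfs_path, tmp_dir])
        result = []
        for root, subdirs, files in os.walk(tmp_dir):
            for f in files:
                # Assumed filter: skip any non-Parquet artifacts copied alongside the data.
                if not f.endswith('.parq'):
                    continue
                # get_parquet_metadata is the same footer-parsing helper used in Example 1.
                result.append(get_parquet_metadata(os.path.join(root, str(f))))
        return result

With a helper along these lines, the test body no longer calls check_call or os.walk directly, which is the point of the refactor shown in Example 2.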