def test_kll(self, vector, unique_database):
  """Run the DataSketches KLL tests against sketch tables produced by both
  Hive and Impala."""
  for tbl in ('kll_sketches_from_hive', 'kll_sketches_from_impala'):
    create_table_from_parquet(self.client, unique_database, tbl)
  self.run_test_case('QueryTest/datasketches-kll', vector, unique_database)
def test_page_index(self, vector, unique_database):
  """Test that using the Parquet page index works well. The various test files
  contain queries that exercise the page selection and value-skipping logic
  against columns with different types and encodings."""
  # Work on a copy so the 'batch_size' overrides below do not leak into other
  # tests that share this vector (same pattern as the extended page-index test).
  new_vector = deepcopy(vector)
  for tbl in ('decimals_1_10', 'nested_decimals', 'double_nested_decimals',
              'alltypes_tiny_pages', 'alltypes_tiny_pages_plain'):
    create_table_from_parquet(self.client, unique_database, tbl)
  exec_opts = new_vector.get_value('exec_option')
  # batch_size=0 uses the default batch size; batch_size=1 stresses the
  # value-skipping logic one row at a time.
  for batch_size in [0, 1]:
    exec_opts['batch_size'] = batch_size
    self.run_test_case('QueryTest/parquet-page-index', new_vector,
                       unique_database)
    self.run_test_case('QueryTest/nested-types-parquet-page-index', new_vector,
                       unique_database)
    self.run_test_case(
        'QueryTest/parquet-page-index-alltypes-tiny-pages', new_vector,
        unique_database)
    self.run_test_case(
        'QueryTest/parquet-page-index-alltypes-tiny-pages-plain', new_vector,
        unique_database)
  for batch_size in [0, 32]:
    exec_opts['batch_size'] = batch_size
    self.run_test_case('QueryTest/parquet-page-index-large', new_vector,
                       unique_database)
def test_invalid_stats(self, vector, unique_database):
  """IMPALA-6538: Test that reading parquet files whose statistics carry
  invalid 'min_value'/'max_value' fields works correctly. Both fields are
  NaN in the test file, so they must be ignored."""
  table_name = 'min_max_is_nan'
  create_table_from_parquet(self.client, unique_database, table_name)
  self.run_test_case('QueryTest/parquet-invalid-minmax-stats', vector,
                     unique_database)
def _test_conversion_with_validation(self, vector, unique_database):
  """Test that timestamp validation also works as expected when converting
  timestamps. Runs as part of test_conversion() to avoid restarting the
  cluster."""
  for tbl in ('out_of_range_timestamp_hive_211',
              'out_of_range_timestamp2_hive_211'):
    create_table_from_parquet(self.client, unique_database, tbl)
  # Drop the option so the individual tests can override abort_on_error.
  del vector.get_value('exec_option')['abort_on_error']
  self.run_test_case('QueryTest/out-of-range-timestamp-local-tz-conversion',
                     vector, unique_database)
def test_page_index(self, vector, unique_database):
  """Test that using the Parquet page index works well. The various test files
  contain queries that exercise the page selection and value-skipping logic
  against columns with different types and encodings."""
  # Copy the vector so the option tweaks below do not leak to other tests.
  new_vector = deepcopy(vector)
  exec_opts = new_vector.get_value('exec_option')
  # Let the individual test files control abort_on_error themselves.
  del exec_opts['abort_on_error']
  tables = ['decimals_1_10', 'nested_decimals', 'double_nested_decimals',
            'alltypes_tiny_pages', 'alltypes_tiny_pages_plain',
            'alltypes_empty_pages', 'alltypes_invalid_pages',
            'customer_multiblock_page_index']
  for tbl in tables:
    create_table_from_parquet(self.client, unique_database, tbl)
  page_index_tests = ['QueryTest/parquet-page-index',
                      'QueryTest/nested-types-parquet-page-index',
                      'QueryTest/parquet-page-index-alltypes-tiny-pages',
                      'QueryTest/parquet-page-index-alltypes-tiny-pages-plain']
  # batch_size=0 is the default batch size; batch_size=1 stresses the
  # value-skipping logic one row at a time.
  for batch_size in (0, 1):
    exec_opts['batch_size'] = batch_size
    for test_file in page_index_tests:
      self.run_test_case(test_file, new_vector, unique_database)
  # The large test is run over a wide range of batch sizes.
  for batch_size in (0, 1, 2, 3, 4, 8, 16, 32, 64, 128, 256, 512):
    exec_opts['batch_size'] = batch_size
    self.run_test_case('QueryTest/parquet-page-index-large', new_vector,
                       unique_database)