def test_wide_table(self, vector):
    """Compare a widetable_<N>_cols table with the rows widetable generates.

    The column count ('num_cols') and the table format come from the test
    vector. The table is only queried here, never created, so it has to exist
    already in the database QueryTestSectionReader resolves for the format.
    """
    column_count = vector.get_value('num_cols')
    row_count = 10
    database = QueryTestSectionReader.get_db_name(vector.get_value('table_format'))
    qualified_name = "%s.widetable_%s_cols" % (database, column_count)

    # Check the row count first, before looking at any contents.
    count_query = "select count(*) from %s " % qualified_name
    count_result = self.client.execute(count_query)
    assert count_result.data == [str(row_count)]

    generated_rows = widetable.get_data(column_count, row_count, quote_strings=True)
    select_result = self.client.execute("select * from %s" % qualified_name)
    col_types = parse_column_types(select_result.schema)
    col_labels = parse_column_labels(select_result.schema)

    # Both result wrappers are built with order_matters=False.
    expected = QueryTestResult(
        generated_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual
def test_insert_wide_table(self, vector):
    """Create a wide table, insert one generated row and read it back.

    The table lives in the database QueryTestSectionReader resolves for the
    vector's table format. Any table left over under the same name is dropped
    before the CREATE TABLE is issued.
    """
    table_format = vector.get_value('table_format')
    file_format = table_format.file_format

    # Text can't handle as many columns as Parquet (codegen takes forever),
    # so text tables get the smaller column count.
    if file_format == 'text':
        column_count = 1000
    else:
        column_count = 2000

    database = QueryTestSectionReader.get_db_name(table_format)
    target = database + ".insert_widetable"
    # Runs with codegen disabled use a separately named table.
    if vector.get_value('exec_option')['disable_codegen']:
        target = target + "_codegen_disabled"
    self.client.execute("drop table if exists " + target)

    column_defs = ','.join(widetable.get_columns(column_count))
    ddl = "CREATE TABLE " + target + "(" + column_defs + ")"
    if file_format == 'parquet':
        ddl = ddl + " stored as parquet"
    self.client.execute(ddl)

    # Only the first generated row is inserted.
    row = widetable.get_data(column_count, 1, quote_strings=True)[0]
    self.client.execute("INSERT INTO " + target + " VALUES(" + row + ")")

    count_result = self.client.execute("select count(*) from " + target)
    assert count_result.data == ["1"]

    select_result = self.client.execute("select * from " + target)
    col_types = parse_column_types(select_result.schema)
    col_labels = parse_column_labels(select_result.schema)
    expected = QueryTestResult([row], col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual
def test_wide_table(self, vector):
    """Compare a widetable_<N>_cols table with the rows widetable generates.

    Kudu vectors are marked xfail (IMPALA-3718). For HBase only the number of
    returned rows is checked; every other format gets a full comparison built
    with order_matters=False.
    """
    file_format = vector.get_value('table_format').file_format
    if file_format == 'kudu':
        pytest.xfail("IMPALA-3718: Extend Kudu functional test support")

    column_count = vector.get_value('num_cols')
    # Due to the way HBase handles duplicate row keys, HBase tables hold a
    # different number of rows than HDFS tables.
    if file_format != 'hbase':
        row_count = 10
    else:
        row_count = 2

    database = QueryTestSectionReader.get_db_name(vector.get_value('table_format'))
    qualified_name = "%s.widetable_%s_cols" % (database, column_count)

    count_result = self.client.execute("select count(*) from %s " % qualified_name)
    assert count_result.data == [str(row_count)]

    generated_rows = widetable.get_data(column_count, row_count, quote_strings=True)
    select_result = self.client.execute("select * from %s" % qualified_name)

    # HBase stops here: row contents are not compared for that format.
    if file_format == 'hbase':
        assert len(select_result.data) == row_count
        return

    col_types = parse_column_types(select_result.schema)
    col_labels = parse_column_labels(select_result.schema)
    expected = QueryTestResult(
        generated_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual
def test_many_grouping_columns(self, vector):
    """Test that an aggregate with many grouping columns works.

    Runs ``select distinct *`` over widetable_1000_cols with the vector's
    exec_option and table_format, then compares the rows (order_matters=False)
    with widetable.get_data(1000, 10, quote_strings=True).
    """
    fmt = vector.get_value('table_format')
    options = vector.get_value('exec_option')

    # NOTE(review): the previous comment here read "Ensure codegen is enabled",
    # but the vector's exec_option is passed through unchanged -- confirm the
    # test vector is what guarantees that.
    select_result = self.execute_query(
        "select distinct * from widetable_1000_cols", options, table_format=fmt)

    # All rows should be distinct, so the full generated data set is expected.
    generated_rows = widetable.get_data(1000, 10, quote_strings=True)
    col_types = parse_column_types(select_result.schema)
    col_labels = parse_column_labels(select_result.schema)
    expected = QueryTestResult(
        generated_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual
def test_insert_wide_table(self, vector, unique_database):
    """Create a wide table in unique_database, insert one row and read it back.

    The column types and labels used for the comparison are taken directly
    from the select result (column_types / column_labels).
    """
    table_format = vector.get_value('table_format')
    file_format = table_format.file_format

    # Text can't handle as many columns as Parquet (codegen takes forever),
    # so text tables get the smaller column count.
    column_count = 2000
    if file_format == 'text':
        column_count = 1000

    target = unique_database + ".insert_widetable"
    # Runs with codegen disabled use a separately named table.
    if vector.get_value('exec_option')['disable_codegen']:
        target = target + "_codegen_disabled"

    column_defs = ','.join(widetable.get_columns(column_count))
    ddl = "CREATE TABLE " + target + "(" + column_defs + ")"
    if file_format == 'parquet':
        ddl = ddl + " stored as parquet"
    self.client.execute(ddl)

    # Only the first generated row is inserted.
    row = widetable.get_data(column_count, 1, quote_strings=True)[0]
    self.client.execute("INSERT INTO " + target + " VALUES(" + row + ")")

    count_result = self.client.execute("select count(*) from " + target)
    assert count_result.data == ["1"]

    select_result = self.client.execute("select * from " + target)
    col_types = select_result.column_types
    col_labels = select_result.column_labels
    expected = QueryTestResult([row], col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual
def test_wide_table(self, vector):
    """Compare a widetable_<N>_cols table with the rows widetable generates.

    The column count ("num_cols") and the table format come from the test
    vector. Only column types, not labels, are handed to QueryTestResult in
    this variant.
    """
    column_count = vector.get_value("num_cols")
    row_count = 10
    database = QueryTestSectionReader.get_db_name(vector.get_value("table_format"))
    qualified_name = "%s.widetable_%s_cols" % (database, column_count)

    # Check the row count first, before looking at any contents.
    count_query = "select count(*) from %s " % qualified_name
    count_result = self.client.execute(count_query)
    assert count_result.data == [str(row_count)]

    generated_rows = widetable.get_data(column_count, row_count, quote_strings=True)
    select_result = self.client.execute("select * from %s" % qualified_name)
    col_types = parse_column_types(select_result.schema)

    expected = QueryTestResult(generated_rows, col_types, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, order_matters=False)
    assert expected == actual
def test_many_grouping_columns(self, vector):
    """Test that an aggregate with many grouping columns works.

    Issues ``select distinct *`` against widetable_1000_cols using the
    exec_option and table_format from the test vector. The returned rows are
    compared with widetable's generated data (1000 columns, 10 rows), with
    order_matters=False on both sides.
    """
    distinct_query = "select distinct * from widetable_1000_cols"
    fmt = vector.get_value('table_format')
    options = vector.get_value('exec_option')

    # NOTE(review): an earlier comment claimed this "ensures codegen is
    # enabled"; nothing here changes exec_option, so verify against the vector.
    query_result = self.execute_query(distinct_query, options, table_format=fmt)

    # All rows should be distinct, so every generated row is expected back.
    generated_rows = widetable.get_data(1000, 10, quote_strings=True)
    col_types = parse_column_types(query_result.schema)
    col_labels = parse_column_labels(query_result.schema)

    expected = QueryTestResult(
        generated_rows, col_types, col_labels, order_matters=False)
    returned_rows = parse_result_rows(query_result)
    actual = QueryTestResult(
        returned_rows, col_types, col_labels, order_matters=False)
    assert expected == actual
def test_wide_table(self, vector):
    """Compare a widetable_<N>_cols table with the rows widetable generates.

    For HBase only the number of returned rows is checked; every other format
    gets a full comparison built with order_matters=False.
    """
    column_count = vector.get_value('num_cols')
    file_format = vector.get_value('table_format').file_format

    # Due to the way HBase handles duplicate row keys, HBase tables hold a
    # different number of rows than HDFS tables.
    if file_format != 'hbase':
        row_count = 10
    else:
        row_count = 2

    database = QueryTestSectionReader.get_db_name(vector.get_value('table_format'))
    qualified_name = "%s.widetable_%s_cols" % (database, column_count)

    count_result = self.client.execute("select count(*) from %s " % qualified_name)
    assert count_result.data == [str(row_count)]

    generated_rows = widetable.get_data(column_count, row_count, quote_strings=True)
    select_result = self.client.execute("select * from %s" % qualified_name)

    # HBase stops here: row contents are not compared for that format.
    if file_format == 'hbase':
        assert len(select_result.data) == row_count
        return

    col_types = parse_column_types(select_result.schema)
    col_labels = parse_column_labels(select_result.schema)
    expected = QueryTestResult(
        generated_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual