def test_fuzz_nested_types(self, vector, unique_database):
    """Fuzz-test scans of the nested-types table ``complextypestbl``.

    Only vectors whose table format is 'parquet' or 'orc' are exercised; any
    other file format is skipped. Besides the default behaviour of
    ``run_fuzz_test``, two extra ``count(*)`` queries that unnest the table's
    arrays, maps and structs are supplied through ``custom_queries``.

    The literal ``10`` is forwarded positionally to ``run_fuzz_test``; its
    meaning is defined by that helper.
    """
    fmt = vector.get_value('table_format')
    source_db = QueryTestSectionReader.get_db_name(fmt)
    if fmt.file_format not in ('parquet', 'orc'):
        pytest.skip()

    tbl = "complextypestbl"

    # Extra queries whose job is to scan the nested values.
    collections_query = (
        "select count(*) from ("
        " select distinct t.id, a.pos, a.item, aa.pos, aa.item, m.key, m.value,"
        " ma.key, ma.value, t.nested_struct.* "
        " from complextypestbl t, t.int_array a, t.int_array_array.item aa, "
        " t.int_map m, t.int_map_array.item ma) q"
    )
    nested_struct_query = (
        "select count(*) from ("
        " select t.id, t.nested_struct.a, b.pos, b.item, i.e, i.f, m.key,"
        " arr.pos, arr.item "
        " from complextypestbl t, t.nested_struct.b, t.nested_struct.c.d.item i,"
        " t.nested_struct.g m, m.value.h.i arr) q"
    )

    self.run_fuzz_test(
        vector, source_db, tbl, unique_database, tbl, 10,
        custom_queries=[collections_query, nested_struct_query])
def test_wide_table(self, vector):
    """Verify the row count and contents of a ``widetable_<N>_cols`` table.

    ``N`` comes from the vector's 'num_cols' value. Kudu vectors are marked as
    expected failures. For HBase only the number of returned rows is checked;
    for every other format the full result set is compared, ignoring row
    order, against the data produced by ``widetable.get_data``.
    """
    table_format = vector.get_value('table_format')
    if table_format.file_format == 'kudu':
        pytest.xfail("IMPALA-3718: Extend Kudu functional test support")

    col_count = vector.get_value('num_cols')
    is_hbase = table_format.file_format == 'hbase'
    # HBase handles duplicate row keys differently, so its tables hold a
    # different number of rows than the HDFS-backed ones.
    row_count = 2 if is_hbase else 10

    db = QueryTestSectionReader.get_db_name(table_format)
    qualified_table = "%s.widetable_%s_cols" % (db, col_count)

    count_result = self.client.execute(
        "select count(*) from %s " % qualified_table)
    assert count_result.data == [str(row_count)]

    expected_rows = widetable.get_data(col_count, row_count, quote_strings=True)
    scan_result = self.client.execute("select * from %s" % qualified_table)

    if is_hbase:
        # For HBase only the row count is validated, not the row contents.
        assert len(scan_result.data) == row_count
        return

    col_types = parse_column_types(scan_result.schema)
    col_labels = parse_column_labels(scan_result.schema)
    expected = QueryTestResult(
        expected_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(scan_result), col_types, col_labels,
        order_matters=False)
    assert expected == actual
def test_fuzz_nested_types(self, vector, unique_database):
    """Fuzz-test scans of ``complextypestbl``; Parquet vectors only.

    Vectors with any file format other than 'parquet' are skipped. The
    literal ``10`` is forwarded positionally to ``run_fuzz_test``; its meaning
    is defined by that helper.
    """
    fmt = vector.get_value('table_format')
    source_db = QueryTestSectionReader.get_db_name(fmt)
    if not fmt.file_format == 'parquet':
        pytest.skip()

    tbl = "complextypestbl"
    self.run_fuzz_test(vector, source_db, tbl, unique_database, tbl, 10)
def __build_query(self):
    """Fill in ``self.db``, ``self.query_str`` and ``self.table_format_str``.

    ``db`` is looked up from the test vector and scale factor, ``query_str``
    is replaced by the result of ``QueryTestSectionReader.build_query`` on its
    whitespace-stripped value, and ``table_format_str`` becomes
    ``<file_format>/<compression_codec>/<compression_type>``.
    """
    self.db = QueryTestSectionReader.get_db_name(
        self.test_vector, self.scale_factor)

    stripped_query = self.query_str.strip()
    self.query_str = QueryTestSectionReader.build_query(stripped_query)

    format_parts = (
        self.test_vector.file_format,
        self.test_vector.compression_codec,
        self.test_vector.compression_type,
    )
    self.table_format_str = '%s/%s/%s' % format_parts
def test_fuzz_nested_types(self, vector, unique_database):
    """Run the fuzz test on ``complextypestbl`` for Parquet table formats.

    Every non-'parquet' file format is skipped. The trailing ``10`` is passed
    through to ``run_fuzz_test`` unchanged; that helper defines what it means.
    """
    table_format = vector.get_value('table_format')
    db_of_format = QueryTestSectionReader.get_db_name(table_format)

    is_parquet = table_format.file_format == 'parquet'
    if not is_parquet:
        pytest.skip()

    nested_table = "complextypestbl"
    self.run_fuzz_test(
        vector, db_of_format, nested_table, unique_database, nested_table, 10)
def _build_query(self):
    """Populate ``db``, ``query_str`` and ``table_format_str`` on this object.

    The database name is derived from the test vector and scale factor, the
    query string is stripped and passed through
    ``QueryTestSectionReader.build_query``, and the table-format string is
    assembled as ``file_format/compression_codec/compression_type``.
    """
    reader = QueryTestSectionReader
    self.db = reader.get_db_name(self.test_vector, self.scale_factor)
    self.query_str = reader.build_query(self.query_str.strip())

    file_format = self.test_vector.file_format
    codec = self.test_vector.compression_codec
    compression_type = self.test_vector.compression_type
    self.table_format_str = '%s/%s/%s' % (file_format, codec, compression_type)
def change_database(cls, impala_client, table_format=None, db_name=None,
                    scale_factor=None):
    """Switch ``impala_client`` to a database by executing ``use <db>``.

    Args:
        impala_client: Client object; its ``clear_configuration()`` and
            ``execute()`` methods are called, in that order.
        table_format: Used to derive the database name through
            ``QueryTestSectionReader.get_db_name`` when ``db_name`` is not
            given. Must not be None in that case.
        db_name: Explicit database name. When provided it is used as-is and
            ``table_format`` is ignored.
        scale_factor: Forwarded to ``get_db_name``; a falsy value is replaced
            by the empty string.

    Raises:
        AssertionError: If both ``db_name`` and ``table_format`` are None.
    """
    # Identity checks: "== None" can be fooled by objects that override
    # __eq__, which would make a supplied argument look missing.
    if db_name is None:
        assert table_format is not None, \
            'either db_name or table_format must be provided'
        db_name = QueryTestSectionReader.get_db_name(
            table_format, scale_factor or '')
    query = 'use %s' % db_name
    # Clear the exec_options before executing a USE statement.
    # The USE statement should not fail for negative exec_option tests.
    impala_client.clear_configuration()
    impala_client.execute(query)
def change_database(cls, impala_client, table_format=None, db_name=None,
                    scale_factor=None):
    """Execute ``use <db>`` on ``impala_client`` to change its database.

    Args:
        impala_client: Client object whose ``clear_configuration()`` and then
            ``execute()`` methods are invoked.
        table_format: Source for the database name (via
            ``QueryTestSectionReader.get_db_name``) when ``db_name`` is not
            supplied; required in that situation.
        db_name: Database name to use directly. Takes precedence over
            ``table_format``.
        scale_factor: Passed to ``get_db_name``; falsy values become ``''``.

    Raises:
        AssertionError: If neither ``db_name`` nor ``table_format`` is given.
    """
    # Compare against None by identity; equality comparison would treat an
    # argument with a permissive __eq__ as if it had not been passed.
    if db_name is None:
        assert table_format is not None, \
            'either db_name or table_format must be provided'
        db_name = QueryTestSectionReader.get_db_name(
            table_format, scale_factor or '')
    query = 'use %s' % db_name
    # Clear the exec_options before executing a USE statement.
    # The USE statement should not fail for negative exec_option tests.
    impala_client.clear_configuration()
    impala_client.execute(query)
def test_exprs(self, vector):
    """Run the ``QueryTest/exprs`` test case and check the active database.

    Avro vectors are skipped and HBase vectors are marked as expected
    failures. After the test case runs, ``select current_database()`` must
    return the database name that corresponds to the vector's table format.
    """
    table_format = vector.get_value('table_format')
    file_format = table_format.file_format

    # TODO: Enable some of these tests for Avro if possible.
    # Timestamp expressions are not evaluated against Avro tables, which
    # don't support a timestamp type.
    if file_format == 'avro':
        pytest.skip()
    if file_format == 'hbase':
        pytest.xfail(
            "A lot of queries check for NULLs, which hbase does not recognize")

    self.run_test_case('QueryTest/exprs', vector)

    # Executing with the vector switches the current database to the one that
    # matches the table format before running select current_database().
    # An error is thrown if multiple values come back.
    actual_db = self.execute_scalar('select current_database()', vector=vector)
    expected_db = QueryTestSectionReader.get_db_name(table_format)
    assert actual_db == expected_db
def test_exprs(self, vector):
    """Exercise ``QueryTest/exprs`` and verify the resulting current database.

    The test is skipped for Avro and xfailed for HBase. For all other table
    formats the test case is run, and ``select current_database()`` is then
    expected to equal the database name derived from the table format.
    """
    fmt = vector.get_value('table_format')

    # TODO: Enable some of these tests for Avro if possible.
    # Avro tables don't support a timestamp type, so timestamp expressions
    # are not attempted on them.
    if fmt.file_format == 'avro':
        pytest.skip()
    elif fmt.file_format == 'hbase':
        pytest.xfail(
            "A lot of queries check for NULLs, which hbase does not recognize")

    self.run_test_case('QueryTest/exprs', vector)

    # This changes the current database to the one matching the table format
    # and then executes select current_database(). An error will be thrown if
    # multiple values are returned.
    reported_db = self.execute_scalar(
        'select current_database()', vector=vector)
    assert reported_db == QueryTestSectionReader.get_db_name(fmt)
def test_fuzz_decimal_tbl(self, vector, unique_database):
    """Fuzz-test scans of the decimal table for the vector's table format.

    Avro uses ``avro_decimal_tbl`` and runs only with the 'snap' codec and
    'block' compression type; other Avro combinations are skipped. Text
    vectors with a compression codec other than 'none' are skipped as well.
    All remaining vectors use ``decimal_tbl``.

    The literal ``10`` is forwarded positionally to ``run_fuzz_test``; its
    meaning is defined by that helper.
    """
    fmt = vector.get_value('table_format')
    tbl = "decimal_tbl"

    if fmt.file_format == 'avro':
        tbl = "avro_decimal_tbl"
        is_snap_block = (fmt.compression_codec == 'snap' and
                         fmt.compression_type == 'block')
        if not is_snap_block:
            pytest.skip()
    elif fmt.file_format == 'text' and fmt.compression_codec != 'none':
        # decimal_tbl is not present for these file formats
        pytest.skip()

    source_db = QueryTestSectionReader.get_db_name(fmt)
    self.run_fuzz_test(vector, source_db, tbl, unique_database, tbl, 10)
def test_fuzz_decimal_tbl(self, vector, unique_database):
    """Fuzz-test scans of the decimal table for the vector's table format.

    Skipped combinations:
      * Avro unless the codec is 'snap' and the compression type is 'block'
        (Avro reads from ``avro_decimal_tbl`` instead of ``decimal_tbl``);
      * the 'rc' and 'seq' file formats;
      * text with any compression codec other than 'none'.

    The literal ``10`` is forwarded positionally to ``run_fuzz_test``; its
    meaning is defined by that helper.
    """
    fmt = vector.get_value('table_format')
    tbl = "decimal_tbl"

    if fmt.file_format == 'avro':
        tbl = "avro_decimal_tbl"
        is_snap_block = (fmt.compression_codec == 'snap' and
                         fmt.compression_type == 'block')
        if not is_snap_block:
            pytest.skip()
    elif fmt.file_format in ('rc', 'seq'):
        pytest.skip()
    elif fmt.file_format == 'text' and fmt.compression_codec != 'none':
        # decimal_tbl is not present for these file formats
        pytest.skip()

    source_db = QueryTestSectionReader.get_db_name(fmt)
    self.run_fuzz_test(vector, source_db, tbl, unique_database, tbl, 10)
def test_exprs(self, vector):
    """Run ``QueryTest/exprs`` with the vector's expr-rewrite setting applied.

    The vector's 'enable_expr_rewrites' value is copied into its
    'exec_option' dict before anything else. Avro vectors are skipped; HBase
    and Kudu vectors are marked as expected failures. After the test case
    runs, ``select current_database()`` must match the database name derived
    from the table format.
    """
    # Read the setting first, then store it in the exec options.
    rewrites_setting = vector.get_value('enable_expr_rewrites')
    exec_options = vector.get_value('exec_option')
    exec_options['enable_expr_rewrites'] = rewrites_setting

    table_format = vector.get_value('table_format')
    file_format = table_format.file_format

    # TODO: Enable some of these tests for Avro if possible.
    # Avro tables don't support a timestamp type, so timestamp expressions
    # are not attempted on them.
    if file_format == 'avro':
        pytest.skip()
    if file_format == 'hbase':
        pytest.xfail(
            "A lot of queries check for NULLs, which hbase does not recognize")
    if file_format == 'kudu':
        # Can't load LikeTbl without KUDU-1570.
        pytest.xfail(
            "Need support for Kudu tables with nullable PKs (KUDU-1570)")

    self.run_test_case('QueryTest/exprs', vector)

    # This changes the current database to the one matching the table format
    # and then executes select current_database(). An error will be thrown if
    # multiple values are returned.
    actual_db = self.execute_scalar('select current_database()', vector=vector)
    expected_db = QueryTestSectionReader.get_db_name(table_format)
    assert actual_db == expected_db
def test_wide_table(self, vector):
    """Verify the row count and contents of a ``widetable_<N>_cols`` table.

    ``N`` is the vector's 'num_cols' value. For HBase only the number of
    returned rows is checked; for every other format the full result set is
    compared, ignoring row order, against ``widetable.get_data``.
    """
    col_count = vector.get_value('num_cols')
    table_format = vector.get_value('table_format')
    is_hbase = table_format.file_format == 'hbase'

    # HBase handles duplicate row keys differently, so its tables hold a
    # different number of rows than the HDFS-backed ones.
    row_count = 2 if is_hbase else 10

    db = QueryTestSectionReader.get_db_name(table_format)
    qualified_table = "%s.widetable_%s_cols" % (db, col_count)

    count_result = self.client.execute(
        "select count(*) from %s " % qualified_table)
    assert count_result.data == [str(row_count)]

    expected_rows = widetable.get_data(col_count, row_count, quote_strings=True)
    scan_result = self.client.execute("select * from %s" % qualified_table)

    if is_hbase:
        # For HBase only the row count is validated, not the row contents.
        assert len(scan_result.data) == row_count
        return

    col_types = parse_column_types(scan_result.schema)
    col_labels = parse_column_labels(scan_result.schema)
    expected = QueryTestResult(
        expected_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(scan_result), col_types, col_labels,
        order_matters=False)
    assert expected == actual
def test_fuzz_alltypes(self, vector, unique_database):
    """Run the fuzz test on the ``alltypes`` table of the vector's format.

    The source database is derived from the vector's table format; the table
    name is passed to ``run_fuzz_test`` twice, as in the original call.
    """
    tbl = "alltypes"
    source_db = QueryTestSectionReader.get_db_name(
        vector.get_value('table_format'))
    self.run_fuzz_test(vector, source_db, tbl, unique_database, tbl)
def test_fuzz_alltypes(self, vector, unique_database):
    """Fuzz-test scans of the ``alltypes`` table for the given test vector.

    Looks up the database matching the vector's table format and delegates to
    ``run_fuzz_test``, supplying ``alltypes`` for both table-name arguments.
    """
    fmt = vector.get_value('table_format')
    db_of_format = QueryTestSectionReader.get_db_name(fmt)
    alltypes = "alltypes"
    self.run_fuzz_test(
        vector, db_of_format, alltypes, unique_database, alltypes)