def temp_kudu_table(self, kudu, col_types, name=None, num_key_cols=1, col_names=None,
                    prepend_db_name=True, db_name=None):
    """Create a Kudu table, yield it, and delete it again afterwards.

    This function should be used in a "with" context.

    Args:
        kudu: Client used to create, open and delete the table. It must be a
            kudu.client.Client.
        col_types: Column types, one per column, in column order.
        name: Table name. If not provided, self.random_table_name() is used.
        num_key_cols: The first 'num_key_cols' columns are declared non-nullable
            and are used both as the primary key and as the hash partition
            columns (2 hash partitions).
        col_names: Column names. If not provided, the letters "a", "b", "c", ...
            are used, which limits the table to 26 columns.
        prepend_db_name: If True, the table name is prefixed with 'db_name' (or,
            when 'db_name' is not given, the lower-cased self.get_db_name())
            followed by ".".
        db_name: Optional database name to use for the prefix.

    Yields:
        The object returned by kudu.table() for the newly created table.

    Raises:
        Exception: If 'col_names' is not given and more than 26 column types
            were passed.

    Example:
        with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
            assert kudu.table_exists(kudu_table.name)
        assert not kudu.table_exists(kudu_table.name)
    """
    if not col_names:
        if len(col_types) > 26:
            raise Exception("Too many columns for default naming")
        # range() instead of the Python 2-only xrange(), so that default naming
        # works on both Python 2 and Python 3.
        col_names = [chr(97 + i) for i in range(len(col_types))]
    key_cols = col_names[:num_key_cols]

    schema_builder = SchemaBuilder()
    for i, col_type in enumerate(col_types):
        column_spec = schema_builder.add_column(col_names[i], type_=col_type)
        if i < num_key_cols:
            # Key columns are declared NOT NULL.
            column_spec.nullable(False)
    schema_builder.set_primary_keys(key_cols)
    schema = schema_builder.build()

    name = name or self.random_table_name()
    if prepend_db_name:
        name = (db_name or self.get_db_name().lower()) + "." + name
    kudu.create_table(
        name, schema,
        partitioning=Partitioning().add_hash_partitions(key_cols, 2))
    try:
        yield kudu.table(name)
    finally:
        # Always clean up, but only if the table still exists at this point.
        if kudu.table_exists(name):
            kudu.delete_table(name)
def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                             unique_database):
    """Check that a single unbounded range partition gets printed correctly.

    A Kudu table range partitioned on "id" (with no explicit range bounds added)
    is created directly through the Kudu client, exposed to Impala as an
    external table, and SHOW RANGE PARTITIONS is expected to return exactly one
    'UNBOUNDED' row. The Kudu table is deleted again at the end.
    """
    builder = SchemaBuilder()
    builder.add_column("id", INT64).nullable(False)
    builder.set_primary_keys(["id"])
    schema = builder.build()
    name = unique_database + ".unbounded_range_table"

    try:
        range_partitioning = Partitioning().set_range_partition_columns(["id"])
        kudu_client.create_table(name, schema, partitioning=range_partitioning)
        kudu_table = kudu_client.table(name)

        # Map the Kudu table into Impala as an external table.
        impala_table_name = self.get_kudu_table_base_name(kudu_table.name)
        props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_table.name
        create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
            impala_table_name, props)
        cursor.execute(create_stmt)

        with self.drop_impala_table_after_context(cursor, impala_table_name):
            cursor.execute("SHOW RANGE PARTITIONS %s" % impala_table_name)
            expected_description = [
                ('RANGE (id)', 'STRING', None, None, None, None, None)]
            assert cursor.description == expected_description
            assert cursor.fetchall() == [('UNBOUNDED',)]
    finally:
        # Remove the Kudu table if it was created.
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)
def test_column_name_case(self, cursor, kudu_client, unique_database):
    """IMPALA-5286: an external Kudu table whose column name contains upper case
    letters must be handled correctly.

    The table is created through the Kudu client with a key column named 'Key'
    and is then queried and altered through Impala using differently-cased
    spellings of the column names.
    """
    table_name = '%s.kudu_external_test' % unique_database
    if kudu_client.table_exists(table_name):
        kudu_client.delete_table(table_name)

    def run(statement_template, *extra_args):
        # Every statement takes the table name as its first format argument.
        cursor.execute(statement_template % ((table_name,) + extra_args))

    key_col = 'Key'
    builder = SchemaBuilder()
    builder.add_column(key_col, INT64).nullable(False).primary_key()
    schema = builder.build()
    partitioning = Partitioning().set_range_partition_columns([key_col])
    partitioning = partitioning.add_range_partition([1], [10])

    try:
        kudu_client.create_table(table_name, schema, partitioning)
        props = "tblproperties('kudu.table_name' = '%s')" % table_name
        run("create external table %s stored as kudu %s", props)

        # Perform a variety of operations on the table.
        run("insert into %s (kEy) values (5), (1), (4)")
        run("select keY from %s where KeY %% 2 = 0")
        assert cursor.fetchall() == [(4, )]
        run("select * from %s order by kEY")
        assert cursor.fetchall() == [(1, ), (4, ), (5, )]

        run("alter table %s add range partition 11 < values < 20")

        new_key = "KEY2"
        run("alter table %s change KEy %s bigint", new_key)
        val_col = "vaL"
        run("alter table %s add columns (%s bigint)", val_col)

        run("describe %s")
        described = cursor.fetchall()
        # 'describe' should print the column name in lower case.
        assert new_key.lower() in described[0]
        assert val_col.lower() in described[1]

        run("alter table %s drop column Val")
        run("describe %s")
        assert len(cursor.fetchall()) == 1

        run("alter table %s drop range partition 11 < values < 20")
    finally:
        # Remove the Kudu table if it still exists.
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)
def schema_builder():
    """
    Return a new kudu.SchemaBuilder instance.

    Examples
    --------
    builder = kudu.schema_builder()
    builder.add_column('key1', kudu.int64, nullable=False)
    builder.add_column('key2', kudu.int32, nullable=False)

    (builder.add_column('name', kudu.string)
     .nullable()
     .compression('lz4'))

    builder.add_column('value1', kudu.double)
    builder.add_column('value2', kudu.int8, encoding='rle')
    builder.set_primary_keys(['key1', 'key2'])

    schema = builder.build()

    Returns
    -------
    builder : SchemaBuilder
    """
    builder = SchemaBuilder()
    return builder
def test_conflicting_column_name(self, cursor, kudu_client, unique_database):
    """IMPALA-5283: Tests that loading an external Kudu table that was created with
    column names that differ only in case results in an error.

    The Kudu table is created with the columns 'col' and 'COL'; creating the
    external Impala table on top of it is expected to raise an exception whose
    message contains the "differ only in casing" error text. The Kudu table is
    deleted before and after the test if it exists.
    """
    table_name = '%s.kudu_external_test' % unique_database
    if kudu_client.table_exists(table_name):
        kudu_client.delete_table(table_name)

    schema_builder = SchemaBuilder()
    col0 = 'col'
    schema_builder.add_column(col0, INT64).nullable(False).primary_key()
    col1 = 'COL'
    schema_builder.add_column(col1, INT64)
    schema = schema_builder.build()
    partitioning = Partitioning().set_range_partition_columns([col0])\
        .add_range_partition([1], [10])

    expected_error = ('Error loading Kudu table: Impala does not support column '
                      'names that differ only in casing')
    try:
        kudu_client.create_table(table_name, schema, partitioning)
        props = "tblproperties('kudu.table_name' = '%s')" % table_name
        cursor.execute("create external table %s stored as kudu %s"
                       % (table_name, props))
    except Exception as e:
        assert expected_error in str(e)
    else:
        # Raised from the 'else' clause so that it is not swallowed by the broad
        # 'except Exception' above and reported as a confusing message mismatch.
        raise AssertionError('create table should have resulted in an exception')
    finally:
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)
def test_table_without_partitioning(self, cursor, kudu_client, unique_database):
    """Test a Kudu table created without partitioning (i.e. equivalent to a single
    unbounded partition). It is not possible to create such a table in Impala, but
    it can be created directly in Kudu and then loaded as an external table.
    Regression test for IMPALA-5154.

    The test inserts three rows through Impala, checks the row count, and then
    expects SHOW RANGE PARTITIONS to fail with an AnalysisException because the
    table has no range partitions.
    """
    schema_builder = SchemaBuilder()
    column_spec = schema_builder.add_column("id", INT64)
    column_spec.nullable(False)
    schema_builder.set_primary_keys(["id"])
    schema = schema_builder.build()
    # An empty list of range partition columns means "no partitioning".
    partitioning = Partitioning().set_range_partition_columns([])
    name = "%s.one_big_unbounded_partition" % unique_database

    expected_error = ("AnalysisException: SHOW RANGE PARTITIONS requested but table "
                      "does not have range partitions")
    try:
        kudu_client.create_table(name, schema, partitioning=partitioning)
        # The returned handle is not needed; the call is kept so that the test
        # still opens the table through the Kudu client as it did before.
        kudu_client.table(name)

        props = "TBLPROPERTIES('kudu.table_name'='%s')" % name
        cursor.execute("CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (name, props))
        with self.drop_impala_table_after_context(cursor, name):
            cursor.execute("INSERT INTO %s VALUES (1), (2), (3)" % name)
            cursor.execute("SELECT COUNT(*) FROM %s" % name)
            assert cursor.fetchall() == [(3, )]
            try:
                cursor.execute("SHOW RANGE PARTITIONS %s" % name)
            except Exception as e:
                assert expected_error in str(e)
            else:
                # Raised from the 'else' clause so that it is not swallowed by
                # the broad 'except Exception' above.
                raise AssertionError(
                    "SHOW RANGE PARTITIONS should have resulted in an exception")
    finally:
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)
def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                             unique_database):
    """Check that a single unbounded range partition gets printed correctly.

    The Kudu table is created through the Kudu client with "id" as its only
    range partition column, mapped into Impala as an external table, and then
    inspected with SHOW RANGE PARTITIONS. The Kudu table is deleted afterwards.
    """
    id_schema_builder = SchemaBuilder()
    id_schema_builder.add_column("id", INT64).nullable(False)
    id_schema_builder.set_primary_keys(["id"])
    schema = id_schema_builder.build()
    name = unique_database + ".unbounded_range_table"

    try:
        by_id = Partitioning().set_range_partition_columns(["id"])
        kudu_client.create_table(name, schema, partitioning=by_id)
        kudu_table = kudu_client.table(name)

        impala_table_name = self.get_kudu_table_base_name(kudu_table.name)
        props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_table.name
        cursor.execute(
            "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (impala_table_name, props))

        with self.drop_impala_table_after_context(cursor, impala_table_name):
            cursor.execute("SHOW RANGE PARTITIONS %s" % impala_table_name)
            # A single STRING column named after the range partition columns.
            expected_columns = [
                ('RANGE (id)', 'STRING', None, None, None, None, None)]
            expected_rows = [('UNBOUNDED', )]
            assert cursor.description == expected_columns
            assert cursor.fetchall() == expected_rows
    finally:
        # Remove the Kudu table if it was created.
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)
def temp_kudu_table(self, kudu, col_types, name=None, num_key_cols=1, col_names=None,
                    prepend_db_name=True, db_name=None, num_partitions=2):
    """Create a Kudu table, yield it, and delete it again afterwards.

    This function should be used in a "with" context.

    Args:
        kudu: Client used to create, open and delete the table. It must be a
            kudu.client.Client.
        col_types: Column types, one per column, in column order.
        name: Table name. If not provided, self.random_table_name() is used.
        num_key_cols: The first 'num_key_cols' columns are declared non-nullable
            and are used both as the primary key and as the hash partition
            columns.
        col_names: Column names. If not provided, the letters "a", "b", "c", ...
            are used, which limits the table to 26 columns.
        prepend_db_name: If True, the table name is prefixed with 'db_name' (or,
            when 'db_name' is not given, the lower-cased self.get_db_name())
            followed by ".".
        db_name: Optional database name to use for the prefix.
        num_partitions: The number of hash partitions to create.

    Yields:
        The object returned by kudu.table() for the newly created table.

    Raises:
        Exception: If 'col_names' is not given and more than 26 column types
            were passed.

    Example:
        with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
            assert kudu.table_exists(kudu_table.name)
        assert not kudu.table_exists(kudu_table.name)
    """
    if not col_names:
        if len(col_types) > 26:
            raise Exception("Too many columns for default naming")
        # range() instead of the Python 2-only xrange(), so that default naming
        # works on both Python 2 and Python 3.
        col_names = [chr(97 + i) for i in range(len(col_types))]
    key_cols = col_names[:num_key_cols]

    schema_builder = SchemaBuilder()
    for i, col_type in enumerate(col_types):
        column_spec = schema_builder.add_column(col_names[i], type_=col_type)
        if i < num_key_cols:
            # Key columns are declared NOT NULL.
            column_spec.nullable(False)
    schema_builder.set_primary_keys(key_cols)
    schema = schema_builder.build()

    name = name or self.random_table_name()
    if prepend_db_name:
        name = (db_name or self.get_db_name().lower()) + "." + name
    kudu.create_table(
        name, schema,
        partitioning=Partitioning().add_hash_partitions(key_cols, num_partitions))
    try:
        yield kudu.table(name)
    finally:
        # Always clean up, but only if the table still exists at this point.
        if kudu.table_exists(name):
            kudu.delete_table(name)
def test_external_timestamp_default_value(self, cursor, kudu_client, unique_database):
    """Checks that a Kudu table created outside Impala with a default value on a
    UNIXTIME_MICROS column can be loaded by Impala, and validates the DESCRIBE
    output is correct.

    The "ts" column gets 2009-01-01 00:00 UTC as its default, and the DESCRIBE
    output is expected to contain a "ts" row showing "1230768000000000" as the
    default value.
    """
    builder = SchemaBuilder()
    builder.add_column("id", INT64).nullable(False)
    builder.add_column("ts", UNIXTIME_MICROS).default(
        datetime(2009, 1, 1, 0, 0, tzinfo=utc))
    builder.set_primary_keys(["id"])
    schema = builder.build()
    name = unique_database + ".tsdefault"

    expected_ts_row = ["ts", "timestamp", "", "false", "true", "1230768000000000",
                       "AUTO_ENCODING", "DEFAULT_COMPRESSION", "0"]
    try:
        by_id = Partitioning().set_range_partition_columns(["id"])
        kudu_client.create_table(name, schema, partitioning=by_id)
        kudu_table = kudu_client.table(name)

        impala_table_name = self.get_kudu_table_base_name(kudu_table.name)
        props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_table.name
        cursor.execute(
            "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (impala_table_name, props))

        with self.drop_impala_table_after_context(cursor, impala_table_name):
            cursor.execute("DESCRIBE %s" % impala_table_name)
            # Strip whitespace from every non-empty cell of the DESCRIBE output.
            table_desc = []
            for row in cursor:
                table_desc.append([col.strip() if col else col for col in row])
            # Pytest shows truncated output on failure, so print the details just
            # in case.
            LOG.info(table_desc)
            assert expected_ts_row in table_desc
    finally:
        # Remove the Kudu table if it was created.
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)