def temp_kudu_table(self, kudu, col_types, name=None, num_key_cols=1, col_names=None,
      prepend_db_name=True, db_name=None, num_partitions=2):
    """Create a Kudu table, yield it, and delete it again when the caller is done.

       This function should be used in a "with" context.

       Parameters:
         kudu: must be a kudu.client.Client.
         col_types: one Kudu column type per column to create.
         name: table name; if not provided, self.random_table_name() is used.
         num_key_cols: the first 'num_key_cols' columns are made non-nullable and form
             the primary key; they are also the hash-partitioning columns.
         col_names: column names, one per entry in 'col_types'. If not provided (or
             empty), the letters "a", "b", "c", ... are used.
         prepend_db_name: if True, the table name is prefixed with the database name and
             a ".". The database name is 'db_name' if given, otherwise the lower-cased
             result of self.get_db_name().
         db_name: optional database name used for the prefix (see 'prepend_db_name').
         num_partitions: number of hash partitions to create. Defaults to 2, which is
             the value that used to be hard-coded.

       Raises:
         Exception: if no column names are given and more than 26 columns are requested,
             because the default names are single letters.

       Example:
         with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
            assert kudu.table_exists(kudu_table.name)
         assert not kudu.table_exists(kudu_table.name)
    """
    if not col_names:
      if len(col_types) > 26:
        raise Exception("Too many columns for default naming")
      # range() instead of xrange() so the helper runs on both Python 2 and Python 3.
      col_names = [chr(ord("a") + i) for i in range(len(col_types))]
    # The leading columns serve both as the primary key and as the hash-partition columns.
    key_col_names = col_names[:num_key_cols]
    schema_builder = SchemaBuilder()
    for i, t in enumerate(col_types):
      column_spec = schema_builder.add_column(col_names[i], type_=t)
      if i < num_key_cols:
        column_spec.nullable(False)
    schema_builder.set_primary_keys(key_col_names)
    schema = schema_builder.build()
    name = name or self.random_table_name()
    if prepend_db_name:
      name = (db_name or self.get_db_name().lower()) + "." + name
    kudu.create_table(name, schema,
        partitioning=Partitioning().add_hash_partitions(key_col_names, num_partitions))
    try:
      yield kudu.table(name)
    finally:
      # The caller may already have dropped the table, so only delete it if it exists.
      if kudu.table_exists(name):
        kudu.delete_table(name)
  def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                               unique_database):
    """Check that a single unbounded range partition gets printed correctly.

       A table that is range-partitioned on "id" without any explicit bounds is created
       directly through the Kudu client and exposed to Impala as an external table. The
       column description and the rows returned by SHOW RANGE PARTITIONS are then
       compared against the expected values. The Kudu table is deleted at the end,
       whether or not the checks pass.
    """
    builder = SchemaBuilder()
    builder.add_column("id", INT64).nullable(False)
    builder.set_primary_keys(["id"])
    schema = builder.build()

    kudu_table_path = unique_database + ".unbounded_range_table"

    try:
      range_partitioning = Partitioning().set_range_partition_columns(["id"])
      kudu_client.create_table(kudu_table_path, schema, partitioning=range_partitioning)
      created_table = kudu_client.table(kudu_table_path)

      impala_table_name = self.get_kudu_table_base_name(created_table.name)
      props = "TBLPROPERTIES('kudu.table_name'='%s')" % created_table.name
      create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
          impala_table_name, props)
      cursor.execute(create_stmt)
      with self.drop_impala_table_after_context(cursor, impala_table_name):
        cursor.execute("SHOW RANGE PARTITIONS %s" % impala_table_name)
        expected_description = [
          ('RANGE (id)', 'STRING', None, None, None, None, None)]
        assert cursor.description == expected_description
        expected_rows = [('UNBOUNDED',)]
        assert cursor.fetchall() == expected_rows

    finally:
      # Clean up the table that was created directly in Kudu.
      if kudu_client.table_exists(kudu_table_path):
        kudu_client.delete_table(kudu_table_path)
Beispiel #3
0
    def test_column_name_case(self, cursor, kudu_client, unique_database):
        """IMPALA-5286: Tests that an external Kudu table that was created with a column
        name containing upper case letters is handled correctly.

        The table is created through the Kudu client with a key column named 'Key' and
        exposed to Impala as an external table. It is then queried and altered using
        differently-cased spellings of the column names. The Kudu table is deleted at
        the end, whether or not the checks pass.
        """
        table_name = '%s.kudu_external_test' % unique_database
        # Start from a clean slate in case a table with this name is left over.
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)

        builder = SchemaBuilder()
        key_col = 'Key'
        builder.add_column(key_col, INT64).nullable(False).primary_key()
        schema = builder.build()
        partitioning = Partitioning().set_range_partition_columns([key_col])
        partitioning = partitioning.add_range_partition([1], [10])

        try:
            kudu_client.create_table(table_name, schema, partitioning)

            props = "tblproperties('kudu.table_name' = '%s')" % table_name
            create_stmt = "create external table %s stored as kudu %s" % (
                table_name, props)
            cursor.execute(create_stmt)

            # Perform a variety of operations on the table.
            insert_stmt = "insert into %s (kEy) values (5), (1), (4)" % table_name
            cursor.execute(insert_stmt)
            even_keys_query = "select keY from %s where KeY %% 2 = 0" % table_name
            cursor.execute(even_keys_query)
            assert cursor.fetchall() == [(4, )]
            ordered_query = "select * from %s order by kEY" % (table_name)
            cursor.execute(ordered_query)
            assert cursor.fetchall() == [(1, ), (4, ), (5, )]
            add_partition_stmt = (
                "alter table %s add range partition 11 < values < 20" % table_name)
            cursor.execute(add_partition_stmt)

            new_key = "KEY2"
            rename_stmt = "alter table %s change KEy %s bigint" % (table_name, new_key)
            cursor.execute(rename_stmt)
            val_col = "vaL"
            add_column_stmt = "alter table %s add columns (%s bigint)" % (
                table_name, val_col)
            cursor.execute(add_column_stmt)

            cursor.execute("describe %s" % table_name)
            described = cursor.fetchall()
            # 'describe' should print the column name in lower case.
            assert new_key.lower() in described[0]
            assert val_col.lower() in described[1]

            cursor.execute("alter table %s drop column Val" % table_name)
            cursor.execute("describe %s" % table_name)
            remaining_columns = cursor.fetchall()
            assert len(remaining_columns) == 1

            drop_partition_stmt = (
                "alter table %s drop range partition 11 < values < 20" % table_name)
            cursor.execute(drop_partition_stmt)
        finally:
            if kudu_client.table_exists(table_name):
                kudu_client.delete_table(table_name)
Beispiel #4
0
def schema_builder():
    """
    Construct a fresh kudu.SchemaBuilder.

    Examples
    --------
    builder = kudu.schema_builder()
    builder.add_column('key1', kudu.int64, nullable=False)
    builder.add_column('key2', kudu.int32, nullable=False)

    (builder.add_column('name', kudu.string)
     .nullable()
     .compression('lz4'))

    builder.add_column('value1', kudu.double)
    builder.add_column('value2', kudu.int8, encoding='rle')
    builder.set_primary_keys(['key1', 'key2'])

    schema = builder.build()

    Returns
    -------
    builder : SchemaBuilder
        A new builder instance.
    """
    builder = SchemaBuilder()
    return builder
Beispiel #5
0
    def test_conflicting_column_name(self, cursor, kudu_client,
                                     unique_database):
        """IMPALA-5283: Tests that loading an external Kudu table that was created with
        column names that differ only in case results in an error.

        A Kudu table with the columns 'col' and 'COL' is created directly through the
        Kudu client. Creating the matching external table in Impala is expected to raise
        an exception whose text contains the "differ only in casing" error. The Kudu
        table is deleted at the end, whether or not the checks pass.
        """
        table_name = '%s.kudu_external_test' % unique_database
        # Start from a clean slate in case a table with this name is left over.
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)

        schema_builder = SchemaBuilder()
        col0 = 'col'
        schema_builder.add_column(col0, INT64).nullable(False).primary_key()
        col1 = 'COL'
        schema_builder.add_column(col1, INT64)
        schema = schema_builder.build()
        partitioning = Partitioning().set_range_partition_columns([col0])\
            .add_range_partition([1], [10])

        try:
            # Setup failures in Kudu propagate unchanged; only the Impala statement below
            # is expected to raise.
            kudu_client.create_table(table_name, schema, partitioning)

            props = "tblproperties('kudu.table_name' = '%s')" % table_name
            try:
                cursor.execute("create external table %s stored as kudu %s" %
                               (table_name, props))
            except Exception as e:
                assert ('Error loading Kudu table: Impala does not support column names '
                        'that differ only in casing') in str(e)
            else:
                # This lives in 'else' so that the AssertionError is not caught by the
                # 'except Exception' above and reported as a message mismatch.
                assert False, 'create table should have resulted in an exception'
        finally:
            if kudu_client.table_exists(table_name):
                kudu_client.delete_table(table_name)
Beispiel #6
0
    def test_table_without_partitioning(self, cursor, kudu_client,
                                        unique_database):
        """Test a Kudu table created without partitioning (i.e. equivalent to a single
        unbounded partition). It is not possible to create such a table in Impala, but
        it can be created directly in Kudu and then loaded as an external table.
        Regression test for IMPALA-5154.

        The test inserts three rows, checks the row count, and expects SHOW RANGE
        PARTITIONS to fail with an AnalysisException because the table has no range
        partitions. The Kudu table is deleted at the end, whether or not the checks pass.
        """
        schema_builder = SchemaBuilder()
        column_spec = schema_builder.add_column("id", INT64)
        column_spec.nullable(False)
        schema_builder.set_primary_keys(["id"])
        schema = schema_builder.build()
        # An empty list of range partition columns yields a table without partitioning.
        partitioning = Partitioning().set_range_partition_columns([])
        name = "%s.one_big_unbounded_partition" % unique_database

        try:
            kudu_client.create_table(name, schema, partitioning=partitioning)
            # The original opened the table here without using the handle. The call is
            # kept; presumably it confirms the table can be opened -- TODO confirm.
            kudu_client.table(name)

            props = "TBLPROPERTIES('kudu.table_name'='%s')" % name
            cursor.execute("CREATE EXTERNAL TABLE %s STORED AS KUDU %s" %
                           (name, props))
            with self.drop_impala_table_after_context(cursor, name):
                cursor.execute("INSERT INTO %s VALUES (1), (2), (3)" % name)
                cursor.execute("SELECT COUNT(*) FROM %s" % name)
                assert cursor.fetchall() == [(3, )]
                try:
                    cursor.execute("SHOW RANGE PARTITIONS %s" % name)
                except Exception as e:
                    assert "AnalysisException: SHOW RANGE PARTITIONS requested but table does "\
                        "not have range partitions" in str(e)
                else:
                    # This lives in 'else' so that the AssertionError is not caught by the
                    # 'except Exception' above and reported as a message mismatch.
                    assert False, \
                        "SHOW RANGE PARTITIONS should have resulted in an exception"
        finally:
            if kudu_client.table_exists(name):
                kudu_client.delete_table(name)
Beispiel #7
0
    def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                                 unique_database):
        """Check that a single unbounded range partition gets printed correctly.

        A table that is range-partitioned on "id" without any explicit bounds is created
        directly through the Kudu client and exposed to Impala as an external table. The
        column description and the rows returned by SHOW RANGE PARTITIONS are then
        compared against the expected values. The Kudu table is deleted at the end,
        whether or not the checks pass.
        """
        builder = SchemaBuilder()
        builder.add_column("id", INT64).nullable(False)
        builder.set_primary_keys(["id"])
        schema = builder.build()

        kudu_table_path = unique_database + ".unbounded_range_table"

        try:
            range_partitioning = Partitioning().set_range_partition_columns(["id"])
            kudu_client.create_table(kudu_table_path,
                                     schema,
                                     partitioning=range_partitioning)
            created_table = kudu_client.table(kudu_table_path)

            impala_table_name = self.get_kudu_table_base_name(created_table.name)
            props = "TBLPROPERTIES('kudu.table_name'='%s')" % created_table.name
            create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
                impala_table_name, props)
            cursor.execute(create_stmt)
            with self.drop_impala_table_after_context(cursor, impala_table_name):
                cursor.execute("SHOW RANGE PARTITIONS %s" % impala_table_name)
                expected_description = [
                    ('RANGE (id)', 'STRING', None, None, None, None, None)]
                assert cursor.description == expected_description
                expected_rows = [('UNBOUNDED', )]
                assert cursor.fetchall() == expected_rows

        finally:
            # Clean up the table that was created directly in Kudu.
            if kudu_client.table_exists(kudu_table_path):
                kudu_client.delete_table(kudu_table_path)
Beispiel #8
0
    def temp_kudu_table(self,
                        kudu,
                        col_types,
                        name=None,
                        num_key_cols=1,
                        col_names=None,
                        prepend_db_name=True,
                        db_name=None,
                        num_partitions=2):
        """Create a Kudu table, yield it, and delete it again when the caller is done.

        This function should be used in a "with" context.

        Parameters:
          kudu: must be a kudu.client.Client.
          col_types: one Kudu column type per column to create.
          name: table name; if not provided, self.random_table_name() is used.
          num_key_cols: the first 'num_key_cols' columns are made non-nullable and form
              the primary key; they are also the hash-partitioning columns.
          col_names: column names, one per entry in 'col_types'. If not provided (or
              empty), the letters "a", "b", "c", ... are used.
          prepend_db_name: if True, the table name is prefixed with the database name
              and a ".". The database name is 'db_name' if given, otherwise the
              lower-cased result of self.get_db_name().
          db_name: optional database name used for the prefix (see 'prepend_db_name').
          num_partitions: number of hash partitions to create.

        Raises:
          Exception: if no column names are given and more than 26 columns are
              requested, because the default names are single letters.

        Example:
          with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
             assert kudu.table_exists(kudu_table.name)
          assert not kudu.table_exists(kudu_table.name)
        """
        if not col_names:
            if len(col_types) > 26:
                raise Exception("Too many columns for default naming")
            # range() instead of xrange() so the helper runs on Python 2 and Python 3.
            col_names = [chr(ord("a") + i) for i in range(len(col_types))]
        # The leading columns serve both as the primary key and as the hash-partition
        # columns.
        key_col_names = col_names[:num_key_cols]
        schema_builder = SchemaBuilder()
        for i, t in enumerate(col_types):
            column_spec = schema_builder.add_column(col_names[i], type_=t)
            if i < num_key_cols:
                column_spec.nullable(False)
        schema_builder.set_primary_keys(key_col_names)
        schema = schema_builder.build()
        name = name or self.random_table_name()
        if prepend_db_name:
            name = (db_name or self.get_db_name().lower()) + "." + name
        kudu.create_table(name,
                          schema,
                          partitioning=Partitioning().add_hash_partitions(
                              key_col_names, num_partitions))
        try:
            yield kudu.table(name)
        finally:
            # The caller may already have dropped the table, so only delete it if it
            # exists.
            if kudu.table_exists(name):
                kudu.delete_table(name)
Beispiel #9
0
    def test_external_timestamp_default_value(self, cursor, kudu_client,
                                              unique_database):
        """Checks that a Kudu table created outside Impala with a default value on a
        UNIXTIME_MICROS column can be loaded by Impala, and validates the DESCRIBE
        output is correct.

        The table has a non-nullable INT64 key "id" and a UNIXTIME_MICROS column "ts"
        whose default is 2009-01-01 00:00 UTC. It is created through the Kudu client,
        exposed to Impala as an external table, and the stripped DESCRIBE rows must
        contain the expected "ts" row. The Kudu table is deleted at the end, whether or
        not the checks pass.
        """
        builder = SchemaBuilder()
        builder.add_column("id", INT64).nullable(False)
        ts_default = datetime(2009, 1, 1, 0, 0, tzinfo=utc)
        builder.add_column("ts", UNIXTIME_MICROS).default(ts_default)
        builder.set_primary_keys(["id"])
        schema = builder.build()
        kudu_table_path = unique_database + ".tsdefault"

        try:
            range_partitioning = Partitioning().set_range_partition_columns(["id"])
            kudu_client.create_table(kudu_table_path,
                                     schema,
                                     partitioning=range_partitioning)
            created_table = kudu_client.table(kudu_table_path)
            impala_table_name = self.get_kudu_table_base_name(created_table.name)
            props = "TBLPROPERTIES('kudu.table_name'='%s')" % created_table.name
            create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
                impala_table_name, props)
            cursor.execute(create_stmt)
            with self.drop_impala_table_after_context(cursor, impala_table_name):
                cursor.execute("DESCRIBE %s" % impala_table_name)
                # Strip whitespace from every non-empty cell so the comparison below
                # does not depend on padding.
                table_desc = []
                for row in cursor:
                    table_desc.append([col.strip() if col else col for col in row])
                # Pytest shows truncated output on failure, so print the details just in case.
                LOG.info(table_desc)
                expected_ts_row = [
                    "ts", "timestamp", "", "false", "true", "1230768000000000",
                    "AUTO_ENCODING", "DEFAULT_COMPRESSION", "0"]
                assert expected_ts_row in table_desc
        finally:
            # Clean up the table that was created directly in Kudu.
            if kudu_client.table_exists(kudu_table_path):
                kudu_client.delete_table(kudu_table_path)