Python Partitioning Exemples, kudu.client.Partitioning Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_client.py Projet : skyline8888/kudu-1

    def test_create_partitioned_table(self):
        """Create and drop one table under three partitioning layouts.

        The same table name is reused for a two-bucket hash layout on
        ``key``, a range layout on ``key`` with three split rows, and a
        two-bucket hash layout with an explicit seed. Each successful
        creation is followed by a delete so the next layout can reuse the
        name.
        """
        name = 'partitioned_table'
        try:
            self.client.create_table(
                name,
                self.schema,
                partitioning=Partitioning().add_hash_partitions(['key'], 2))
            # TODO: once the Python client can list partition info, assert that it was
            # created successfully here.
            self.client.delete_table(name)

            # The three split rows are given in three different forms:
            # a dict, a list and a tuple.
            self.client.create_table(
                name,
                self.schema,
                partitioning=Partitioning().set_range_partition_columns([
                    'key'
                ]).add_range_partition_split({
                    'key': 10
                }).add_range_partition_split([20]).add_range_partition_split(
                    (30, )))
            self.client.delete_table(name)

            self.client.create_table(
                name,
                self.schema,
                partitioning=Partitioning().add_hash_partitions(['key'],
                                                                2,
                                                                seed=342310))
            self.client.delete_table(name)

        finally:
            # Best-effort cleanup: on the success path the table is already
            # gone, so this delete is expected to fail. Catch Exception rather
            # than using a bare except so KeyboardInterrupt/SystemExit still
            # propagate.
            try:
                self.client.delete_table(name)
            except Exception:
                pass

Exemple #2

0

Afficher le fichier

    def create_tables(self):
        """Define the three sensor tables and create them in Kudu.

        An existing 'measurements' table is dropped first. Schemas and hash
        partitioning (3 buckets each) are then prepared for 'tag_mappings',
        'raw_measurements' and 'measurements', and the tables are created in
        that order with three replicas each. The number of ``Sensor_<n>``
        columns in 'measurements' comes from ``self._config['sensors']``.
        """
        # NOTE(review): only 'measurements' is dropped here, while all three
        # tables are created below - confirm what should happen when
        # 'tag_mappings' or 'raw_measurements' already exist.
        for stale_table in ['measurements']:
            if self._kudu_client.table_exists(stale_table):
                self._kudu_client.delete_table(stale_table)

        # tag_mappings: tag_id (int32 primary key) and sensor_name.
        tag_builder = kudu.schema_builder()
        tag_id_spec = tag_builder.add_column('tag_id')
        tag_id_spec.type(kudu.int32).nullable(False).primary_key()
        sensor_name_spec = tag_builder.add_column('sensor_name')
        sensor_name_spec.type(kudu.string).nullable(False)
        tag_schema = tag_builder.build()
        tag_partitioning = Partitioning().add_hash_partitions(
            column_names=['tag_id'], num_buckets=3)

        # raw_measurements: one row per (record_time, tag_id) pair.
        raw_builder = kudu.schema_builder()
        raw_builder.add_column('record_time').type(kudu.string).nullable(False)
        raw_builder.add_column('tag_id').type(kudu.int32).nullable(False)
        raw_builder.add_column('value').type(kudu.double).nullable(False)
        raw_builder.set_primary_keys(['record_time', 'tag_id'])
        raw_schema = raw_builder.build()
        raw_partitioning = Partitioning().add_hash_partitions(
            column_names=['record_time', 'tag_id'], num_buckets=3)

        # measurements: one row per record_time with a nullable double
        # column for every configured sensor.
        wide_builder = kudu.schema_builder()
        wide_builder.add_column('record_time').type(kudu.string).nullable(False)
        for sensor_index in range(self._config['sensors']):
            sensor_spec = wide_builder.add_column('Sensor_%d' % sensor_index)
            sensor_spec.type(kudu.double).nullable(True)
        wide_builder.set_primary_keys(['record_time'])
        wide_schema = wide_builder.build()
        wide_partitioning = Partitioning().add_hash_partitions(
            column_names=['record_time'], num_buckets=3)

        # Create the tables only after every schema has been built.
        table_specs = [
            ('tag_mappings', tag_schema, tag_partitioning),
            ('raw_measurements', raw_schema, raw_partitioning),
            ('measurements', wide_schema, wide_partitioning),
        ]
        for table_name, table_schema, table_partitioning in table_specs:
            self._kudu_client.create_table(table_name,
                                           table_schema,
                                           table_partitioning,
                                           n_replicas=3)

Exemple #3

0

Afficher le fichier

    def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                                 unique_database):
        """Verify SHOW RANGE PARTITIONS output for a lone unbounded range partition.

        A Kudu table range-partitioned on "id" with no explicit bounds is
        created through the Kudu client and exposed to Impala as an external
        table. SHOW RANGE PARTITIONS must then describe a single
        'RANGE (id)' column and return the single row 'UNBOUNDED'.
        """
        builder = SchemaBuilder()
        builder.add_column("id", INT64).nullable(False)
        builder.set_primary_keys(["id"])
        schema = builder.build()

        name = unique_database + ".unbounded_range_table"

        expected_description = [('RANGE (id)', 'STRING', None, None, None,
                                 None, None)]
        expected_rows = [('UNBOUNDED', )]

        try:
            range_on_id = Partitioning().set_range_partition_columns(["id"])
            kudu_client.create_table(name, schema, partitioning=range_on_id)
            kudu_name = kudu_client.table(name).name

            impala_table_name = self.get_kudu_table_base_name(kudu_name)
            props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_name
            create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
                impala_table_name, props)
            cursor.execute(create_stmt)
            with self.drop_impala_table_after_context(cursor,
                                                      impala_table_name):
                cursor.execute("SHOW RANGE PARTITIONS %s" % impala_table_name)
                assert cursor.description == expected_description
                assert cursor.fetchall() == expected_rows

        finally:
            # Drop the Kudu-side table whether or not the checks passed.
            if kudu_client.table_exists(name):
                kudu_client.delete_table(name)

Exemple #4

0

Afficher le fichier

    def test_table_without_partitioning(self, cursor, kudu_client,
                                        unique_database):
        """Test a Kudu table created without partitioning.

        Such a table is equivalent to a single unbounded partition. It cannot
        be created in Impala, but it can be created directly in Kudu and then
        loaded as an external table. Inserts and counts must work, while
        SHOW RANGE PARTITIONS must fail with an AnalysisException.
        Regression test for IMPALA-5154.
        """
        schema_builder = SchemaBuilder()
        column_spec = schema_builder.add_column("id", INT64)
        column_spec.nullable(False)
        schema_builder.set_primary_keys(["id"])
        schema = schema_builder.build()
        # An empty range-column list is how the "no partitioning" case is
        # expressed.
        partitioning = Partitioning().set_range_partition_columns([])
        name = "%s.one_big_unbounded_partition" % unique_database

        try:
            kudu_client.create_table(name, schema, partitioning=partitioning)
            # The table is opened as in the original test; the handle itself
            # is not needed afterwards.
            kudu_client.table(name)

            props = "TBLPROPERTIES('kudu.table_name'='%s')" % name
            cursor.execute("CREATE EXTERNAL TABLE %s STORED AS KUDU %s" %
                           (name, props))
            with self.drop_impala_table_after_context(cursor, name):
                cursor.execute("INSERT INTO %s VALUES (1), (2), (3)" % name)
                cursor.execute("SELECT COUNT(*) FROM %s" % name)
                assert cursor.fetchall() == [(3, )]
                try:
                    cursor.execute("SHOW RANGE PARTITIONS %s" % name)
                except Exception as e:
                    assert "AnalysisException: SHOW RANGE PARTITIONS requested but table does "\
                        "not have range partitions" in str(e)
                else:
                    # Kept outside the try so this failure is reported as
                    # such instead of being caught by the handler above.
                    assert False, \
                        "SHOW RANGE PARTITIONS should have raised an exception"
        finally:
            if kudu_client.table_exists(name):
                kudu_client.delete_table(name)

Exemple #5

0

Afficher le fichier

    def test_conflicting_column_name(self, cursor, kudu_client,
                                     unique_database):
        """IMPALA-5283: loading a Kudu table whose column names differ only in case fails.

        A Kudu table with columns 'col' and 'COL' is created directly in
        Kudu; creating the matching external table in Impala must raise an
        error that mentions the casing conflict.
        """
        table_name = '%s.kudu_external_test' % unique_database
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)

        schema_builder = SchemaBuilder()
        col0 = 'col'
        schema_builder.add_column(col0, INT64).nullable(False).primary_key()
        col1 = 'COL'
        schema_builder.add_column(col1, INT64)
        schema = schema_builder.build()
        partitioning = Partitioning().set_range_partition_columns([col0])\
            .add_range_partition([1], [10])

        try:
            # Kudu-side setup stays outside the inner try so a setup failure
            # is not mistaken for the expected Impala error.
            kudu_client.create_table(table_name, schema, partitioning)

            props = "tblproperties('kudu.table_name' = '%s')" % table_name
            try:
                cursor.execute("create external table %s stored as kudu %s" %
                               (table_name, props))
            except Exception as e:
                assert ('Error loading Kudu table: Impala does not support column names that '
                        'differ only in casing') in str(e)
            else:
                # Not inside the try: otherwise the handler above would catch
                # this AssertionError and report a misleading message check.
                assert False, 'create table should have resulted in an exception'
        finally:
            if kudu_client.table_exists(table_name):
                kudu_client.delete_table(table_name)

Exemple #6

0

Afficher le fichier

    def test_column_name_case(self, cursor, kudu_client, unique_database):
        """IMPALA-5286: an external Kudu table with an upper-case column name works.

        The Kudu table has a single key column named 'Key'. Once exposed to
        Impala, the column is addressed with assorted casings in inserts,
        selects and ALTER TABLE statements, and DESCRIBE is expected to print
        column names in lower case.
        """
        table_name = '%s.kudu_external_test' % unique_database
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)

        key_col = 'Key'
        builder = SchemaBuilder()
        builder.add_column(key_col, INT64).nullable(False).primary_key()
        schema = builder.build()
        range_on_key = Partitioning().set_range_partition_columns([key_col])
        partitioning = range_on_key.add_range_partition([1], [10])

        def run(template, *extra):
            # Every statement takes the table name as its first placeholder.
            cursor.execute(template % ((table_name, ) + extra))

        try:
            kudu_client.create_table(table_name, schema, partitioning)

            props = "tblproperties('kudu.table_name' = '%s')" % table_name
            run("create external table %s stored as kudu %s", props)

            # Data statements that spell the key column with mixed casing.
            run("insert into %s (kEy) values (5), (1), (4)")
            run("select keY from %s where KeY %% 2 = 0")
            assert cursor.fetchall() == [(4, )]
            run("select * from %s order by kEY")
            assert cursor.fetchall() == [(1, ), (4, ), (5, )]
            run("alter table %s add range partition 11 < values < 20")

            # Rename the key column, then add a second column.
            new_key = "KEY2"
            run("alter table %s change KEy %s bigint", new_key)
            val_col = "vaL"
            run("alter table %s add columns (%s bigint)", val_col)

            # 'describe' should print the column name in lower case.
            run("describe %s")
            described = cursor.fetchall()
            assert new_key.lower() in described[0]
            assert val_col.lower() in described[1]

            # After dropping the value column only the key column is left.
            run("alter table %s drop column Val")
            run("describe %s")
            assert len(cursor.fetchall()) == 1

            run("alter table %s drop range partition 11 < values < 20")
        finally:
            if kudu_client.table_exists(table_name):
                kudu_client.delete_table(table_name)

Exemple #7

0

Afficher le fichier

Fichier : kudu.py Projet : mycastiel/kudu

    def partition(hash_columns: list, range_columns: list = None, bound: dict = None, bucket_num=3) -> Partitioning:
        """Build a Partitioning with hash levels and an optional range partition.

        Args:
            hash_columns: columns to hash on; each entry gets its own
                ``add_hash_partitions`` call with ``bucket_num`` buckets.
            range_columns: passed unchanged to
                ``set_range_partition_columns``.
            bound: optional dict describing one range partition. The keys
                read are ``lower_bound``, ``upper_bound``,
                ``lower_bound_type`` (falls back to "inclusive") and
                ``upper_bound_type`` (falls back to "exclusive"). When it is
                None, no range partition is added.
            bucket_num: number of buckets for every hash level.

        Returns:
            The configured Partitioning object.
        """
        # Named differently from the function to avoid shadowing it.
        partitioning = Partitioning()
        for column in hash_columns:
            partitioning.add_hash_partitions(column_names=column, num_buckets=bucket_num)

        partitioning.set_range_partition_columns(range_columns)

        # ``bound`` defaults to None; calling .get() on it unconditionally
        # made the default unusable (AttributeError).
        if bound is not None:
            partitioning.add_range_partition(
                lower_bound=bound.get("lower_bound"),
                upper_bound=bound.get("upper_bound"),
                lower_bound_type=bound.get("lower_bound_type") or "inclusive",
                upper_bound_type=bound.get("upper_bound_type") or "exclusive"
            )

        return partitioning

Exemple #8

0

Afficher le fichier

Fichier : test_client.py Projet : zhangyifan27/kudu

    def test_create_table_with_different_owner(self):
        """Create a table with ``owner='alice'`` and check the owner is reported back."""
        name = 'table_with_different_owner'
        try:
            self.client.create_table(
                    name, self.schema,
                    partitioning=Partitioning().add_hash_partitions(['key'], 2),
                    owner='alice')

            self.assertEqual('alice', self.client.table(name).owner)

        finally:
            # Best-effort cleanup; Exception (not a bare except) so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            try:
                self.client.delete_table(name)
            except Exception:
                pass

Exemple #9

0

Afficher le fichier

Fichier : test_client.py Projet : zhangyifan27/kudu

    def test_create_table_with_different_comment(self):
        """Create a table with a comment and check the comment is reported back."""
        name = 'table_with_different_comment'
        try:
            self.client.create_table(
                name, self.schema,
                partitioning=Partitioning().add_hash_partitions(['key'], 2),
                comment='new comment')

            self.assertEqual('new comment', self.client.table(name).comment)

        finally:
            # Best-effort cleanup; Exception (not a bare except) so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            try:
                self.client.delete_table(name)
            except Exception:
                pass

Exemple #10

0

Afficher le fichier

Fichier : kudu_test_suite.py Projet : yx91490/impala-1

    def temp_kudu_table(self,
                        kudu,
                        col_types,
                        name=None,
                        num_key_cols=1,
                        col_names=None,
                        prepend_db_name=True,
                        db_name=None,
                        num_partitions=2):
        """Create a Kudu table, yield it, and drop it afterwards.

        This function should be used in a "with" context. 'kudu' must be a
        kudu.client.Client. If a table name is not provided, a random name
        will be used. If 'prepend_db_name' is True, the table name will be
        prepended with ('db_name' or get_db_name().lower()) + ".". If column
        names are not provided, the letters "a", "b", "c", ... will be used
        (at most 26 columns). The first 'num_key_cols' columns are made
        non-nullable, form the primary key, and are hash-partitioned into
        'num_partitions' buckets.

        Example:
          with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
             assert kudu.table_exists(kudu_table.name)
          assert not kudu.table_exists(kudu_table.name)
        """
        if not col_names:
            if len(col_types) > 26:
                raise Exception("Too many columns for default naming")
            # range() instead of the Python-2-only xrange(); the list has at
            # most 26 entries, so this costs nothing on Python 2 either.
            col_names = [chr(ord("a") + i) for i in range(len(col_types))]
        schema_builder = SchemaBuilder()
        for i, t in enumerate(col_types):
            column_spec = schema_builder.add_column(col_names[i], type_=t)
            if i < num_key_cols:
                column_spec.nullable(False)
        schema_builder.set_primary_keys(col_names[:num_key_cols])
        schema = schema_builder.build()
        name = name or self.random_table_name()
        if prepend_db_name:
            name = (db_name or self.get_db_name().lower()) + "." + name
        kudu.create_table(name,
                          schema,
                          partitioning=Partitioning().add_hash_partitions(
                              col_names[:num_key_cols], num_partitions))
        try:
            yield kudu.table(name)
        finally:
            # The caller may already have dropped the table inside the
            # "with" body, so check before deleting.
            if kudu.table_exists(name):
                kudu.delete_table(name)

Exemple #11

0

Afficher le fichier

Fichier : test_client.py Projet : malli3131/kudu-1

    def test_create_table_with_different_replication_factors(self):
        """Create the table with 1, 3 and 5 replicas and check ``num_replicas`` each time.

        Replication factors larger than ``self.NUM_TABLET_SERVERS`` are
        skipped. The table is dropped after every iteration so the name can
        be reused.
        """
        name = "different_replica_table"

        # Test setting the number of replicas for 1, 3 and 5 provided that the
        # number does not exceed the number of tservers
        for n_replicas in [n for n in [1, 3, 5] if n <= self.NUM_TABLET_SERVERS]:
            try:
                self.client.create_table(
                    name, self.schema,
                    partitioning=Partitioning().add_hash_partitions(['key'], 2),
                    n_replicas=n_replicas)

                assert n_replicas == self.client.table(name).num_replicas

            finally:
                # Best-effort cleanup; Exception (not a bare except) so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                try:
                    self.client.delete_table(name)
                except Exception:
                    pass

Exemple #12

0

Afficher le fichier

    def on_put(self, req, res, table):
        """Create the Kudu table ``table`` from a JSON column description.

        The request body is parsed as JSON and read as a mapping of column
        name to a type keyword ('string', 'int', 'time', 'float', 'double',
        'decimal', 'binary', 'bool'); any other keyword falls back to a
        string column. A non-nullable string primary key '_id' is always
        added and the table is hash-partitioned on it into 3 buckets.

        The response body is a JSON object ``{'table': ..., 'success': ...}``.
        ``success`` is True only when the table was created by this call; if
        the table already exists nothing is changed and it stays False. The
        status is always HTTP 200.
        """
        api = {'table': table, 'success': False}
        data = json.loads(req.bounded_stream.read().decode("utf-8"))

        client = kudu.connect(host='queen', port=7051)

        if not client.table_exists(table):
            builder = kudu.schema_builder()
            builder.add_column('_id').type(
                kudu.string).nullable(False).primary_key()

            if data:
                for column in data:
                    declared = data[column]
                    if declared == 'string':
                        builder.add_column(column).type(kudu.string)
                    elif declared == 'int':
                        builder.add_column(column).type(kudu.int64)
                    elif declared == 'time':
                        builder.add_column(column).type(kudu.unixtime_micros)
                    elif declared == 'float':
                        builder.add_column(column).type(kudu.float)
                    elif declared == 'double':
                        # Previously mapped to kudu.float, which made
                        # 'double' columns single precision.
                        builder.add_column(column).type(kudu.double)
                    elif declared == 'decimal':
                        builder.add_column(column).type(kudu.decimal)
                    elif declared == 'binary':
                        builder.add_column(column).type(kudu.binary)
                    elif declared == 'bool':
                        builder.add_column(column).type(kudu.bool)
                    else:
                        # Unknown keywords fall back to a string column.
                        builder.add_column(column).type(kudu.string)

            schema = builder.build()
            partitioning = Partitioning().add_hash_partitions(
                column_names=['_id'], num_buckets=3)
            client.create_table(table, schema, partitioning)
            api['success'] = True

        res.body = json.dumps(api)
        res.status = falcon.HTTP_200

Exemple #13

0

Afficher le fichier

Fichier : kudu_dstat.py Projet : zjfplayer2/kudu-examples

def open_or_create_table(client, table, drop=False):
    """Open ``table``, creating it first when it is missing or ``drop`` is set.

    A newly created table has an int64 primary-key column "ts" plus one
    float column for every name in DSTAT_COL_NAMES, and is hash-partitioned
    on "ts" into 2 buckets. With ``drop=True`` an existing table is deleted
    and recreated.

    Returns the handle obtained from ``client.table(table)``.
    """
    present = client.table_exists(table)
    if present and drop:
        client.delete_table(table)
        present = False

    if not present:
        # Timestamp key followed by the dstat metric columns.
        schema_builder = kudu.schema_builder()
        schema_builder.add_column("ts", kudu.int64, nullable=False, primary_key=True)
        for metric in DSTAT_COL_NAMES:
            schema_builder.add_column(metric, kudu.float_)

        # Two hash buckets on the timestamp column.
        hash_on_ts = Partitioning().add_hash_partitions('ts', 2)
        client.create_table(table, schema_builder.build(), hash_on_ts)

    return client.table(table)

Exemple #14

0

Afficher le fichier

    def test_external_timestamp_default_value(self, cursor, kudu_client,
                                              unique_database):
        """Check DESCRIBE output for an external table with a timestamp default.

        A Kudu table created outside Impala with a default value on a
        UNIXTIME_MICROS column must be loadable by Impala, and DESCRIBE must
        show that default as microseconds since the epoch.
        """
        builder = SchemaBuilder()
        builder.add_column("id", INT64).nullable(False)
        ts_default = datetime(2009, 1, 1, 0, 0, tzinfo=utc)
        builder.add_column("ts", UNIXTIME_MICROS).default(ts_default)
        builder.set_primary_keys(["id"])
        schema = builder.build()
        name = unique_database + ".tsdefault"

        expected_ts_row = [
            "ts", "timestamp", "", "false", "true", "1230768000000000",
            "AUTO_ENCODING", "DEFAULT_COMPRESSION", "0"
        ]

        try:
            range_on_id = Partitioning().set_range_partition_columns(["id"])
            kudu_client.create_table(name, schema, partitioning=range_on_id)
            kudu_name = kudu_client.table(name).name
            impala_table_name = self.get_kudu_table_base_name(kudu_name)
            props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_name
            create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
                impala_table_name, props)
            cursor.execute(create_stmt)
            with self.drop_impala_table_after_context(cursor,
                                                      impala_table_name):
                cursor.execute("DESCRIBE %s" % impala_table_name)
                # Strip whitespace from every non-empty cell of every row.
                table_desc = []
                for row in cursor:
                    cleaned = []
                    for col in row:
                        cleaned.append(col.strip() if col else col)
                    table_desc.append(cleaned)
                # Pytest shows truncated output on failure, so print the details just in case.
                LOG.info(table_desc)
                assert expected_ts_row in table_desc
        finally:
            if kudu_client.table_exists(name):
                kudu_client.delete_table(name)

Exemple #15

0

Afficher le fichier

Fichier : common.py Projet : macressler/incubator-kudu

 def example_partitioning(cls):
     """Return a Partitioning that range-partitions on the 'key' column."""
     range_columns = ['key']
     return Partitioning().set_range_partition_columns(range_columns)

Exemple #16

0

Afficher le fichier

    def setUpClass(self):
        """
        Parent class for both the Scan tests and the
        Scan Token tests

        Populates the existing example table (``self.ex_table``) with
        ``self.nrows`` rows, then creates and fills a second table,
        'type-test', that has one column per supported type. The inserted
        data is kept on ``self.tuples`` and ``self.type_test_rows`` for the
        tests to compare against.
        """
        super(TestScanBase, self).setUpClass()

        self.nrows = 100
        table = self.client.table(self.ex_table)
        session = self.client.new_session()

        tuples = []
        for i in range(self.nrows):
            op = table.new_insert()
            tup = i, \
                  i * 2, \
                  'hello_%d' % i if i % 2 == 0 else None, \
                  datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
            op['key'] = tup[0]
            op['int_val'] = tup[1]
            # Odd rows leave string_val unset on the insert.
            if i % 2 == 0:
                op['string_val'] = tup[2]
            op['unixtime_micros_val'] = tup[3]
            session.apply(op)
            tuples.append(tup)
        session.flush()

        self.table = table

        # Replace missing values w/ defaults to test default values.
        # (`is None` instead of `== None`, and no loop variable named
        # `tuple` shadowing the builtin.)
        self.tuples = [
            (row[0], row[1], 'nothing', row[3]) if row[2] is None else row
            for row in tuples
        ]

        # Create table to test all types
        # for various predicate tests
        table_name = 'type-test'
        # Create schema, partitioning and then table
        builder = kudu.schema_builder()
        builder.add_column('key').type(kudu.int64).nullable(False)
        builder.add_column('unixtime_micros_val',
                           type_=kudu.unixtime_micros,
                           nullable=False)
        # The decimal column only exists when the client supports it, which
        # is why the expected rows below come in two shapes.
        if kudu.CLIENT_SUPPORTS_DECIMAL:
            builder.add_column('decimal_val',
                               type_=kudu.decimal,
                               precision=5,
                               scale=2)
        builder.add_column('string_val',
                           type_=kudu.string,
                           compression=kudu.COMPRESSION_LZ4,
                           encoding='prefix')
        builder.add_column('bool_val', type_=kudu.bool)
        builder.add_column('double_val', type_=kudu.double)
        builder.add_column('int8_val', type_=kudu.int8)
        builder.add_column('binary_val',
                           type_='binary',
                           compression=kudu.COMPRESSION_SNAPPY,
                           encoding='prefix')
        builder.add_column('float_val', type_=kudu.float)
        builder.set_primary_keys(['key', 'unixtime_micros_val'])
        schema = builder.build()

        self.projected_names_w_o_float = [
            col for col in schema.names if col != 'float_val'
        ]

        # Three hash buckets on 'key', plus two range partitions on the
        # timestamp: everything before 2016-01-01, and 2016-01-01 up to
        # 9999-12-31.
        partitioning = Partitioning() \
            .add_hash_partitions(column_names=['key'], num_buckets=3)\
            .set_range_partition_columns(['unixtime_micros_val'])\
            .add_range_partition(
                upper_bound={'unixtime_micros_val': ("2016-01-01", "%Y-%m-%d")},
                upper_bound_type=kudu.EXCLUSIVE_BOUND
            )\
            .add_range_partition(
                lower_bound={'unixtime_micros_val': datetime.datetime(2016, 1, 1)},
                lower_bound_type='INCLUSIVE',
                upper_bound={'unixtime_micros_val': datetime.datetime(9999, 12, 31)}
            )

        self.client.create_table(table_name, schema, partitioning)
        self.type_table = self.client.table(table_name)

        # Insert new rows
        # NOTE(review): `^` is bitwise XOR in Python, so `10 ^ 308` is 318 and
        # `10 ^ 38` is 44 - these are not powers of ten. Left unchanged
        # because tests elsewhere may depend on the stored values; confirm
        # before switching to 1e308 / 1e38.
        if kudu.CLIENT_SUPPORTS_DECIMAL:
            self.type_test_rows = [
                (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
                 Decimal('111.11'), "Test One", True,
                 1.7976931348623157 * (10 ^ 308), 127,
                 b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
                 3.402823 * (10 ^ 38)),
                (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
                 Decimal('0.99'), "测试二", False, 200.1, -1,
                 b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
                 -150.2)
            ]
        else:
            self.type_test_rows = [
                (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
                 "Test One", True, 1.7976931348623157 * (10 ^ 308), 127,
                 b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
                 3.402823 * (10 ^ 38)),
                (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc), "测试二",
                 False, 200.1, -1,
                 b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
                 -150.2)
            ]
        session = self.client.new_session()
        for row in self.type_test_rows:
            op = self.type_table.new_insert(row)
            session.apply(op)
        session.flush()

        # Remove the float values from the type_test_rows tuples so we can
        # compare the other vals
        self.type_test_rows = [row[:-1] for row in self.type_test_rows]

Exemple #17

0

Afficher le fichier

# Fixed columns: the two int64 id columns, then the timestamp kept as text.
for col in ('wallet_id', 'txn_id'):
    builder.add_column(col, kudu.int64, nullable=False)
builder.add_column('timestamp', kudu.string, nullable=False)

# Caller-supplied columns: the integer ones first, then the text ones.
for col in int_columns:
    builder.add_column(col, kudu.int64, nullable=False)
# NOTE(review): the original carried a "# double" remark on the text columns -
# confirm whether some of them were meant to be kudu.double.
for col in text_columns:
    builder.add_column(col, kudu.string, nullable=False)

# wallet_id + txn_id form the composite primary key.
builder.set_primary_keys(['wallet_id', 'txn_id'])
schema = builder.build()

# Start from a clean slate: if 'payment_history' already exists, print the
# current table list and drop it.
if client.table_exists('payment_history'):
    print(client.list_tables())
    client.delete_table('payment_history')

# The table is created with an empty list of range-partition columns.
# Earlier experiments hashed on 'wallet_id' (2 buckets) or range-partitioned
# on it instead.
partitioning = Partitioning().set_range_partition_columns([])
client.create_table('payment_history', schema, partitioning)

print(schema)

print("ok")

Exemple #18

0

Afficher le fichier

Fichier : util.py Projet : cs-wang/kudu

    def setUpClass(self):
        """
        Parent class for both the Scan tests and the
        Scan Token tests

        Populates the existing example table (``self.ex_table``) with
        ``self.nrows`` rows, then creates and fills a second table,
        'type-test', that has one column per type. The inserted data is kept
        on ``self.tuples`` and ``self.type_test_rows`` for the tests to
        compare against.
        """
        super(TestScanBase, self).setUpClass()

        self.nrows = 100
        table = self.client.table(self.ex_table)
        session = self.client.new_session()

        tuples = []
        for i in range(self.nrows):
            op = table.new_insert()
            # Expected row: (key, int_val, string_val or None, timestamp).
            tup = i, \
                  i * 2, \
                  'hello_%d' % i if i % 2 == 0 else None, \
                  datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
            op['key'] = tup[0]
            op['int_val'] = tup[1]
            # Even rows get a string; odd multiples of 3 set string_val to
            # None explicitly; the remaining odd rows leave it unset.
            if i % 2 == 0:
                op['string_val'] = tup[2]
            elif i % 3 == 0:
                op['string_val'] = None
            op['unixtime_micros_val'] = tup[3]
            session.apply(op)
            tuples.append(tup)
        session.flush()

        self.table = table
        self.tuples = tuples

        # Create table to test all types
        # for various predicate tests
        table_name = 'type-test'
        # Create schema, partitioning and then table
        builder = kudu.schema_builder()
        builder.add_column('key').type(
            kudu.int64).nullable(False).primary_key()
        builder.add_column('unixtime_micros_val',
                           type_=kudu.unixtime_micros,
                           nullable=False)
        builder.add_column('string_val',
                           type_=kudu.string,
                           compression=kudu.COMPRESSION_LZ4,
                           encoding='prefix')
        builder.add_column('bool_val', type_=kudu.bool)
        builder.add_column('double_val', type_=kudu.double)
        builder.add_column('int8_val', type_=kudu.int8)
        builder.add_column('binary_val',
                           type_='binary',
                           compression=kudu.COMPRESSION_SNAPPY,
                           encoding='prefix')
        builder.add_column('float_val', type_=kudu.float)
        schema = builder.build()

        # Every column name except the float one, for projections that skip it.
        self.projected_names_w_o_float = [
            col for col in schema.names if col != 'float_val'
        ]

        # Three hash buckets on the key column.
        partitioning = Partitioning().add_hash_partitions(column_names=['key'],
                                                          num_buckets=3)

        self.client.create_table(table_name, schema, partitioning)
        self.type_table = self.client.table(table_name)

        # Insert new rows
        # NOTE(review): `^` is bitwise XOR in Python, so `10 ^ 308` is 318 and
        # `10 ^ 38` is 44 - these are not powers of ten. Confirm whether
        # 1e308 / 1e38 were intended before changing the stored values.
        self.type_test_rows = [
            (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
             "Test One", True, 1.7976931348623157 * (10 ^ 308), 127,
             b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
             3.402823 * (10 ^ 38)),
            (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc), "测试二",
             False, 200.1, -1,
             b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
             -150.2)
        ]
        session = self.client.new_session()
        for row in self.type_test_rows:
            op = self.type_table.new_insert(row)
            session.apply(op)
        session.flush()

        # Remove the float values from the type_test_rows tuples so we can
        # compare the other vals
        self.type_test_rows = [tuple[:-1] for tuple in self.type_test_rows]

Exemple #19

0

Afficher le fichier

#!/usr/bin/env python
import time
import kudu
from kudu.client import Partitioning
from datetime import datetime

table_name = 'master_foo'
# Connect to the Kudu master running on host 'queen'.
client = kudu.connect(host='queen', port=7051)


# Schema: int64 primary key 'key' plus a string column 'name'.
builder = kudu.schema_builder()
key_column = builder.add_column('key')
key_column.type(kudu.int64).nullable(False).primary_key()
name_column = builder.add_column('name')
name_column.type(kudu.string)
schema = builder.build()
# Three hash buckets on the key column.
partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3)


# Any failure to open the table is treated as "table does not exist yet".
try:
    print('...try to open the table')
    table = client.table(table_name)
except Exception:
    print('...create table')
    client.create_table(table_name, schema, partitioning)
    print('...wait 3 sec before access the table')
    time.sleep(3)
    table = client.table(table_name)
    no = 10000
    # Build one insert per key; the printed counter runs down from `no`.
    # NOTE(review): the visible part of the script only builds the insert
    # operations - nothing here applies them through a session.
    for i in range(no):
        remaining = no - i
        print('add row {}'.format(remaining))
        row = {'key': i, 'name': 'foo{}'.format(i)}
        op = table.new_insert(row)

Exemple #20

0

Afficher le fichier

Fichier : sample-kudu-app.py Projet : myloginid/getting-started-kudu

def executeCommand(client, command, tableName):
    print("Executing Command {} on table {}".format(command, tableName))

    if command == "create":
        # Creating a table requires just a few steps
        # - Define your schema
        # - Define your partitioning scheme
        # - Call the create_table API

        # Use the schema_builder to build your table's schema
        builder = kudu.schema_builder()

        # Lastname column
        builder.add_column('lastname').type('string').default(
            'doe').compression('snappy').encoding('plain').nullable(False)

        # State/Province the person lives in
        # Leave all defaults except for the type and nullability
        builder.add_column('state_prov').type('string').nullable(False)

        builder.add_column('key').type(kudu.int64).nullable(False)

        # We prefer using dot notation, so let's add a few more columns
        # using that strategy
        #  - type : We specify the string representation of types
        #  - default: Default value if none specified
        #  - compression: Compression type
        #  - encoding: Encoding strategy
        #  - nullable: Nullability
        #  - block_size: Target block size, overriding server defaults
        builder.add_column('firstname').type('string').default(
            'jane').compression('zlib').encoding('plain').nullable(
                False).block_size(20971520)

        # Use add_column list of parameters to specify properties
        # just as an example instead of dot notation.
        builder.add_column('ts_val',
                           type_=kudu.unixtime_micros,
                           nullable=False,
                           compression='lz4')

        # Set our primary key column(s)
        builder.set_primary_keys(['lastname', 'state_prov', 'key'])

        # Build the schema
        schema = builder.build()

        # Define a hash-partitioned column on the state/province.
        # It's quite possible the data would then be skewed across partitions,
        # so what we'll do here is add the optional 3rd parameter (seed) to
        # help randomize the mapping of rows to hash buckets.
        partitioning = Partitioning().add_hash_partitions(
            column_names=['state_prov'], num_buckets=3, seed=13)

        # We've hash partitioned according to the state, now let's further
        # range partition our content by lastname. If we wanted to find all
        # the "Smith" families in the state of Oregon, we would very quickly
        # be able to isolate those rows with this type of schema.
        # Set the range partition columns - these columns MUST be part of
        # the primary key columns.
        partitioning.set_range_partition_columns('lastname')
        # Add range partitions
        partitioning.add_range_partition(['A'], ['E'])
        # By default, lower bound is inclusive while upper is exclusive
        partitioning.add_range_partition(['E'], ['Z'],
                                         upper_bound_type='inclusive')

        # Create new table passing in the table name, schema, partitioning
        # object and the optional parameter of number of replicas for this
        # table. If none specified, then it'll go by the Kudu server default
        # value for number of replicas.
        client.create_table(tableName, schema, partitioning, 1)
    elif command == "insert":
        # Open a table
        table = client.table(tableName)

        # Create a new session so that we can apply write operations
        session = client.new_session()

        # We have a few flush modes at our disposal, namely:
        # FLUSH_MANUAL, FLUSH_AUTO_SYNC and FLUSH_AUTO_BACKGROUND.
        # The default is FLUSH_MANUAL, and we want to flush manually for
        # our examples below. This call just shows how to change the mode
        # if needed.
        session.set_flush_mode(kudu.FLUSH_MANUAL)

        # We can set a timeout value as well in milliseconds. Set ours to
        # 3 seconds.
        session.set_timeout_ms(3000)

        # Insert a row
        op = table.new_insert({
            'lastname': 'Smith',
            'state_prov': 'ON',
            'firstname': 'Mike',
            'key': 1,
            'ts_val': datetime.utcnow()
        })
        session.apply(op)
        op = table.new_insert({
            'lastname': 'Smith',
            'state_prov': 'ON',
            'firstname': 'Mike',
            'key': 1,
            'ts_val': datetime.utcnow()
        })
        session.apply(op)
        op = table.new_insert({
            'lastname': 'Smith',
            'state_prov': 'ON',
            'firstname': 'Mike',
            'key': 1,
            'ts_val': datetime.utcnow()
        })
        session.apply(op)
        try:
            session.flush()
        except kudu.KuduBadStatus as e:
            (errorResult, overflowed) = session.get_pending_errors()
            print("Insert row failed: {} (more pending errors? {})".format(
                errorResult, overflowed))