def test_create_partitioned_table(self):
    """Create and drop one table under three different partitioning schemes.

    The schemes are: hash partitioning on 'key', range partitioning on 'key'
    with split rows given as a dict, a list and a tuple, and hash
    partitioning on 'key' with an explicit seed.
    """
    name = 'partitioned_table'
    try:
        self.client.create_table(
            name, self.schema,
            partitioning=Partitioning().add_hash_partitions(['key'], 2))
        # TODO: once the Python client can list partition info, assert that it was
        # created successfully here.
        self.client.delete_table(name)

        range_partitioning = (
            Partitioning()
            .set_range_partition_columns(['key'])
            .add_range_partition_split({'key': 10})
            .add_range_partition_split([20])
            .add_range_partition_split((30,)))
        self.client.create_table(
            name, self.schema, partitioning=range_partitioning)
        self.client.delete_table(name)

        self.client.create_table(
            name, self.schema,
            partitioning=Partitioning().add_hash_partitions(
                ['key'], 2, seed=342310))
        self.client.delete_table(name)
    finally:
        # Best-effort cleanup. On the success path the table has already been
        # deleted, so this call is expected to fail and the error is ignored.
        # `except Exception` (instead of a bare `except`) still lets
        # KeyboardInterrupt and SystemExit propagate.
        try:
            self.client.delete_table(name)
        except Exception:
            pass
def create_tables(self):
    """Create the tag_mappings, raw_measurements and measurements tables.

    'measurements' is dropped first when it already exists, because its
    column set is derived from ``self._config['sensors']``. The other two
    tables are never dropped here; they are created only when missing, so
    that calling this method a second time does not fail on tables that
    are already present.
    """
    client = self._kudu_client

    # Tables that are always rebuilt from scratch.
    for table in ['measurements']:
        if client.table_exists(table):
            client.delete_table(table)

    # Define a schema for a tag_mappings table
    tm_builder = kudu.schema_builder()
    tm_builder.add_column('tag_id').type(
        kudu.int32).nullable(False).primary_key()
    tm_builder.add_column('sensor_name').type(kudu.string).nullable(False)
    tm_schema = tm_builder.build()
    # Define partitioning schema
    tm_partitioning = Partitioning().add_hash_partitions(
        column_names=['tag_id'], num_buckets=3)

    # Define a schema for a raw_measurements table
    rm_builder = kudu.schema_builder()
    rm_builder.add_column('record_time').type(kudu.string).nullable(False)
    rm_builder.add_column('tag_id').type(kudu.int32).nullable(False)
    rm_builder.add_column('value').type(kudu.double).nullable(False)
    rm_builder.set_primary_keys(['record_time', 'tag_id'])
    rm_schema = rm_builder.build()
    # Define partitioning schema
    rm_partitioning = Partitioning().add_hash_partitions(
        column_names=['record_time', 'tag_id'], num_buckets=3)

    # Define a schema for a measurements table: one nullable double column
    # per configured sensor, keyed by record_time.
    m_builder = kudu.schema_builder()
    m_builder.add_column('record_time').type(kudu.string).nullable(False)
    for device_id in range(0, self._config['sensors']):
        m_builder.add_column('Sensor_%d' % device_id).type(
            kudu.double).nullable(True)
    m_builder.set_primary_keys(['record_time'])
    m_schema = m_builder.build()
    # Define partitioning schema
    m_partitioning = Partitioning().add_hash_partitions(
        column_names=['record_time'], num_buckets=3)

    # Create the tables. The original code created all three unconditionally
    # even though only 'measurements' had been dropped; an existing
    # tag_mappings / raw_measurements table is now left untouched.
    table_specs = (
        ('tag_mappings', tm_schema, tm_partitioning),
        ('raw_measurements', rm_schema, rm_partitioning),
        ('measurements', m_schema, m_partitioning),
    )
    for table, schema, partitioning in table_specs:
        if client.table_exists(table):
            continue
        client.create_table(table, schema, partitioning, n_replicas=3)
def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                             unique_database):
    """Check that a single unbounded range partition gets printed correctly."""
    # Single non-nullable INT64 primary key column named "id".
    builder = SchemaBuilder()
    builder.add_column("id", INT64).nullable(False)
    builder.set_primary_keys(["id"])
    schema = builder.build()

    table_name = unique_database + ".unbounded_range_table"
    try:
        # Range partitioning on "id" with no bounds or splits added.
        range_on_id = Partitioning().set_range_partition_columns(["id"])
        kudu_client.create_table(table_name, schema, partitioning=range_on_id)
        kudu_table = kudu_client.table(table_name)

        impala_table_name = self.get_kudu_table_base_name(kudu_table.name)
        props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_table.name
        create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
            impala_table_name, props)
        cursor.execute(create_stmt)

        with self.drop_impala_table_after_context(cursor, impala_table_name):
            cursor.execute("SHOW RANGE PARTITIONS %s" % impala_table_name)
            expected_description = [
                ('RANGE (id)', 'STRING', None, None, None, None, None)]
            assert cursor.description == expected_description
            assert cursor.fetchall() == [('UNBOUNDED',)]
    finally:
        # Always remove the Kudu table, whether or not the assertions passed.
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)
def test_table_without_partitioning(self, cursor, kudu_client, unique_database):
    """Test a Kudu table created without partitioning (i.e. equivalent to a single
       unbounded partition). It is not possible to create such a table in Impala, but
       it can be created directly in Kudu and then loaded as an external table.
       Regression test for IMPALA-5154."""
    schema_builder = SchemaBuilder()
    column_spec = schema_builder.add_column("id", INT64)
    column_spec.nullable(False)
    schema_builder.set_primary_keys(["id"])
    schema = schema_builder.build()
    # An empty range-partition column list: no range partitioning at all.
    partitioning = Partitioning().set_range_partition_columns([])
    name = "%s.one_big_unbounded_partition" % unique_database

    try:
        kudu_client.create_table(name, schema, partitioning=partitioning)
        # The handle itself is not used; the call is kept from the original so the
        # sequence of client operations is unchanged.
        kudu_client.table(name)

        props = "TBLPROPERTIES('kudu.table_name'='%s')" % name
        cursor.execute("CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (name, props))
        with self.drop_impala_table_after_context(cursor, name):
            cursor.execute("INSERT INTO %s VALUES (1), (2), (3)" % name)
            cursor.execute("SELECT COUNT(*) FROM %s" % name)
            assert cursor.fetchall() == [(3, )]
            try:
                cursor.execute("SHOW RANGE PARTITIONS %s" % name)
            except Exception as e:
                assert "AnalysisException: SHOW RANGE PARTITIONS requested but table does "\
                    "not have range partitions" in str(e)
            else:
                # Kept outside the `try` so this failure is not caught by the
                # `except Exception` above and reported as a message mismatch.
                assert False, ("SHOW RANGE PARTITIONS should have failed on a table "
                               "without range partitions")
    finally:
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)
def test_conflicting_column_name(self, cursor, kudu_client, unique_database):
    """IMPALA-5283: Tests that loading an external Kudu table that was created with column
    names that differ only in case results in an error."""
    table_name = '%s.kudu_external_test' % unique_database
    if kudu_client.table_exists(table_name):
        kudu_client.delete_table(table_name)

    # Two columns whose names differ only in case: 'col' (key) and 'COL'.
    schema_builder = SchemaBuilder()
    col0 = 'col'
    schema_builder.add_column(col0, INT64).nullable(False).primary_key()
    col1 = 'COL'
    schema_builder.add_column(col1, INT64)
    schema = schema_builder.build()
    partitioning = Partitioning().set_range_partition_columns([col0])\
        .add_range_partition([1], [10])

    try:
        kudu_client.create_table(table_name, schema, partitioning)

        props = "tblproperties('kudu.table_name' = '%s')" % table_name
        cursor.execute("create external table %s stored as kudu %s" % (table_name, props))
    except Exception as e:
        assert 'Error loading Kudu table: Impala does not support column names that ' \
            + 'differ only in casing' in str(e)
    else:
        # In the `else` clause rather than inside the `try`, so the AssertionError
        # and its message are not swallowed by the `except Exception` handler.
        assert False, 'create table should have resulted in an exception'
    finally:
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)
def test_column_name_case(self, cursor, kudu_client, unique_database):
    """IMPALA-5286: Tests that an external Kudu table that was created with a column name
    containing upper case letters is handled correctly."""
    table_name = '%s.kudu_external_test' % unique_database
    if kudu_client.table_exists(table_name):
        kudu_client.delete_table(table_name)

    # Kudu-side table: one mixed-case INT64 key column, range partition [1, 10).
    key_col = 'Key'
    builder = SchemaBuilder()
    builder.add_column(key_col, INT64).nullable(False).primary_key()
    schema = builder.build()
    partitioning = Partitioning().set_range_partition_columns([key_col])
    partitioning = partitioning.add_range_partition([1], [10])

    run = cursor.execute
    try:
        kudu_client.create_table(table_name, schema, partitioning)

        props = "tblproperties('kudu.table_name' = '%s')" % table_name
        run("create external table %s stored as kudu %s" % (table_name, props))

        # Perform a variety of operations on the table, spelling the key column
        # with a different letter case each time.
        run("insert into %s (kEy) values (5), (1), (4)" % table_name)
        run("select keY from %s where KeY %% 2 = 0" % table_name)
        assert cursor.fetchall() == [(4, )]

        run("select * from %s order by kEY" % (table_name))
        assert cursor.fetchall() == [(1, ), (4, ), (5, )]

        run("alter table %s add range partition 11 < values < 20" % table_name)

        new_key = "KEY2"
        run("alter table %s change KEy %s bigint" % (table_name, new_key))

        val_col = "vaL"
        run("alter table %s add columns (%s bigint)" % (table_name, val_col))

        run("describe %s" % table_name)
        described = cursor.fetchall()
        # 'describe' should print the column name in lower case.
        assert new_key.lower() in described[0]
        assert val_col.lower() in described[1]

        run("alter table %s drop column Val" % table_name)
        run("describe %s" % table_name)
        assert len(cursor.fetchall()) == 1

        run("alter table %s drop range partition 11 < values < 20" % table_name)
    finally:
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)
def partition(hash_columns: list, range_columns: list = None, bound: dict = None,
              bucket_num=3) -> Partitioning:
    """Build a Partitioning from hash columns and an optional range partition.

    Parameters
    ----------
    hash_columns : list
        Each entry is passed on its own to ``add_hash_partitions``, so every
        entry produces a separate hash level with ``bucket_num`` buckets.
    range_columns : list, optional
        Columns to range-partition on. When None, the range-partition columns
        are left at the Partitioning default.
    bound : dict, optional
        Describes one range partition through the keys "lower_bound",
        "upper_bound", "lower_bound_type" and "upper_bound_type". Missing
        bound types default to "inclusive" (lower) and "exclusive" (upper).
        When None, no range partition is added.
    bucket_num : int
        Number of buckets for every hash level.

    Returns
    -------
    Partitioning
    """
    partitioning = Partitioning()
    for column in hash_columns:
        partitioning.add_hash_partitions(column_names=column, num_buckets=bucket_num)

    if range_columns is not None:
        partitioning.set_range_partition_columns(range_columns)

    # The original dereferenced `bound` unconditionally, so the documented
    # default of None raised AttributeError. An empty dict still adds a range
    # partition with no bounds, exactly as before.
    if bound is not None:
        partitioning.add_range_partition(
            lower_bound=bound.get("lower_bound"),
            upper_bound=bound.get("upper_bound"),
            lower_bound_type=bound.get("lower_bound_type") or "inclusive",
            upper_bound_type=bound.get("upper_bound_type") or "exclusive",
        )
    return partitioning
def test_create_table_with_different_owner(self):
    """Create a table with owner='alice' and check the owner reported back."""
    name = 'table_with_different_owner'
    try:
        self.client.create_table(
            name, self.schema,
            partitioning=Partitioning().add_hash_partitions(['key'], 2),
            owner='alice')

        self.assertEqual('alice', self.client.table(name).owner)
    finally:
        # Best-effort cleanup: the table may not exist if creation failed.
        # `except Exception` (not a bare `except`) keeps KeyboardInterrupt and
        # SystemExit propagating.
        try:
            self.client.delete_table(name)
        except Exception:
            pass
def test_create_table_with_different_comment(self):
    """Create a table with a comment and check the comment reported back."""
    name = 'table_with_different_comment'
    try:
        self.client.create_table(
            name, self.schema,
            partitioning=Partitioning().add_hash_partitions(['key'], 2),
            comment='new comment')

        self.assertEqual('new comment', self.client.table(name).comment)
    finally:
        # Best-effort cleanup: the table may not exist if creation failed.
        # `except Exception` (not a bare `except`) keeps KeyboardInterrupt and
        # SystemExit propagating.
        try:
            self.client.delete_table(name)
        except Exception:
            pass
def temp_kudu_table(self, kudu, col_types, name=None, num_key_cols=1, col_names=None,
                    prepend_db_name=True, db_name=None, num_partitions=2):
    """Create and return a table. This function should be used in a "with" context.
       'kudu' must be a kudu.client.Client. If a table name is not provided, a random
       name will be used. If 'prepend_db_name' is True, the table name will be prepended
       with (get_db_name() + "."), or with 'db_name' + "." when 'db_name' is given. If
       column names are not provided, the letters "a", "b", "c", ... will be used. The
       first 'num_key_cols' columns are non-nullable and form the primary key; the
       table is hash partitioned on them. The number of partitions can be set using
       'num_partitions'.

       Raises Exception if default column names are needed for more than 26 columns.

       Example:
         with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
            assert kudu.table_exists(kudu_table.name)
         assert not kudu.table_exists(kudu_table.name)
    """
    if not col_names:
        if len(col_types) > 26:
            raise Exception("Too many columns for default naming")
        # range() instead of the Python-2-only xrange(); at most 26 items.
        col_names = [chr(ord("a") + i) for i in range(len(col_types))]

    schema_builder = SchemaBuilder()
    for i, t in enumerate(col_types):
        column_spec = schema_builder.add_column(col_names[i], type_=t)
        if i < num_key_cols:
            # Key columns must not be nullable.
            column_spec.nullable(False)
    key_cols = col_names[:num_key_cols]
    schema_builder.set_primary_keys(key_cols)
    schema = schema_builder.build()

    name = name or self.random_table_name()
    if prepend_db_name:
        name = (db_name or self.get_db_name().lower()) + "." + name

    kudu.create_table(
        name, schema,
        partitioning=Partitioning().add_hash_partitions(key_cols, num_partitions))
    try:
        yield kudu.table(name)
    finally:
        # Drop the table when the "with" block exits, unless the caller already did.
        if kudu.table_exists(name):
            kudu.delete_table(name)
def test_create_table_with_different_replication_factors(self):
    """Create the table with 1, 3 and 5 replicas and check num_replicas each time."""
    name = "different_replica_table"

    # Test setting the number of replicas for 1, 3 and 5 provided that the
    # number does not exceed the number of tservers
    candidates = [n for n in [1, 3, 5] if n <= self.NUM_TABLET_SERVERS]
    for n_replicas in candidates:
        try:
            self.client.create_table(
                name, self.schema,
                partitioning=Partitioning().add_hash_partitions(['key'], 2),
                n_replicas=n_replicas)

            assert n_replicas == self.client.table(name).num_replicas
        finally:
            # Best-effort cleanup so the next iteration can reuse the name.
            # `except Exception` (not a bare `except`) keeps KeyboardInterrupt
            # and SystemExit propagating.
            try:
                self.client.delete_table(name)
            except Exception:
                pass
def on_put(self, req, res, table):
    """Handle PUT: create Kudu table `table` from a JSON column description.

    The request body is decoded as UTF-8 JSON and iterated as a mapping of
    column name -> type name. Recognised type names are 'string', 'int',
    'time', 'float', 'double', 'decimal', 'binary' and 'bool'; any other
    value yields a string column. A non-nullable string primary key '_id'
    is always added, and the table is hash partitioned on it into 3 buckets.

    The response body is JSON ``{'table': ..., 'success': ...}`` and the
    status is always HTTP 200.

    NOTE(review): the original indentation was lost; 'success' is assumed to
    be set only when the table is actually created here, and to stay False
    when the table already exists - confirm against the original file.
    """
    api = {'table': table, 'success': False}
    data = json.loads(req.bounded_stream.read().decode("utf-8"))
    client = kudu.connect(host='queen', port=7051)

    if not client.table_exists(table):
        builder = kudu.schema_builder()
        builder.add_column('_id').type(
            kudu.string).nullable(False).primary_key()
        if data:
            for i in data:
                if data[i] == 'string':
                    builder.add_column(i).type(kudu.string)
                elif data[i] == 'int':
                    builder.add_column(i).type(kudu.int64)
                elif data[i] == 'time':
                    builder.add_column(i).type(kudu.unixtime_micros)
                elif data[i] == 'float':
                    builder.add_column(i).type(kudu.float)
                elif data[i] == 'double':
                    # Was kudu.float, which silently created a float column
                    # for a requested 'double'.
                    builder.add_column(i).type(kudu.double)
                elif data[i] == 'decimal':
                    # NOTE(review): no precision/scale is supplied here;
                    # verify that the client accepts a bare decimal column.
                    builder.add_column(i).type(kudu.decimal)
                elif data[i] == 'binary':
                    builder.add_column(i).type(kudu.binary)
                elif data[i] == 'bool':
                    builder.add_column(i).type(kudu.bool)
                else:
                    # Unknown type names fall back to string.
                    builder.add_column(i).type(kudu.string)
        schema = builder.build()
        partitioning = Partitioning().add_hash_partitions(
            column_names=['_id'], num_buckets=3)
        client.create_table(table, schema, partitioning)
        api['success'] = True

    res.body = json.dumps(api)
    res.status = falcon.HTTP_200
def open_or_create_table(client, table, drop=False):
    """Open `table`, creating it first when needed, and return the table handle.

    A new table gets an int64 primary key column "ts" plus one float column
    per name in DSTAT_COL_NAMES, hash partitioned on "ts" into 2 buckets.
    When `drop` is true an existing table is deleted and recreated.
    """
    present = client.table_exists(table)
    if present and drop:
        client.delete_table(table)
        present = False

    if not present:
        # Schema: timestamp key followed by the dstat columns, all floats.
        schema_builder = kudu.schema_builder()
        schema_builder.add_column("ts", kudu.int64, nullable=False, primary_key=True)
        for column_name in DSTAT_COL_NAMES:
            schema_builder.add_column(column_name, kudu.float_)
        table_schema = schema_builder.build()

        # Hash partitioning buckets on the key column.
        hash_on_ts = Partitioning().add_hash_partitions('ts', 2)

        client.create_table(table, table_schema, hash_on_ts)

    return client.table(table)
def test_external_timestamp_default_value(self, cursor, kudu_client, unique_database):
    """Checks that a Kudu table created outside Impala with a default value on a
       UNIXTIME_MICROS column can be loaded by Impala, and validates the DESCRIBE
       output is correct."""
    # "id": non-nullable INT64 key; "ts": UNIXTIME_MICROS defaulting to 2009-01-01 UTC.
    builder = SchemaBuilder()
    builder.add_column("id", INT64).nullable(False)
    builder.add_column("ts", UNIXTIME_MICROS).default(
        datetime(2009, 1, 1, 0, 0, tzinfo=utc))
    builder.set_primary_keys(["id"])
    schema = builder.build()

    table_name = unique_database + ".tsdefault"
    try:
        range_on_id = Partitioning().set_range_partition_columns(["id"])
        kudu_client.create_table(table_name, schema, partitioning=range_on_id)
        kudu_table = kudu_client.table(table_name)

        impala_table_name = self.get_kudu_table_base_name(kudu_table.name)
        props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_table.name
        cursor.execute("CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
            impala_table_name, props))

        with self.drop_impala_table_after_context(cursor, impala_table_name):
            cursor.execute("DESCRIBE %s" % impala_table_name)
            # Strip whitespace from every non-empty cell of the DESCRIBE output.
            table_desc = []
            for row in cursor:
                table_desc.append([col.strip() if col else col for col in row])
            # Pytest shows truncated output on failure, so print the details just in case.
            LOG.info(table_desc)
            expected_ts_row = [
                "ts", "timestamp", "", "false", "true", "1230768000000000",
                "AUTO_ENCODING", "DEFAULT_COMPRESSION", "0"]
            assert expected_ts_row in table_desc
    finally:
        if kudu_client.table_exists(table_name):
            kudu_client.delete_table(table_name)
def example_partitioning(cls):
    """Return a Partitioning that range-partitions on the 'key' column."""
    range_columns = ['key']
    partitioning = Partitioning()
    return partitioning.set_range_partition_columns(range_columns)
def setUpClass(self):
    """
    Parent class for both the Scan tests and the Scan Token tests

    Populates the example table (``self.ex_table``) with ``self.nrows`` rows
    and records the expected tuples in ``self.tuples``, then creates and
    fills the 'type-test' table used by the predicate tests and records its
    rows (without the trailing float value) in ``self.type_test_rows``.
    """
    super(TestScanBase, self).setUpClass()

    self.nrows = 100
    table = self.client.table(self.ex_table)
    session = self.client.new_session()

    inserted = []
    for i in range(self.nrows):
        op = table.new_insert()
        tup = i, \
            i * 2, \
            'hello_%d' % i if i % 2 == 0 else None, \
            datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
        op['key'] = tup[0]
        op['int_val'] = tup[1]
        # string_val is only written for even rows; odd rows leave it unset.
        if i % 2 == 0:
            op['string_val'] = tup[2]
        op['unixtime_micros_val'] = tup[3]
        session.apply(op)
        inserted.append(tup)
    session.flush()

    self.table = table

    # Replace missing values w/ defaults to test default values.
    # (`is None` instead of `== None`; the loop variable no longer shadows
    # the builtin `tuple`.)
    self.tuples = [
        (row[0], row[1], 'nothing', row[3]) if row[2] is None else row
        for row in inserted
    ]

    # Create table to test all types
    # for various predicate tests
    table_name = 'type-test'
    # Create schema, partitioning and then table
    builder = kudu.schema_builder()
    builder.add_column('key').type(kudu.int64).nullable(False)
    builder.add_column('unixtime_micros_val', type_=kudu.unixtime_micros,
                       nullable=False)
    if kudu.CLIENT_SUPPORTS_DECIMAL:
        builder.add_column('decimal_val', type_=kudu.decimal, precision=5, scale=2)
    builder.add_column('string_val', type_=kudu.string,
                       compression=kudu.COMPRESSION_LZ4, encoding='prefix')
    builder.add_column('bool_val', type_=kudu.bool)
    builder.add_column('double_val', type_=kudu.double)
    builder.add_column('int8_val', type_=kudu.int8)
    builder.add_column('binary_val', type_='binary',
                       compression=kudu.COMPRESSION_SNAPPY, encoding='prefix')
    builder.add_column('float_val', type_=kudu.float)
    builder.set_primary_keys(['key', 'unixtime_micros_val'])
    schema = builder.build()

    self.projected_names_w_o_float = [
        col for col in schema.names if col != 'float_val'
    ]

    # Hash on 'key' plus two range partitions on the timestamp: everything
    # before 2016-01-01, and 2016-01-01 up to 9999-12-31.
    partitioning = Partitioning() \
        .add_hash_partitions(column_names=['key'], num_buckets=3)\
        .set_range_partition_columns(['unixtime_micros_val'])\
        .add_range_partition(
            upper_bound={'unixtime_micros_val': ("2016-01-01", "%Y-%m-%d")},
            upper_bound_type=kudu.EXCLUSIVE_BOUND
        )\
        .add_range_partition(
            lower_bound={'unixtime_micros_val': datetime.datetime(2016, 1, 1)},
            lower_bound_type='INCLUSIVE',
            upper_bound={'unixtime_micros_val': datetime.datetime(9999, 12, 31)}
        )

    self.client.create_table(table_name, schema, partitioning)
    self.type_table = self.client.table(table_name)

    # NOTE(review): `^` is bitwise XOR in Python, so (10 ^ 308) == 318 and
    # (10 ^ 38) == 44; these are not powers of ten. Presumably 1e308 / 1e38
    # were intended. The expressions are kept as they were because other
    # tests may compare against the stored values - confirm before changing.
    row_one_double = 1.7976931348623157 * (10 ^ 308)
    row_one_float = 3.402823 * (10 ^ 38)
    row_one_binary = b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1'
    row_two_binary = b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f'

    # Insert new rows
    if kudu.CLIENT_SUPPORTS_DECIMAL:
        self.type_test_rows = [
            (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
             Decimal('111.11'), "Test One", True, row_one_double, 127,
             row_one_binary, row_one_float),
            (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
             Decimal('0.99'), "测试二", False, 200.1, -1,
             row_two_binary, -150.2)
        ]
    else:
        self.type_test_rows = [
            (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
             "Test One", True, row_one_double, 127,
             row_one_binary, row_one_float),
            (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
             "测试二", False, 200.1, -1,
             row_two_binary, -150.2)
        ]
    session = self.client.new_session()
    for row in self.type_test_rows:
        op = self.type_table.new_insert(row)
        session.apply(op)
    session.flush()

    # Remove the float values from the type_test_rows tuples so we can
    # compare the other vals
    self.type_test_rows = [row[:-1] for row in self.type_test_rows]
# Adding columns to Kudu Schema: three fixed columns first, then the
# configurable integer and text columns. Every column is non-nullable.
builder.add_column('wallet_id', kudu.int64, nullable=False)
builder.add_column('txn_id', kudu.int64, nullable=False)
builder.add_column('timestamp', kudu.string, nullable=False)
for col in int_columns:
    builder.add_column(col, kudu.int64, nullable=False)
for col in text_columns:
    builder.add_column(col, kudu.string, nullable=False)

# Composite primary key, then build the schema.
builder.set_primary_keys(['wallet_id', 'txn_id'])
schema = builder.build()

# Start from a clean slate: drop any previous payment_history table
# (listing the existing tables first).
if client.table_exists('payment_history'):
    print(client.list_tables())
    client.delete_table('payment_history')

# Partitioning method. Alternatives tried earlier were hash partitioning on
# wallet_id with 2 buckets and range partitioning on wallet_id; the table is
# currently created with an empty list of range-partition columns.
partitioning = Partitioning().set_range_partition_columns([])

client.create_table('payment_history', schema, partitioning)
print(schema)
print("ok")
def setUpClass(self):
    """
    Parent class for both the Scan tests and the Scan Token tests

    Fills the example table (``self.ex_table``) with ``self.nrows`` rows,
    keeping the inserted tuples in ``self.tuples``, then creates and fills
    the 'type-test' table used by the predicate tests.
    """
    super(TestScanBase, self).setUpClass()

    self.nrows = 100
    table = self.client.table(self.ex_table)
    session = self.client.new_session()

    tuples = []
    for i in range(self.nrows):
        op = table.new_insert()
        is_even = i % 2 == 0
        string_val = 'hello_%d' % i if is_even else None
        tup = (i, i * 2, string_val,
               datetime.datetime.utcnow().replace(tzinfo=pytz.utc))
        op['key'] = tup[0]
        op['int_val'] = tup[1]
        if is_even:
            op['string_val'] = tup[2]
        elif i % 3 == 0:
            # Odd multiples of three set the column to None explicitly;
            # the remaining odd rows leave it unset.
            op['string_val'] = None
        op['unixtime_micros_val'] = tup[3]
        session.apply(op)
        tuples.append(tup)
    session.flush()

    self.table = table
    self.tuples = tuples

    # Create table to test all types
    # for various predicate tests
    table_name = 'type-test'
    # Create schema, partitioning and then table
    builder = kudu.schema_builder()
    builder.add_column('key').type(kudu.int64).nullable(False).primary_key()
    builder.add_column('unixtime_micros_val', type_=kudu.unixtime_micros,
                       nullable=False)
    builder.add_column('string_val', type_=kudu.string,
                       compression=kudu.COMPRESSION_LZ4, encoding='prefix')
    for plain_name, plain_type in (('bool_val', kudu.bool),
                                   ('double_val', kudu.double),
                                   ('int8_val', kudu.int8)):
        builder.add_column(plain_name, type_=plain_type)
    builder.add_column('binary_val', type_='binary',
                       compression=kudu.COMPRESSION_SNAPPY, encoding='prefix')
    builder.add_column('float_val', type_=kudu.float)
    schema = builder.build()

    self.projected_names_w_o_float = [
        col for col in schema.names if col != 'float_val'
    ]

    partitioning = Partitioning().add_hash_partitions(column_names=['key'],
                                                      num_buckets=3)

    self.client.create_table(table_name, schema, partitioning)
    self.type_table = self.client.table(table_name)

    # NOTE(review): `^` is bitwise XOR in Python, so (10 ^ 308) == 318 and
    # (10 ^ 38) == 44 rather than powers of ten. Left exactly as written
    # because other tests may match these values - confirm before changing.
    first_row = (
        1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
        "Test One", True, 1.7976931348623157 * (10 ^ 308), 127,
        b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
        3.402823 * (10 ^ 38))
    second_row = (
        2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
        "测试二", False, 200.1, -1,
        b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
        -150.2)

    # Insert new rows
    self.type_test_rows = [first_row, second_row]
    session = self.client.new_session()
    for row in self.type_test_rows:
        session.apply(self.type_table.new_insert(row))
    session.flush()

    # Remove the float values from the type_test_rows tuples so we can
    # compare the other vals
    self.type_test_rows = [row[:-1] for row in self.type_test_rows]
#!/usr/bin/env python
"""Open (or create) the Kudu table 'master_foo' and insert 10000 rows into it."""
import time
import kudu
from kudu.client import Partitioning
from datetime import datetime

table_name = 'master_foo'

# Mount/connect the Kudu queen
client = kudu.connect(host='queen', port=7051)

# Schema: int64 primary key 'key' plus a string column 'name'.
builder = kudu.schema_builder()
builder.add_column('key').type(kudu.int64).nullable(False).primary_key()
builder.add_column('name').type(kudu.string)
schema = builder.build()

partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3)

try:
    print('...try to open the table')
    table = client.table(table_name)
except Exception as e:
    # Any failure to open the table is treated as "table does not exist yet".
    print('...create table')
    client.create_table(table_name, schema, partitioning)
    print('...wait 3 sec before access the table')
    time.sleep(3)
    table = client.table(table_name)

no = 10000

# The original built every insert operation but never applied it to a
# session, so no row was ever written. Apply each op and flush once at the
# end.
session = client.new_session()
for i in range(no):
    print('add row {}'.format(no-i))
    op = table.new_insert({'key': i, 'name': 'foo{}'.format(i)})
    session.apply(op)

try:
    session.flush()
except kudu.KuduBadStatus:
    # E.g. rows whose key is already present when the script is run again.
    (errorResult, overflowed) = session.get_pending_errors()
    print("Insert row failed: {} (more pending errors? {})".format(
        errorResult, overflowed))
def executeCommand(client, command, tableName):
    """Run one named example command against the Kudu table `tableName`.

    Commands handled in the visible part of this function:

    "create" - build the example schema and partitioning, then create the
               table with 1 replica.
    "insert" - apply three insert operations in one manually flushed session
               and print the pending errors if the flush fails.

    Parameters
    ----------
    client : connected Kudu client
    command : str, the command name
    tableName : str, the table to operate on
    """
    print("Executing Command {} on table {}".format(command, tableName))

    if command == "create":
        # Creating a table requires just a few steps
        # - Define your schema
        # - Define your partitioning scheme
        # - Call the create_table API

        # Use the schema_builder to build your table's schema
        builder = kudu.schema_builder()

        # Lastname column
        builder.add_column('lastname').type('string').default(
            'doe').compression('snappy').encoding('plain').nullable(False)

        # State/Province the person lives in
        # Leave all defaults except for the type and nullability
        builder.add_column('state_prov').type('string').nullable(False)

        builder.add_column('key').type(kudu.int64).nullable(False)

        # We prefer using dot notation, so let's add a few more columns
        # using that strategy
        #  - type : We specify the string representation of types
        #  - default: Default value if none specified
        #  - compression: Compression type
        #  - encoding: Encoding strategy
        #  - nullable: Nullability
        #  - block_size: Target block size, overriding server defaults
        builder.add_column('firstname').type('string').default(
            'jane').compression('zlib').encoding('plain').nullable(
                False).block_size(20971520)

        # Use add_column list of parameters to specify properties
        # just as an example instead of dot notation.
        builder.add_column('ts_val', type_=kudu.unixtime_micros,
                           nullable=False, compression='lz4')

        # Set our primary key column(s)
        builder.set_primary_keys(['lastname', 'state_prov', 'key'])

        # Build the schema
        schema = builder.build()

        # Define Hash partitioned column by the state/province
        # It's quite possible the data would then be skewed across partitions
        # so what we'll do here is add the optional 3rd parameter to
        # help randomize the mapping of rows to hash buckets.
        partitioning = Partitioning().add_hash_partitions(
            column_names=['state_prov'], num_buckets=3, seed=13)

        # We've hash partitioned according to the state, now let's further
        # range partition our content by lastname. If we wanted to find all
        # the "Smith" families in the state of Oregon, we would very quickly
        # be able to isolate those rows with this type of schema.
        # Set the range partition columns - these columns MUST be part of
        # the primary key columns.
        # The column names are passed as a list: a bare string is itself an
        # iterable of characters, which would not name the 'lastname' column
        # if the client iterates the argument.
        partitioning.set_range_partition_columns(['lastname'])
        # Add range partitions
        partitioning.add_range_partition(['A'], ['E'])
        # By default, lower bound is inclusive while upper is exclusive
        partitioning.add_range_partition(['E'], ['Z'],
                                         upper_bound_type='inclusive')

        # Create new table passing in the table name, schema, partitioning
        # object and the optional parameter of number of replicas for this
        # table. If none specified, then it'll go by the Kudu server default
        # value for number of replicas.
        client.create_table(tableName, schema, partitioning, 1)
    elif command == "insert":
        # Open a table
        table = client.table(tableName)

        # Create a new session so that we can apply write operations
        session = client.new_session()

        # We have a few flush modes at our disposal, namely:
        # FLUSH_MANUAL, FLUSH_AUTO_SYNC and FLUSH_AUTO_BACKGROUND
        # The default is FLUSH_MANUAL, and we want to flush manually for
        # our examples below. Just providing an example of how to change it
        # if needed.
        session.set_flush_mode(kudu.FLUSH_MANUAL)

        # We can set a timeout value as well in milliseconds. Set ours to
        # 3 seconds.
        session.set_timeout_ms(3000)

        # Insert the same row three times. All three share one primary key
        # (lastname, state_prov, key), presumably so that the error handling
        # below has something to report.
        for _ in range(3):
            op = table.new_insert({
                'lastname': 'Smith',
                'state_prov': 'ON',
                'firstname': 'Mike',
                'key': 1,
                'ts_val': datetime.utcnow()
            })
            session.apply(op)

        try:
            session.flush()
        except kudu.KuduBadStatus:
            (errorResult, overflowed) = session.get_pending_errors()
            print("Insert row failed: {} (more pending errors? {})".format(
                errorResult, overflowed))