def test_index_projection_with_schema(self):
    """Project columns 0 and 1 by index and check both rows and schema.

    The scan result must equal the first two fields of every fixture
    tuple, and a schema rebuilt from the scanner's projection schema must
    equal a hand-built ``key``/``int_val`` schema.
    """
    scanner = self.table.scanner()
    scanner.set_projected_column_indexes([0, 1])
    scanner.set_fault_tolerant()
    scanner.open()
    rows = scanner.read_all_tuples()

    # Reference schema the projection is compared against.
    reference = kudu.schema_builder()
    reference.add_column('key', kudu.int32, nullable=False)
    reference.add_column('int_val', kudu.int32)
    reference.set_primary_keys(['key'])
    expected_schema = reference.build()

    # Schema rebuilt column-by-column from the projection schema.
    rebuilt = kudu.schema_builder()
    for column in scanner.get_projection_schema():
        rebuilt.copy_column(column)
    rebuilt.set_primary_keys(['key'])
    new_schema = rebuilt.build()

    self.assertEqual(rows, [row[0:2] for row in self.tuples])
    self.assertTrue(expected_schema.equals(new_schema))
def create_tables(self):
    """Create the ``tag_mappings``, ``raw_measurements`` and ``measurements`` tables.

    ``measurements`` is dropped first when it exists, so it is always
    rebuilt with one ``Sensor_<n>`` column per ``self._config['sensors']``.
    The other two tables are never dropped here; they are created only
    when missing. Previously they were created unconditionally, so any
    run against a cluster that already held ``tag_mappings`` failed on the
    first ``create_table`` call, after ``measurements`` had already been
    deleted.
    """
    client = self._kudu_client

    for table in ['measurements']:
        if client.table_exists(table):
            client.delete_table(table)

    # Define a schema for a tag_mappings table
    tm_builder = kudu.schema_builder()
    tm_builder.add_column('tag_id').type(
        kudu.int32).nullable(False).primary_key()
    tm_builder.add_column('sensor_name').type(kudu.string).nullable(False)
    tm_schema = tm_builder.build()

    # Define partitioning schema
    tm_partitioning = Partitioning().add_hash_partitions(
        column_names=['tag_id'], num_buckets=3)

    # Define a schema for a raw_measurements table
    rm_builder = kudu.schema_builder()
    rm_builder.add_column('record_time').type(kudu.string).nullable(False)
    rm_builder.add_column('tag_id').type(kudu.int32).nullable(False)
    rm_builder.add_column('value').type(kudu.double).nullable(False)
    rm_builder.set_primary_keys(['record_time', 'tag_id'])
    rm_schema = rm_builder.build()

    # Define partitioning schema
    rm_partitioning = Partitioning().add_hash_partitions(
        column_names=['record_time', 'tag_id'], num_buckets=3)

    # Define a schema for a measurements table: one nullable double
    # column per configured sensor, keyed by record_time.
    m_builder = kudu.schema_builder()
    m_builder.add_column('record_time').type(kudu.string).nullable(False)
    for device_id in range(0, self._config['sensors']):
        m_builder.add_column('Sensor_%d' % device_id).type(
            kudu.double).nullable(True)
    m_builder.set_primary_keys(['record_time'])
    m_schema = m_builder.build()

    # Define partitioning schema
    m_partitioning = Partitioning().add_hash_partitions(
        column_names=['record_time'], num_buckets=3)

    # Create whichever tables are missing. 'measurements' was dropped
    # above, so it is always recreated.
    pending = [
        ('tag_mappings', tm_schema, tm_partitioning),
        ('raw_measurements', rm_schema, rm_partitioning),
        ('measurements', m_schema, m_partitioning),
    ]
    for name, schema, partitioning in pending:
        if not client.table_exists(name):
            client.create_table(name, schema, partitioning, n_replicas=3)
def test_set_column_spec_pk(self):
    """A primary key can be set via ``primary_key()`` or the keyword argument."""
    # Variant 1: chained primary_key() call on the returned column spec.
    chained = kudu.schema_builder()
    key_spec = chained.add_column('key', 'int64',
                                  nullable=False).primary_key()
    assert key_spec is not None
    chained_schema = chained.build()
    assert 'key' in chained_schema.primary_keys()

    # Variant 2: primary_key=True passed directly to add_column().
    by_keyword = kudu.schema_builder()
    by_keyword.add_column('key', 'int64', nullable=False, primary_key=True)
    keyword_schema = by_keyword.build()
    assert 'key' in keyword_schema.primary_keys()
def test_varchar_invalid_length(self):
    """``build()`` must raise KuduInvalidArgument for a varchar of length 0."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('varchar').primary_key()
    key_spec.length(0).nullable(False)

    with self.assertRaises(kudu.KuduInvalidArgument):
        builder.build()
def test_length_on_non_varchar_column(self):
    """``build()`` must raise KuduInvalidArgument when a decimal column has a length."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('decimal').primary_key()
    key_spec.nullable(False).length(10)

    with self.assertRaises(kudu.KuduInvalidArgument):
        builder.build()
def test_kudu_schema_convert(self):
    """Convert a Kudu schema to an ibis schema and compare with the expected one."""
    # Each entry: (name, ibis type, kudu type name, is_nullable,
    # is_primary_key)
    spec = [
        ('a', dt.Int8(False), 'int8', False, True),
        ('b', dt.Int16(False), 'int16', False, True),
        ('c', dt.Int32(False), 'int32', False, False),
        ('d', dt.Int64(True), 'int64', True, False),
        ('e', dt.String(True), 'string', True, False),
        ('f', dt.Boolean(False), 'bool', False, False),
        ('g', dt.Float(False), 'float', False, False),
        ('h', dt.Double(True), 'double', True, False),
        # TODO
        # ('i', 'binary', False, False),
        ('j', dt.Timestamp(True), 'timestamp', True, False)
    ]

    builder = kudu.schema_builder()
    for name, _, kudu_type, is_nullable, _ in spec:
        builder.add_column(name, kudu_type, nullable=is_nullable)

    # Primary keys and expected ibis pairs are derived from the same spec.
    primary_keys = [entry[0] for entry in spec if entry[4]]
    ibis_types = [(entry[0], entry[1]) for entry in spec]

    builder.set_primary_keys(primary_keys)
    kschema = builder.build()

    ischema = ksupport.schema_kudu_to_ibis(kschema)
    expected = ibis.schema(ibis_types)

    assert_equal(ischema, expected)
def test_precision_on_non_decimal_column(self):
    """``build()`` must raise KuduInvalidArgument when an int32 column has precision/scale."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('int32').primary_key()
    key_spec.nullable(False).precision(9).scale(2)

    with self.assertRaises(kudu.KuduInvalidArgument):
        builder.build()
def example_schema(cls):
    """Return a schema with ``key`` (int32, primary key), ``int_val`` and ``string_val``."""
    builder = kudu.schema_builder()
    builder.add_column('key', kudu.int32, nullable=False)
    # The remaining columns take only a name and a type.
    for column_name, column_type in (('int_val', kudu.int32),
                                     ('string_val', kudu.string)):
        builder.add_column(column_name, column_type)
    builder.set_primary_keys(['key'])
    schema = builder.build()
    return schema
def test_type(self):
    """The type of a built int32 column reports name ``int32`` and enum ``INT32``."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('int32')
    key_spec.primary_key().nullable(False)

    column_type = builder.build()[0].type
    assert column_type.name == 'int32'
    assert column_type.type == kudu.schema.INT32
def test_schema_equals(self):
    """The fixture schema equals itself and differs from a one-column schema."""
    fixture = self.schema
    assert fixture.equals(fixture)

    # A schema with a single int64 key column must compare unequal.
    other_builder = kudu.schema_builder()
    other_builder.add_column('key', 'int64', nullable=False,
                             primary_key=True)
    other = other_builder.build()

    assert not fixture.equals(other)
def test_unsupported_col_spec_methods_for_create_table(self):
    """``rename()`` and ``remove_default()`` must each make ``build()`` fail.

    Each spec method is checked on its own fresh builder. Previously the
    second check reused the builder that still held the renamed column
    (and added a second column with the same name), so ``build()`` could
    raise because of the leftover rename and the ``remove_default()`` path
    was never exercised in isolation.
    """
    # Case 1: a renamed column spec.
    rename_builder = kudu.schema_builder()
    rename_builder.add_column('test', 'int64').rename('test')
    with self.assertRaises(kudu.KuduNotSupported):
        rename_builder.build()

    # Case 2: a column spec with its default removed, on a clean builder.
    remove_default_builder = kudu.schema_builder()
    remove_default_builder.add_column('test', 'int64').remove_default()
    with self.assertRaises(kudu.KuduNotSupported):
        remove_default_builder.build()
def test_date(self):
    """The type of a built date column reports name ``date`` and enum ``DATE``."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('date')
    key_spec.primary_key().nullable(False)

    first_column = builder.build()[0]
    column_type = first_column.type
    assert column_type.name == 'date'
    assert column_type.type == kudu.schema.DATE
def example_schema(cls):
    """Return the four-column example schema keyed on ``key``.

    ``string_val`` carries the default value ``'nothing'``.
    """
    # (name, type, extra keyword arguments) in column order.
    columns = (
        ('key', kudu.int32, {'nullable': False}),
        ('int_val', kudu.int32, {}),
        ('string_val', kudu.string, {'default': 'nothing'}),
        ('unixtime_micros_val', kudu.unixtime_micros, {}),
    )
    builder = kudu.schema_builder()
    for column_name, column_type, options in columns:
        builder.add_column(column_name, column_type, **options)
    builder.set_primary_keys(['key'])
    return builder.build()
def test_decimal_without_precision(self):
    """``build()`` must raise KuduInvalidArgument for a decimal column with no precision."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('decimal')
    key_spec.primary_key().nullable(False)

    with self.assertRaises(kudu.KuduInvalidArgument):
        builder.build()
def test_type(self):
    """The type of a built int32 column reports name ``int32`` and enum ``INT32``."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('int32')
    key_spec.primary_key().nullable(False)

    built = builder.build()
    column_type = built[0].type
    assert column_type.name == 'int32'
    assert column_type.type == kudu.schema.INT32
def test_varchar(self):
    """A varchar(10) column reports its type and length after ``build()``."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('varchar').primary_key()
    key_spec.nullable(False).length(10)

    first_column = builder.build()[0]

    column_type = first_column.type
    assert column_type.name == 'varchar'
    assert column_type.type == kudu.schema.VARCHAR

    attributes = first_column.type_attributes
    assert attributes.length == 10
def setUp(self):
    """Build the four-column fixture schema keyed on ``one`` and ``two``.

    Stores the column list, primary keys, builder and built schema on
    ``self``.
    """
    # (name, kudu type name, nullable)
    self.columns = [
        ('one', 'int32', False),
        ('two', 'int8', False),
        ('three', 'double', True),
        ('four', 'string', False),
    ]
    self.primary_keys = ['one', 'two']

    builder = kudu.schema_builder()
    self.builder = builder
    for column_name, type_name, is_nullable in self.columns:
        builder.add_column(column_name, type_name, nullable=is_nullable)
    builder.set_primary_keys(self.primary_keys)

    self.schema = builder.build()
def test_decimal(self):
    """A decimal(9, 2) column reports its type, precision and scale after ``build()``."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type('decimal').primary_key()
    key_spec.nullable(False).precision(9).scale(2)

    first_column = builder.build()[0]

    column_type = first_column.type
    assert column_type.name == 'decimal'
    assert column_type.type == kudu.schema.DECIMAL

    attributes = first_column.type_attributes
    assert attributes.precision == 9
    assert attributes.scale == 2
def test_compression(self):
    """Compression accepts a name or a constant; an unknown name raises ValueError."""
    builder = kudu.schema_builder()
    builder.add_column('key', 'int64', nullable=False)

    # Compression given by name through the chained setter.
    lz4_spec = builder.add_column('foo', 'string').compression('lz4')
    assert lz4_spec is not None

    # Compression given by module-level constant.
    zlib_spec = builder.add_column('bar', 'string')
    zlib_spec.compression(kudu.COMPRESSION_ZLIB)

    # An unrecognised compression name is rejected by add_column itself.
    with self.assertRaises(ValueError):
        builder.add_column('qux', 'string', compression='unknown')

    builder.set_primary_keys(['key'])
    builder.build()
def test_encoding(self):
    """Encoding accepts a name or a constant; an unknown name raises ValueError."""
    builder = kudu.schema_builder()
    builder.add_column('key', 'int64', nullable=False)

    # Encoding given by name through the chained setter.
    rle_spec = builder.add_column('foo', 'string').encoding('rle')
    assert rle_spec is not None

    # Encoding given by module-level constant.
    plain_spec = builder.add_column('bar', 'string')
    plain_spec.encoding(kudu.ENCODING_PLAIN)

    # An unrecognised encoding name is rejected by add_column itself.
    with self.assertRaises(ValueError):
        builder.add_column('qux', 'string', encoding='unknown')

    builder.set_primary_keys(['key'])
    builder.build()
def test_nullable_not_null(self):
    """Nullability set by chained call or keyword shows up on the built columns."""
    builder = kudu.schema_builder()
    builder.add_column('key', 'int64', nullable=False).primary_key()
    # Chained nullable() calls.
    builder.add_column('data1', 'double').nullable(True)
    builder.add_column('data2', 'double').nullable(False)
    # nullable passed as a keyword argument.
    builder.add_column('data3', 'double', nullable=True)
    builder.add_column('data4', 'double', nullable=False)

    schema = builder.build()

    # Expected nullability per column index, in declaration order.
    expected_nullable = (False, True, False, True, False)
    for index, expected in enumerate(expected_nullable):
        assert bool(schema[index].nullable) is expected
def test_decimal(self):
    """A decimal(9, 2) column reports its type, precision and scale after ``build()``."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key')
    key_spec.type('decimal').primary_key().nullable(False).precision(
        9).scale(2)

    built = builder.build()
    first_column = built[0]

    column_type = first_column.type
    assert column_type.name == 'decimal'
    assert column_type.type == kudu.schema.DECIMAL

    attributes = first_column.type_attributes
    assert attributes.precision == 9
    assert attributes.scale == 2
def test_nullable_not_null(self):
    """Nullability set by chained call or keyword shows up on the built columns."""
    builder = kudu.schema_builder()
    key_spec = builder.add_column('key', 'int64', nullable=False)
    key_spec.primary_key()

    # data1/data2 use the chained setter, data3/data4 the keyword.
    builder.add_column('data1', 'double').nullable(True)
    builder.add_column('data2', 'double').nullable(False)
    builder.add_column('data3', 'double', nullable=True)
    builder.add_column('data4', 'double', nullable=False)

    schema = builder.build()

    nullable_indexes = (1, 3)
    not_nullable_indexes = (0, 2, 4)
    for index in range(5):
        if index in nullable_indexes:
            assert schema[index].nullable
        else:
            assert index in not_nullable_indexes
            assert not schema[index].nullable
def test_encoding(self):
    """Every listed encoding is accepted; an unknown name raises ValueError."""
    builder = kudu.schema_builder()
    builder.add_column('key', 'int64', nullable=False)

    # Encodings by name, plus one module-level constant.
    available_encodings = ['auto', 'plain', 'prefix', 'bitshuffle',
                           'rle', 'dict', kudu.ENCODING_DICT]
    for encoding in available_encodings:
        column_name = 'foo_%s' % encoding
        encoded_spec = builder.add_column(column_name,
                                          'string').encoding(encoding)
        assert encoded_spec is not None
        del encoded_spec

    plain_spec = builder.add_column('bar', 'string')
    plain_spec.encoding(kudu.ENCODING_PLAIN)

    with self.assertRaises(ValueError):
        builder.add_column('qux', 'string', encoding='unknown')

    builder.set_primary_keys(['key'])
    builder.build()
def test_encoding(self):
    """Every listed encoding is accepted; an unknown name raises ValueError."""
    builder = kudu.schema_builder()
    builder.add_column('key', 'int64', nullable=False)

    # Encodings by name, plus one module-level constant.
    available_encodings = [
        'auto', 'plain', 'prefix', 'bitshuffle', 'rle', 'dict',
        kudu.ENCODING_DICT,
    ]
    for encoding in available_encodings:
        new_column = builder.add_column('foo_%s' % encoding, 'string')
        encoded_spec = new_column.encoding(encoding)
        assert encoded_spec is not None
        del encoded_spec

    plain_spec = builder.add_column('bar', 'string')
    plain_spec.encoding(kudu.ENCODING_PLAIN)

    with self.assertRaises(ValueError):
        builder.add_column('qux', 'string', encoding='unknown')

    builder.set_primary_keys(['key'])
    builder.build()
def on_put(self, req, res, table):
    """Create Kudu table ``table`` from a JSON ``{column: type-label}`` body.

    The request body is parsed as JSON. When the table does not exist yet,
    a schema is built with a non-nullable string primary key ``_id`` plus
    one column per entry of the body, hash-partitioned on ``_id`` into 3
    buckets. Unknown type labels fall back to a string column.

    The response body is a JSON object ``{'table': ..., 'success': ...}``;
    ``success`` is True only when the table was created by this call. The
    status is always HTTP 200.

    Fix: the ``'double'`` label is now mapped to ``kudu.double`` instead
    of ``kudu.float``.
    """
    # Type label -> attribute name on the kudu module. Resolved lazily
    # with getattr so only the types actually requested are looked up.
    type_attr_by_label = {
        'string': 'string',
        'int': 'int64',
        'time': 'unixtime_micros',
        'float': 'float',
        'double': 'double',
        'decimal': 'decimal',
        'binary': 'binary',
        'bool': 'bool',
    }

    api = {'table': table, 'success': False}
    data = json.loads(req.bounded_stream.read().decode("utf-8"))
    client = kudu.connect(host='queen', port=7051)

    if not client.table_exists(table):
        builder = kudu.schema_builder()
        builder.add_column('_id').type(
            kudu.string).nullable(False).primary_key()
        if data:
            for column_name in data:
                label = data[column_name]
                # Non-string labels (e.g. nested JSON) cannot match any
                # known label; treat them as unknown -> string.
                if isinstance(label, str):
                    attr = type_attr_by_label.get(label, 'string')
                else:
                    attr = 'string'
                # NOTE(review): a 'decimal' column is added without
                # precision/scale; confirm build() accepts that.
                builder.add_column(column_name).type(getattr(kudu, attr))
        schema = builder.build()
        partitioning = Partitioning().add_hash_partitions(
            column_names=['_id'], num_buckets=3)
        client.create_table(table, schema, partitioning)
        api['success'] = True

    res.body = json.dumps(api)
    res.status = falcon.HTTP_200
def open_or_create_table(client, table, drop=False):
    """Open ``table``, creating it first when it is absent.

    When ``drop`` is true an existing table is deleted and recreated. A
    new table has an int64 primary-key column ``ts`` plus one float column
    per name in ``DSTAT_COL_NAMES``, hash-partitioned on ``ts`` into 2
    buckets.

    Returns the opened table object.
    """
    exists = bool(client.table_exists(table))
    if exists and drop:
        client.delete_table(table)
        exists = False

    if exists:
        return client.table(table)

    # Schema: timestamp key followed by the float-typed dstat columns.
    builder = kudu.schema_builder()
    builder.add_column("ts", kudu.int64, nullable=False, primary_key=True)
    for column_name in DSTAT_COL_NAMES:
        builder.add_column(column_name, kudu.float_)
    schema = builder.build()

    # Hash partitioning on the key column.
    partitioning = Partitioning().add_hash_partitions('ts', 2)
    client.create_table(table, schema, partitioning)

    return client.table(table)
def setUpClass(self):
    """Populate the example table and create/populate the ``type-test`` table.

    Shared fixture for the Scan and Scan Token tests. Leaves ``nrows``,
    ``table``, ``tuples``, ``projected_names_w_o_float``, ``type_table``
    and ``type_test_rows`` (with the trailing float value stripped) on
    ``self``.
    """
    super(TestScanBase, self).setUpClass()

    self.nrows = 100
    table = self.client.table(self.ex_table)
    session = self.client.new_session()

    inserted = []
    for i in range(self.nrows):
        op = table.new_insert()
        # string_val is only present for even keys.
        tup = (
            i,
            i * 2,
            'hello_%d' % i if i % 2 == 0 else None,
            datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
        )
        op['key'] = tup[0]
        op['int_val'] = tup[1]
        if i % 2 == 0:
            op['string_val'] = tup[2]
        op['unixtime_micros_val'] = tup[3]
        session.apply(op)
        inserted.append(tup)
    session.flush()

    self.table = table
    self.tuples = inserted

    # Table covering all column types, used by the predicate tests.
    table_name = 'type-test'

    builder = kudu.schema_builder()
    builder.add_column('key').type(kudu.int64).nullable(False)
    builder.add_column('unixtime_micros_val',
                       type_=kudu.unixtime_micros, nullable=False)
    if kudu.CLIENT_SUPPORTS_DECIMAL:
        builder.add_column('decimal_val', type_=kudu.decimal,
                           precision=5, scale=2)
    builder.add_column('string_val', type_=kudu.string,
                       compression=kudu.COMPRESSION_LZ4, encoding='prefix')
    builder.add_column('bool_val', type_=kudu.bool)
    builder.add_column('double_val', type_=kudu.double)
    builder.add_column('int8_val', type_=kudu.int8)
    builder.add_column('binary_val', type_='binary',
                       compression=kudu.COMPRESSION_SNAPPY,
                       encoding='prefix')
    builder.add_column('float_val', type_=kudu.float)
    builder.set_primary_keys(['key', 'unixtime_micros_val'])
    schema = builder.build()

    self.projected_names_w_o_float = [
        name for name in schema.names if name != 'float_val'
    ]

    # Hash on key, plus two range partitions split at 2016-01-01.
    partitioning = (
        Partitioning()
        .add_hash_partitions(column_names=['key'], num_buckets=3)
        .set_range_partition_columns(['unixtime_micros_val'])
        .add_range_partition(
            upper_bound={'unixtime_micros_val': ("2016-01-01", "%Y-%m-%d")},
            upper_bound_type=kudu.EXCLUSIVE_BOUND
        )
        .add_range_partition(
            lower_bound={'unixtime_micros_val': datetime.datetime(2016, 1, 1)},
            lower_bound_type='INCLUSIVE',
            upper_bound={'unixtime_micros_val': datetime.datetime(9999, 12, 31)}
        )
    )

    self.client.create_table(table_name, schema, partitioning)
    self.type_table = self.client.table(table_name)

    # Rows to insert. The decimal value sits at index 2 and is only
    # present when the client supports decimals.
    # NOTE(review): `^` is bitwise XOR in Python, so (10 ^ 308) is 318 and
    # (10 ^ 38) is 44, not powers of ten. Values are kept as-is; confirm
    # whether max double/float were intended.
    first_row = [
        1,
        datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
        "Test One",
        True,
        1.7976931348623157 * (10 ^ 308),
        127,
        b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
        3.402823 * (10 ^ 38),
    ]
    second_row = [
        2,
        datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
        "测试二",
        False,
        200.1,
        -1,
        b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
        -150.2,
    ]
    if kudu.CLIENT_SUPPORTS_DECIMAL:
        first_row.insert(2, Decimal('111.11'))
        second_row.insert(2, Decimal('0.99'))
    self.type_test_rows = [tuple(first_row), tuple(second_row)]

    session = self.client.new_session()
    for row in self.type_test_rows:
        op = self.type_table.new_insert(row)
        session.apply(op)
    session.flush()

    # Remove the float values from the type_test_rows tuples so we can
    # compare the other vals
    self.type_test_rows = [row[:-1] for row in self.type_test_rows]
def setUpClass(self):
    """Populate the example table and create/populate the ``type-test`` table.

    Shared fixture for the Scan and Scan Token tests. Leaves ``nrows``,
    ``table``, ``tuples``, ``projected_names_w_o_float``, ``type_table``
    and ``type_test_rows`` (with the trailing float value stripped) on
    ``self``.
    """
    super(TestScanBase, self).setUpClass()

    self.nrows = 100
    table = self.client.table(self.ex_table)
    session = self.client.new_session()

    inserted = []
    for i in range(self.nrows):
        op = table.new_insert()
        tup = (
            i,
            i * 2,
            'hello_%d' % i if i % 2 == 0 else None,
            datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
        )
        op['key'] = tup[0]
        op['int_val'] = tup[1]
        if i % 2 == 0:
            op['string_val'] = tup[2]
        elif i % 3 == 0:
            # Odd multiples of 3 set the value to None explicitly; other
            # odd keys leave the column unset.
            op['string_val'] = None
        op['unixtime_micros_val'] = tup[3]
        session.apply(op)
        inserted.append(tup)
    session.flush()

    self.table = table
    self.tuples = inserted

    # Table covering all column types, used by the predicate tests.
    table_name = 'type-test'

    builder = kudu.schema_builder()
    key_spec = builder.add_column('key').type(kudu.int64)
    key_spec.nullable(False).primary_key()
    builder.add_column('unixtime_micros_val',
                       type_=kudu.unixtime_micros, nullable=False)
    builder.add_column('string_val', type_=kudu.string,
                       compression=kudu.COMPRESSION_LZ4, encoding='prefix')
    builder.add_column('bool_val', type_=kudu.bool)
    builder.add_column('double_val', type_=kudu.double)
    builder.add_column('int8_val', type_=kudu.int8)
    builder.add_column('binary_val', type_='binary',
                       compression=kudu.COMPRESSION_SNAPPY,
                       encoding='prefix')
    builder.add_column('float_val', type_=kudu.float)
    schema = builder.build()

    self.projected_names_w_o_float = [
        name for name in schema.names if name != 'float_val'
    ]

    partitioning = Partitioning().add_hash_partitions(
        column_names=['key'], num_buckets=3)

    self.client.create_table(table_name, schema, partitioning)
    self.type_table = self.client.table(table_name)

    # Rows to insert.
    # NOTE(review): `^` is bitwise XOR in Python, so (10 ^ 308) is 318 and
    # (10 ^ 38) is 44, not powers of ten. Values are kept as-is; confirm
    # whether max double/float were intended.
    first_row = (
        1,
        datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
        "Test One",
        True,
        1.7976931348623157 * (10 ^ 308),
        127,
        b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
        3.402823 * (10 ^ 38),
    )
    second_row = (
        2,
        datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
        "测试二",
        False,
        200.1,
        -1,
        b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
        -150.2,
    )
    self.type_test_rows = [first_row, second_row]

    session = self.client.new_session()
    for row in self.type_test_rows:
        op = self.type_table.new_insert(row)
        session.apply(op)
    session.flush()

    # Remove the float values from the type_test_rows tuples so we can
    # compare the other vals
    self.type_test_rows = [row[:-1] for row in self.type_test_rows]
def builder() -> "kudu.schema.SchemaBuilder":
    """Return a fresh Kudu schema builder from ``kudu.schema_builder()``.

    The previous return annotation, ``kudu.schema``, named the module
    rather than the builder type. The annotation is written as a string so
    it is not evaluated when the function is defined.
    """
    return kudu.schema_builder()
#!/usr/bin/env python import time import kudu from kudu.client import Partitioning from datetime import datetime table_name = 'master_foo' # Mount/connect the Kudu queen client = kudu.connect(host='queen', port=7051) builder = kudu.schema_builder() builder.add_column('key').type(kudu.int64).nullable(False).primary_key() builder.add_column('name').type(kudu.string) schema = builder.build() partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3) try: print('...try to open the table') table = client.table(table_name) except Exception as e: print('...create table') client.create_table(table_name, schema, partitioning) print('...wait 3 sec before access the table') time.sleep(3) table = client.table(table_name) no = 10000 for i in range(no): print('add row {}'.format(no-i)) op = table.new_insert({'key': i, 'name': 'foo{}'.format(i)})
def setUpClass(self):
    """Populate the example table and create/populate the ``type-test`` table.

    Shared fixture for the Scan and Scan Token tests. ``self.tuples``
    holds the inserted rows with a missing ``string_val`` replaced by
    ``'nothing'``, so the tests can check default values. Also leaves
    ``nrows``, ``table``, ``projected_names_w_o_float``, ``type_table``
    and ``type_test_rows`` (with the trailing float value stripped) on
    ``self``.
    """
    super(TestScanBase, self).setUpClass()

    self.nrows = 100
    table = self.client.table(self.ex_table)
    session = self.client.new_session()

    inserted = []
    for i in range(self.nrows):
        op = table.new_insert()
        # string_val is only present for even keys.
        tup = (
            i,
            i * 2,
            'hello_%d' % i if i % 2 == 0 else None,
            datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
        )
        op['key'] = tup[0]
        op['int_val'] = tup[1]
        if i % 2 == 0:
            op['string_val'] = tup[2]
        op['unixtime_micros_val'] = tup[3]
        session.apply(op)
        inserted.append(tup)
    session.flush()

    self.table = table
    # Replace missing values w/ defaults to test default values.
    self.tuples = [
        (row[0], row[1], 'nothing', row[3]) if row[2] is None else row
        for row in inserted
    ]

    # Table covering all column types, used by the predicate tests.
    table_name = 'type-test'

    builder = kudu.schema_builder()
    builder.add_column('key').type(kudu.int64).nullable(False)
    builder.add_column('unixtime_micros_val',
                       type_=kudu.unixtime_micros, nullable=False)
    if kudu.CLIENT_SUPPORTS_DECIMAL:
        builder.add_column('decimal_val', type_=kudu.decimal,
                           precision=5, scale=2)
    builder.add_column('string_val', type_=kudu.string,
                       compression=kudu.COMPRESSION_LZ4, encoding='prefix')
    builder.add_column('bool_val', type_=kudu.bool)
    builder.add_column('double_val', type_=kudu.double)
    builder.add_column('int8_val', type_=kudu.int8)
    builder.add_column('binary_val', type_='binary',
                       compression=kudu.COMPRESSION_SNAPPY,
                       encoding='prefix')
    builder.add_column('float_val', type_=kudu.float)
    builder.set_primary_keys(['key', 'unixtime_micros_val'])
    schema = builder.build()

    self.projected_names_w_o_float = [
        name for name in schema.names if name != 'float_val'
    ]

    # Hash on key, plus two range partitions split at 2016-01-01.
    partitioning = (
        Partitioning()
        .add_hash_partitions(column_names=['key'], num_buckets=3)
        .set_range_partition_columns(['unixtime_micros_val'])
        .add_range_partition(
            upper_bound={'unixtime_micros_val': ("2016-01-01", "%Y-%m-%d")},
            upper_bound_type=kudu.EXCLUSIVE_BOUND
        )
        .add_range_partition(
            lower_bound={'unixtime_micros_val': datetime.datetime(2016, 1, 1)},
            lower_bound_type='INCLUSIVE',
            upper_bound={'unixtime_micros_val': datetime.datetime(9999, 12, 31)}
        )
    )

    self.client.create_table(table_name, schema, partitioning)
    self.type_table = self.client.table(table_name)

    # Rows to insert. The decimal value sits at index 2 and is only
    # present when the client supports decimals.
    # NOTE(review): `^` is bitwise XOR in Python, so (10 ^ 308) is 318 and
    # (10 ^ 38) is 44, not powers of ten. Values are kept as-is; confirm
    # whether max double/float were intended.
    first_row = [
        1,
        datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
        "Test One",
        True,
        1.7976931348623157 * (10 ^ 308),
        127,
        b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
        3.402823 * (10 ^ 38),
    ]
    second_row = [
        2,
        datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
        "测试二",
        False,
        200.1,
        -1,
        b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
        -150.2,
    ]
    if kudu.CLIENT_SUPPORTS_DECIMAL:
        first_row.insert(2, Decimal('111.11'))
        second_row.insert(2, Decimal('0.99'))
    self.type_test_rows = [tuple(first_row), tuple(second_row)]

    session = self.client.new_session()
    for row in self.type_test_rows:
        op = self.type_table.new_insert(row)
        session.apply(op)
    session.flush()

    # Remove the float values from the type_test_rows tuples so we can
    # compare the other vals
    self.type_test_rows = [row[:-1] for row in self.type_test_rows]
def executeCommand(client, command, tableName):
    """Run the ``create`` or ``insert`` example step against ``tableName``.

    ``create`` builds a five-column schema keyed on
    ``(lastname, state_prov, key)``, hash-partitions on ``state_prov``,
    range-partitions on ``lastname`` and creates the table with 1 replica.
    ``insert`` applies three insert operations in a manually flushed
    session and prints the pending errors if the flush raises
    ``kudu.KuduBadStatus``. Any other command only prints the banner line.
    """
    print("Executing Command {} on table {}".format(command, tableName))

    if command == "create":
        # Creating a table takes three steps: define the schema, define
        # the partitioning scheme, call create_table.
        schema_builder = kudu.schema_builder()

        # Lastname column, configured with chained (dot-notation) setters:
        # type, default value, compression, encoding and nullability.
        lastname = schema_builder.add_column('lastname').type('string')
        lastname.default('doe').compression('snappy').encoding(
            'plain').nullable(False)

        # State/Province the person lives in; everything but type and
        # nullability is left at its default.
        schema_builder.add_column('state_prov').type('string').nullable(False)

        schema_builder.add_column('key').type(kudu.int64).nullable(False)

        # Firstname column, again with dot notation. block_size sets the
        # target block size, overriding the server default.
        firstname = schema_builder.add_column('firstname').type('string')
        firstname.default('jane').compression('zlib').encoding(
            'plain').nullable(False).block_size(20971520)

        # Same properties expressed as add_column keyword arguments,
        # shown as the alternative to dot notation.
        schema_builder.add_column('ts_val',
                                  type_=kudu.unixtime_micros,
                                  nullable=False,
                                  compression='lz4')

        # Set our primary key column(s) and build the schema.
        schema_builder.set_primary_keys(['lastname', 'state_prov', 'key'])
        schema = schema_builder.build()

        # Hash partition by state/province. The data could be skewed
        # across partitions, so the optional seed is passed to help
        # randomize the mapping of rows to hash buckets.
        partitioning = Partitioning().add_hash_partitions(
            column_names=['state_prov'], num_buckets=3, seed=13)

        # Further range partition by lastname, e.g. to isolate all the
        # "Smith" families within one state quickly. Range partition
        # columns MUST be part of the primary key columns.
        partitioning.set_range_partition_columns('lastname')

        # By default, lower bound is inclusive while upper is exclusive.
        partitioning.add_range_partition(['A'], ['E'])
        partitioning.add_range_partition(['E'], ['Z'],
                                         upper_bound_type='inclusive')

        # The last argument is the optional number of replicas; when it
        # is omitted the Kudu server default applies.
        client.create_table(tableName, schema, partitioning, 1)

    elif command == "insert":
        table = client.table(tableName)

        # Write operations are applied through a session.
        session = client.new_session()

        # Available flush modes: FLUSH_MANUAL, FLUSH_AUTO_SYNC and
        # FLUSH_AUTO_BACKGROUND. FLUSH_MANUAL is the default; it is set
        # explicitly only to show how the mode is changed.
        session.set_flush_mode(kudu.FLUSH_MANUAL)

        # Session timeout in milliseconds (3 seconds).
        session.set_timeout_ms(3000)

        # Apply three inserts with identical key columns; the timestamp
        # is read afresh for each one.
        for _ in range(3):
            insert_op = table.new_insert({
                'lastname': 'Smith',
                'state_prov': 'ON',
                'firstname': 'Mike',
                'key': 1,
                'ts_val': datetime.utcnow()
            })
            session.apply(insert_op)

        try:
            session.flush()
        except kudu.KuduBadStatus:
            (errorResult, overflowed) = session.get_pending_errors()
            print("Insert row failed: {} (more pending errors? {})".format(
                errorResult, overflowed))
# Parse arguments parser = argparse.ArgumentParser(description='Basic Example for Kudu Python.') parser.add_argument('--masters', '-m', nargs='+', default='localhost', help='The master address(es) to connect to Kudu.') parser.add_argument('--ports', '-p', nargs='+', default='7051', help='The master server port(s) to connect to Kudu.') args = parser.parse_args() # Connect to Kudu master server(s). client = kudu.connect(host=args.masters, port=args.ports) # Define a schema for a new table. builder = kudu.schema_builder() builder.add_column('key').type(kudu.int64).nullable(False).primary_key() builder.add_column('ts_val', type_=kudu.unixtime_micros, nullable=False, compression='lz4') schema = builder.build() # Define the partitioning schema. partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3) # Delete table if it already exists. if client.table_exists('python-example'): client.delete_table('python-example') # Create a new table. client.create_table('python-example', schema, partitioning) # Open a table.