예제 #1
0
파일: kudu.py 프로젝트: mycastiel/kudu
    def partition(hash_columns: list, range_columns: list = None, bound: dict = None, bucket_num=3) -> Partitioning:
        """Build a ``Partitioning`` with hash and, optionally, range partitions.

        Args:
            hash_columns: Iterable of entries; one ``add_hash_partitions`` call
                is made per entry, passing the entry as ``column_names``.
            range_columns: Value forwarded to ``set_range_partition_columns``.
                When ``None`` (the default) no range partition columns are set.
            bound: Optional mapping describing a single range partition. The
                keys read are ``"lower_bound"``, ``"upper_bound"``,
                ``"lower_bound_type"`` and ``"upper_bound_type"``. When
                ``None`` (the default) no range partition is added.
            bucket_num: ``num_buckets`` used for every hash partition call.

        Returns:
            The populated ``Partitioning`` object.
        """
        # Define partitioning schema
        partitioning = Partitioning()
        for column in hash_columns:
            partitioning.add_hash_partitions(column_names=column, num_buckets=bucket_num)

        # Both range arguments default to None, so they must be optional here:
        # previously a call that relied on the defaults crashed on bound.get().
        if range_columns is not None:
            partitioning.set_range_partition_columns(range_columns)

        if bound is not None:
            partitioning.add_range_partition(
                lower_bound=bound.get("lower_bound"),
                upper_bound=bound.get("upper_bound"),
                # A missing or empty bound type falls back to these values.
                lower_bound_type=bound.get("lower_bound_type") or "inclusive",
                upper_bound_type=bound.get("upper_bound_type") or "exclusive"
            )

        return partitioning
def executeCommand(client, command, tableName):
    """Run one example command against the table named ``tableName``.

    The commands handled in this section are:

    * ``"create"`` - build a schema and a partitioning scheme, then call
      ``client.create_table``.
    * ``"insert"`` - open the table, apply three insert operations on a new
      session and flush them, printing the pending errors if the flush
      raises ``kudu.KuduBadStatus``.

    Args:
        client: Object providing ``create_table``, ``table`` and
            ``new_session`` (presumably a connected Kudu client - confirm
            against the caller).
        command: Name of the command to run; compared against the strings
            listed above.
        tableName: Table name passed to ``client.create_table`` or
            ``client.table``.
    """
    print("Executing Command {} on table {}".format(command, tableName))

    if command == "create":
        # Creating a table requires just a few steps
        # - Define your schema
        # - Define your partitioning scheme
        # - Call the create_table API

        # Use the schema_builder to build your table's schema
        builder = kudu.schema_builder()

        # Lastname column
        builder.add_column('lastname').type('string').default(
            'doe').compression('snappy').encoding('plain').nullable(False)

        # State/Province the person lives in
        # Leave all defaults except for the type and nullability
        builder.add_column('state_prov').type('string').nullable(False)

        # Integer key column; it is part of the primary key set below
        builder.add_column('key').type(kudu.int64).nullable(False)

        # We prefer using dot notation, so let's add a few more columns
        # using that strategy
        #  - type : We specify the string representation of types
        #  - default: Default value if none specified
        #  - compression: Compression type
        #  - encoding: Encoding strategy
        #  - nullable: Nullability
        #  - block_size: Target block size, overriding server defaults
        builder.add_column('firstname').type('string').default(
            'jane').compression('zlib').encoding('plain').nullable(
                False).block_size(20971520)

        # Use the add_column keyword parameters to specify properties,
        # just as an example of the alternative to dot notation.
        builder.add_column('ts_val',
                           type_=kudu.unixtime_micros,
                           nullable=False,
                           compression='lz4')

        # Set our primary key column(s)
        builder.set_primary_keys(['lastname', 'state_prov', 'key'])

        # Build the schema
        schema = builder.build()

        # Define a hash partitioned column by the state/province.
        # It's quite possible the data would then be skewed across partitions,
        # so what we'll do here is add the optional 3rd parameter (seed) to
        # help randomize the mapping of rows to hash buckets.
        partitioning = Partitioning().add_hash_partitions(
            column_names=['state_prov'], num_buckets=3, seed=13)

        # We've hash partitioned according to the state, now let's further
        # range partition our content by lastname. If we wanted to find all
        # the "Smith" families in the state of Oregon, we would very quickly
        # be able to isolate those rows with this type of schema.
        # Set the range partition columns - these columns MUST be part of
        # the primary key columns.
        partitioning.set_range_partition_columns('lastname')
        # Add range partitions
        partitioning.add_range_partition(['A'], ['E'])
        # By default, lower bound is inclusive while upper is exclusive
        partitioning.add_range_partition(['E'], ['Z'],
                                         upper_bound_type='inclusive')

        # Create a new table, passing in the table name, schema, partitioning
        # object and the optional parameter of number of replicas for this
        # table. If none is specified, then it'll go by the Kudu server default
        # value for number of replicas.
        client.create_table(tableName, schema, partitioning, 1)
    elif command == "insert":
        # Open a table
        table = client.table(tableName)

        # Create a new session so that we can apply write operations
        session = client.new_session()

        # We have a few flush modes at our disposal, namely:
        # FLUSH_MANUAL, FLUSH_AUTO_SYNC and FLUSH_AUTO_BACKGROUND
        # The default is FLUSH_MANUAL, and we want to flush manually for
        # our examples below. This call just shows how to change the mode
        # if needed.
        session.set_flush_mode(kudu.FLUSH_MANUAL)

        # We can set a timeout value as well in milliseconds. Set ours to
        # 3 seconds.
        session.set_timeout_ms(3000)

        # Insert a row
        # NOTE(review): the three inserts below use identical values for
        # 'lastname', 'state_prov' and 'key', which are the primary key
        # columns set in the "create" branch. Presumably this is meant to
        # provoke a duplicate-key error on flush - confirm this is intended.
        op = table.new_insert({
            'lastname': 'Smith',
            'state_prov': 'ON',
            'firstname': 'Mike',
            'key': 1,
            'ts_val': datetime.utcnow()
        })
        session.apply(op)
        op = table.new_insert({
            'lastname': 'Smith',
            'state_prov': 'ON',
            'firstname': 'Mike',
            'key': 1,
            'ts_val': datetime.utcnow()
        })
        session.apply(op)
        op = table.new_insert({
            'lastname': 'Smith',
            'state_prov': 'ON',
            'firstname': 'Mike',
            'key': 1,
            'ts_val': datetime.utcnow()
        })
        session.apply(op)
        # Operations are only applied so far; flush() sends them because the
        # session is in manual flush mode.
        try:
            session.flush()
        except kudu.KuduBadStatus as e:
            # The exception object itself is not used; the details are read
            # from the session's pending errors instead.
            (errorResult, overflowed) = session.get_pending_errors()
            print("Insert row failed: {} (more pending errors? {})".format(
                errorResult, overflowed))