예제 #1
0
def extract_data_view_specs(udfs):
    """Collect the data view specs declared by each UDF.

    For every element of ``udfs`` its ``specs`` attribute is read and each
    spec proto is converted into a ``ListViewSpec`` (when the ``list_view``
    field is set) or a ``MapViewSpec`` (when the ``map_view`` field is set).

    Args:
        udfs: iterable of objects exposing a ``specs`` attribute that is
            either ``None`` or an iterable of spec protos.

    Returns:
        A list with one entry per UDF, each entry being the list of specs
        extracted for that UDF. If no UDF declares any spec, a single empty
        list is returned instead.

    Raises:
        Exception: if a spec proto has neither ``list_view`` nor
            ``map_view`` set.
    """
    extracted_udf_data_view_specs = []
    for udf in udfs:
        udf_data_view_specs_proto = udf.specs
        if udf_data_view_specs_proto is None:
            # No specs for this UDF: record exactly one empty entry and move
            # on. Falling through would try to iterate over None (TypeError)
            # and would also record a second entry for the same UDF.
            extracted_udf_data_view_specs.append([])
            continue
        extracted_specs = []
        for spec_proto in udf_data_view_specs_proto:
            state_id = spec_proto.name
            field_index = spec_proto.field_index
            if spec_proto.HasField("list_view"):
                element_coder = from_proto(spec_proto.list_view.element_type)
                extracted_specs.append(
                    ListViewSpec(state_id, field_index, element_coder))
            elif spec_proto.HasField("map_view"):
                key_coder = from_proto(spec_proto.map_view.key_type)
                value_coder = from_proto(spec_proto.map_view.value_type)
                extracted_specs.append(
                    MapViewSpec(state_id, field_index, key_coder, value_coder))
            else:
                raise Exception("Unsupported data view spec type: " +
                                spec_proto.type)
        extracted_udf_data_view_specs.append(extracted_specs)
    # Collapse "every UDF has no data views" into a plain empty list.
    if all(len(specs) == 0 for specs in extracted_udf_data_view_specs):
        return []
    return extracted_udf_data_view_specs
예제 #2
0
def _create_user_defined_function_operation(factory, transform_proto,
                                            consumers, udfs_proto,
                                            beam_operation_cls,
                                            internal_operation_cls):
    """Build the Beam operation that executes the given UDFs.

    A ``WorkerDoFn`` spec is assembled from ``udfs_proto`` and the output
    tags/coders of ``transform_proto``. When the serialized function carries
    a ``key_type`` attribute, a ``RemoteKeyedStateBackend`` is created and
    passed to ``beam_operation_cls`` as an extra trailing argument.

    Args:
        factory: supplies ``get_output_coders``, ``state_handler``,
            ``counter_factory`` and ``state_sampler``.
        transform_proto: transform whose ``outputs`` and ``unique_name``
            are used.
        consumers: forwarded unchanged to ``beam_operation_cls``.
        udfs_proto: stored as the spec's ``serialized_fn``.
        beam_operation_cls: callable that constructs the returned operation.
        internal_operation_cls: forwarded unchanged to
            ``beam_operation_cls``.

    Returns:
        Whatever ``beam_operation_cls`` returns.
    """
    tags = list(transform_proto.outputs.keys())
    coders_by_tag = factory.get_output_coders(transform_proto)
    spec = operation_specs.WorkerDoFn(
        serialized_fn=udfs_proto,
        output_tags=tags,
        input=None,
        side_inputs=None,
        output_coders=[coders_by_tag[tag] for tag in tags])

    serialized_fn = spec.serialized_fn
    if not hasattr(serialized_fn, "key_type"):
        # Not a keyed operation: no state backend is handed over.
        return beam_operation_cls(
            transform_proto.unique_name,
            spec,
            factory.counter_factory,
            factory.state_sampler,
            consumers,
            internal_operation_cls)

    # Keyed operation: the state backend needs a coder for the key.
    key_coder = from_proto(serialized_fn.key_type)
    state_backend = RemoteKeyedStateBackend(
        factory.state_handler,
        key_coder,
        serialized_fn.state_cache_size,
        serialized_fn.map_state_read_cache_size,
        serialized_fn.map_state_write_cache_size)
    return beam_operation_cls(
        transform_proto.unique_name,
        spec,
        factory.counter_factory,
        factory.state_sampler,
        consumers,
        internal_operation_cls,
        state_backend)
예제 #3
0
def extract_data_view_specs(udfs):
    """Collect the data view specs of each UDF, in UDF order.

    For a UDF whose ``specs`` is non-empty, every spec proto is turned into
    a ``ListViewSpec`` or ``MapViewSpec`` depending on which of the
    ``list_view`` / ``map_view`` fields is set. For a UDF without specs, the
    specs are derived from a freshly created accumulator when
    ``is_built_in_function(udf.payload)`` is true; otherwise an empty list
    is recorded.

    Args:
        udfs: iterable of objects exposing ``specs`` and ``payload``.

    Returns:
        A list with one entry per UDF, or an empty list when every entry
        would have length zero.

    Raises:
        Exception: if a spec proto has neither ``list_view`` nor
            ``map_view`` set.
    """
    specs_per_udf = []
    for udf_index, udf in enumerate(udfs):
        spec_protos = udf.specs
        if spec_protos:
            converted = []
            for proto in spec_protos:
                name = proto.name
                index = proto.field_index
                if proto.HasField("list_view"):
                    view_spec = ListViewSpec(
                        name, index,
                        from_proto(proto.list_view.element_type))
                elif proto.HasField("map_view"):
                    key_coder = from_proto(proto.map_view.key_type)
                    value_coder = from_proto(proto.map_view.value_type)
                    view_spec = MapViewSpec(
                        name, index, key_coder, value_coder)
                else:
                    raise Exception("Unsupported data view spec type: " +
                                    proto.type)
                converted.append(view_spec)
            specs_per_udf.append(converted)
        elif is_built_in_function(udf.payload):
            # No explicit specs: inspect the accumulator of the built-in
            # aggregate function, passing along this UDF's position.
            aggregate_function = load_aggregate_function(udf.payload)
            specs_per_udf.append(
                extract_data_view_specs_from_accumulator(
                    udf_index, aggregate_function.create_accumulator()))
        else:
            specs_per_udf.append([])

    every_entry_empty = all([len(entry) == 0 for entry in specs_per_udf])
    return [] if every_entry_empty else specs_per_udf
예제 #4
0
def _create_user_defined_function_operation(factory, transform_proto,
                                            consumers, udfs_proto,
                                            beam_operation_cls,
                                            internal_operation_cls):
    """Build the Beam operation that executes the given UDFs.

    A ``WorkerDoFn`` spec is assembled from ``udfs_proto`` and the output
    tags/coders of ``transform_proto``. A ``RemoteKeyedStateBackend`` is
    created and passed as an extra trailing argument in two cases:

    * the serialized function has a ``key_type`` attribute: the key coder is
      a ``FlattenRowCoder`` over the key's row schema fields, and the window
      coder follows ``group_window`` when that field is set;
    * otherwise, ``internal_operation_cls`` equals
      ``datastream_operations.StatefulOperation``: the key coder comes from
      ``key_type_info`` and no window coder is used.

    In every other case the operation is built without a state backend.

    Args:
        factory: supplies ``get_output_coders``, ``state_handler``,
            ``counter_factory`` and ``state_sampler``.
        transform_proto: transform whose ``outputs`` and ``unique_name``
            are used.
        consumers: forwarded unchanged to ``beam_operation_cls``.
        udfs_proto: stored as the spec's ``serialized_fn``.
        beam_operation_cls: callable that constructs the returned operation.
        internal_operation_cls: forwarded to ``beam_operation_cls``; also
            compared against ``datastream_operations.StatefulOperation``.

    Returns:
        Whatever ``beam_operation_cls`` returns.
    """
    tags = list(transform_proto.outputs.keys())
    coders_by_tag = factory.get_output_coders(transform_proto)
    spec = operation_specs.WorkerDoFn(
        serialized_fn=udfs_proto,
        output_tags=tags,
        input=None,
        side_inputs=None,
        output_coders=[coders_by_tag[tag] for tag in tags])
    name = common.NameContext(transform_proto.unique_name)
    serialized_fn = spec.serialized_fn

    if hasattr(serialized_fn, "key_type"):
        # Keyed operation: one coder per field of the key's row schema.
        key_fields = serialized_fn.key_type.row_schema.fields
        key_row_coder = FlattenRowCoder(
            [from_proto(field.type) for field in key_fields])
        window_coder = None
        if serialized_fn.HasField('group_window'):
            if serialized_fn.group_window.is_time_window:
                window_coder = TimeWindowCoder()
            else:
                window_coder = CountWindowCoder()
    elif internal_operation_cls == datastream_operations.StatefulOperation:
        key_row_coder = from_type_info_proto(serialized_fn.key_type_info)
        window_coder = None
    else:
        # Neither keyed nor stateful: no state backend is handed over.
        return beam_operation_cls(
            name, spec, factory.counter_factory, factory.state_sampler,
            consumers, internal_operation_cls)

    # Both remaining cases build the backend from the same cache settings.
    state_backend = RemoteKeyedStateBackend(
        factory.state_handler,
        key_row_coder,
        window_coder,
        serialized_fn.state_cache_size,
        serialized_fn.map_state_read_cache_size,
        serialized_fn.map_state_write_cache_size)
    return beam_operation_cls(
        name, spec, factory.counter_factory, factory.state_sampler,
        consumers, internal_operation_cls, state_backend)