def extract_data_view_specs(udfs):
    """Extract the ListView/MapView data view specs declared by each UDF.

    Args:
        udfs: iterable of UDF protos; each exposes a ``specs`` collection
            of data view spec protos (may be ``None`` when the UDF declares
            no data views).

    Returns:
        A list with one entry per UDF, each entry the list of extracted
        ``ListViewSpec``/``MapViewSpec`` objects; the empty list when no
        UDF declares any data view at all.

    Raises:
        Exception: if a spec proto is neither a ``list_view`` nor a
            ``map_view``.
    """
    extracted_udf_data_view_specs = []
    for udf in udfs:
        udf_data_view_specs_proto = udf.specs
        if udf_data_view_specs_proto is None:
            # No data views for this UDF: record an empty entry and skip
            # the extraction loop (iterating None would raise TypeError).
            extracted_udf_data_view_specs.append([])
            continue
        extracted_specs = []
        for spec_proto in udf_data_view_specs_proto:
            state_id = spec_proto.name
            field_index = spec_proto.field_index
            if spec_proto.HasField("list_view"):
                element_coder = from_proto(spec_proto.list_view.element_type)
                extracted_specs.append(
                    ListViewSpec(state_id, field_index, element_coder))
            elif spec_proto.HasField("map_view"):
                key_coder = from_proto(spec_proto.map_view.key_type)
                value_coder = from_proto(spec_proto.map_view.value_type)
                extracted_specs.append(
                    MapViewSpec(state_id, field_index, key_coder, value_coder))
            else:
                raise Exception("Unsupported data view spec type: " +
                                spec_proto.type)
        extracted_udf_data_view_specs.append(extracted_specs)
    # When no UDF uses data views at all, callers expect a bare [].
    if all(len(i) == 0 for i in extracted_udf_data_view_specs):
        return []
    return extracted_udf_data_view_specs
def _create_user_defined_function_operation(factory, transform_proto, consumers,
                                            udfs_proto, beam_operation_cls,
                                            internal_operation_cls):
    """Build a beam operation wrapping the given serialized UDFs.

    When the serialized function carries a ``key_type`` attribute the
    operation is keyed and a ``RemoteKeyedStateBackend`` is created for it;
    otherwise a plain (stateless) operation is returned.
    """
    tags = list(transform_proto.outputs.keys())
    coders_by_tag = factory.get_output_coders(transform_proto)
    ordered_coders = [coders_by_tag[t] for t in tags]
    spec = operation_specs.WorkerDoFn(
        serialized_fn=udfs_proto,
        output_tags=tags,
        input=None,
        side_inputs=None,
        output_coders=ordered_coders)
    if not hasattr(spec.serialized_fn, "key_type"):
        # Stateless case: no keyed state backend required.
        return beam_operation_cls(transform_proto.unique_name,
                                  spec,
                                  factory.counter_factory,
                                  factory.state_sampler,
                                  consumers,
                                  internal_operation_cls)
    # Keyed operation: derive the key coder and set up remote keyed state.
    key_row_coder = from_proto(spec.serialized_fn.key_type)
    keyed_state_backend = RemoteKeyedStateBackend(
        factory.state_handler,
        key_row_coder,
        spec.serialized_fn.state_cache_size,
        spec.serialized_fn.map_state_read_cache_size,
        spec.serialized_fn.map_state_write_cache_size)
    return beam_operation_cls(transform_proto.unique_name,
                              spec,
                              factory.counter_factory,
                              factory.state_sampler,
                              consumers,
                              internal_operation_cls,
                              keyed_state_backend)
def extract_data_view_specs(udfs):
    """Collect the data view specs declared by each UDF.

    UDFs without explicit specs that are built-in aggregate functions are
    inspected through the accumulator they create; any other UDF without
    specs contributes an empty entry. Returns ``[]`` when no UDF uses any
    data view at all, otherwise one list of specs per UDF.

    Raises:
        Exception: if a spec proto is neither a ``list_view`` nor a
            ``map_view``.
    """
    per_udf_specs = []
    for idx, udf in enumerate(udfs):
        specs_proto = udf.specs
        if not specs_proto:
            if is_built_in_function(udf.payload):
                # Built-in aggregates declare their data views implicitly
                # via the accumulator they create.
                agg = load_aggregate_function(udf.payload)
                per_udf_specs.append(
                    extract_data_view_specs_from_accumulator(
                        idx, agg.create_accumulator()))
            else:
                per_udf_specs.append([])
            continue
        specs = []
        for spec_proto in specs_proto:
            state_id = spec_proto.name
            field_index = spec_proto.field_index
            if spec_proto.HasField("list_view"):
                specs.append(ListViewSpec(
                    state_id, field_index,
                    from_proto(spec_proto.list_view.element_type)))
            elif spec_proto.HasField("map_view"):
                specs.append(MapViewSpec(
                    state_id, field_index,
                    from_proto(spec_proto.map_view.key_type),
                    from_proto(spec_proto.map_view.value_type)))
            else:
                raise Exception("Unsupported data view spec type: " +
                                spec_proto.type)
        per_udf_specs.append(specs)
    # When every entry is empty, callers expect a bare [].
    if all(len(entry) == 0 for entry in per_udf_specs):
        return []
    return per_udf_specs
def _create_user_defined_function_operation(factory, transform_proto, consumers,
                                            udfs_proto, beam_operation_cls,
                                            internal_operation_cls):
    """Create a beam operation for the serialized UDFs.

    Three variants are handled: keyed table operations (the serialized fn
    exposes ``key_type``), keyed datastream operations
    (``StatefulOperation``), and stateless operations. Both keyed variants
    receive a ``RemoteKeyedStateBackend``.
    """
    tags = list(transform_proto.outputs.keys())
    coder_map = factory.get_output_coders(transform_proto)
    spec = operation_specs.WorkerDoFn(
        serialized_fn=udfs_proto,
        output_tags=tags,
        input=None,
        side_inputs=None,
        output_coders=[coder_map[t] for t in tags])
    name = common.NameContext(transform_proto.unique_name)
    serialized_fn = spec.serialized_fn

    def _keyed_operation(backend):
        # One construction site for both keyed variants.
        return beam_operation_cls(name, spec, factory.counter_factory,
                                  factory.state_sampler, consumers,
                                  internal_operation_cls, backend)

    if hasattr(serialized_fn, "key_type"):
        # Keyed table operation: flatten-row coder over the key schema.
        fields = serialized_fn.key_type.row_schema.fields
        key_row_coder = FlattenRowCoder([from_proto(f.type) for f in fields])
        if serialized_fn.HasField('group_window'):
            window_coder = (TimeWindowCoder()
                            if serialized_fn.group_window.is_time_window
                            else CountWindowCoder())
        else:
            window_coder = None
        return _keyed_operation(RemoteKeyedStateBackend(
            factory.state_handler, key_row_coder, window_coder,
            serialized_fn.state_cache_size,
            serialized_fn.map_state_read_cache_size,
            serialized_fn.map_state_write_cache_size))
    if internal_operation_cls == datastream_operations.StatefulOperation:
        # Keyed datastream operation: key coder comes from type info,
        # and no window coder applies.
        key_row_coder = from_type_info_proto(serialized_fn.key_type_info)
        return _keyed_operation(RemoteKeyedStateBackend(
            factory.state_handler, key_row_coder, None,
            serialized_fn.state_cache_size,
            serialized_fn.map_state_read_cache_size,
            serialized_fn.map_state_write_cache_size))
    # Stateless operation: no state backend.
    return beam_operation_cls(name, spec, factory.counter_factory,
                              factory.state_sampler, consumers,
                              internal_operation_cls)