Beispiel #1
0
 def __init__(self,
              state_handler,
              key_coder,
              namespace_coder,
              state_cache_size,
              map_state_read_cache_size,
              map_state_write_cache_size):
     """Initialize the keyed state backend.

     :param state_handler: the underlying Beam state handler used for all
         state requests.
     :param key_coder: coder for the current key; its field coders are
         wrapped in a FlattenRowCoder below.
     :param namespace_coder: optional coder for state namespaces (e.g.
         windows); may be None for non-windowed state.
     :param state_cache_size: max number of internal states kept in the
         LRU cache before eviction.
     :param map_state_read_cache_size: read-cache size for map state.
     :param map_state_write_cache_size: write-cache size for map state.
     """
     self._state_handler = state_handler
     # Map-state reads go through a dedicated caching wrapper.
     self._map_state_handler = CachingMapStateHandler(
         state_handler, map_state_read_cache_size)
     from pyflink.fn_execution.coders import FlattenRowCoder
     self._key_coder_impl = FlattenRowCoder(key_coder._field_coders).get_impl()
     self.namespace_coder = namespace_coder
     self._namespace_coder_impl = (
         namespace_coder.get_impl() if namespace_coder else None)
     self._state_cache_size = state_cache_size
     self._map_state_write_cache_size = map_state_write_cache_size
     self._all_states = {}
     # Evicted cache entries are committed back to the state handler.
     self._internal_state_cache = LRUCache(self._state_cache_size, None)
     self._internal_state_cache.set_on_evict(
         lambda key, value: self.commit_internal_state(value))
     self._current_key = None
     self._encoded_current_key = None
     # Sentinel state key used to signal clearing of open iterators
     # (key is still None at this point).
     self._clear_iterator_mark = beam_fn_api_pb2.StateKey(
         multimap_side_input=beam_fn_api_pb2.StateKey.MultimapSideInput(
             transform_id="clear_iterators",
             side_input_id="clear_iterators",
             key=self._encoded_current_key))
Beispiel #2
0
    def __init__(self, state_handler, key_coder, state_cache_size,
                 map_state_read_cache_size, map_state_write_cache_size):
        """Initialize the keyed state backend.

        :param state_handler: the underlying Beam state handler used for all
            state requests.
        :param key_coder: coder for the current key.
        :param state_cache_size: max number of internal states kept in the
            LRU cache before eviction.
        :param map_state_read_cache_size: read-cache size for map state.
        :param map_state_write_cache_size: write-cache size for map state.
        """
        self._state_handler = state_handler
        # Map-state reads go through a dedicated caching wrapper.
        self._map_state_handler = CachingMapStateHandler(
            state_handler, map_state_read_cache_size)

        # The Cython-accelerated coders are only present in binary builds;
        # probe for them explicitly instead of swallowing every exception.
        try:
            from pyflink.fn_execution import coder_impl_fast  # noqa: F401
            is_fast = True
        except ImportError:
            is_fast = False
        if is_fast:
            # Fast path: wrap the key field coders in a FlattenRowCoder.
            from pyflink.fn_execution.coders import FlattenRowCoder
            self._key_coder_impl = FlattenRowCoder(
                key_coder._field_coders).get_impl()
        else:
            self._key_coder_impl = key_coder.get_impl()
        self._state_cache_size = state_cache_size
        self._map_state_write_cache_size = map_state_write_cache_size
        self._all_states = {}
        # Evicted cache entries are committed back to the state handler.
        self._internal_state_cache = LRUCache(self._state_cache_size, None)
        self._internal_state_cache.set_on_evict(
            lambda key, value: self.commit_internal_state(value))
        self._current_key = None
        self._encoded_current_key = None
Beispiel #3
0
 def test_flatten_row_coder(self):
     """Round-trip a row of alternating None/int fields through the coder."""
     field_coder = BigIntCoder()
     field_count = 10
     coder = FlattenRowCoder([field_coder for _ in range(field_count)]).get_impl()
     v = [None if i % 2 == 0 else i for i in range(field_count)]
     # decode() yields rows lazily; materialize the generator before comparing
     # (list(...) instead of a manual append loop).
     result = list(coder.decode(coder.encode(v)))
     self.assertEqual([v], result)
Beispiel #4
0
def _create_user_defined_function_operation(factory, transform_proto,
                                            consumers, udfs_proto,
                                            beam_operation_cls,
                                            internal_operation_cls):
    """Build a Beam operation for user-defined functions.

    Creates a WorkerDoFn spec from the transform proto and, when the
    serialized function describes a keyed (stateful) operation, also wires
    up a RemoteKeyedStateBackend before instantiating the operation class.
    """
    tags = list(transform_proto.outputs.keys())
    coders_by_tag = factory.get_output_coders(transform_proto)
    spec = operation_specs.WorkerDoFn(
        serialized_fn=udfs_proto,
        output_tags=tags,
        input=None,
        side_inputs=None,
        output_coders=[coders_by_tag[tag] for tag in tags])
    name = common.NameContext(transform_proto.unique_name)

    serialized_fn = spec.serialized_fn
    if hasattr(serialized_fn, "key_type"):
        # Keyed (table) operation: derive the key coder from the row schema
        # and create the KeyedStateBackend.
        schema = serialized_fn.key_type.row_schema
        key_row_coder = FlattenRowCoder(
            [from_proto(f.type) for f in schema.fields])
        if serialized_fn.HasField('group_window'):
            window_coder = (TimeWindowCoder()
                            if serialized_fn.group_window.is_time_window
                            else CountWindowCoder())
        else:
            window_coder = None
        keyed_state_backend = RemoteKeyedStateBackend(
            factory.state_handler, key_row_coder, window_coder,
            serialized_fn.state_cache_size,
            serialized_fn.map_state_read_cache_size,
            serialized_fn.map_state_write_cache_size)
        return beam_operation_cls(name, spec, factory.counter_factory,
                                  factory.state_sampler, consumers,
                                  internal_operation_cls, keyed_state_backend)

    if internal_operation_cls == datastream_operations.StatefulOperation:
        # Keyed (datastream) operation: key coder comes from type info,
        # no window namespace coder.
        key_row_coder = from_type_info_proto(serialized_fn.key_type_info)
        keyed_state_backend = RemoteKeyedStateBackend(
            factory.state_handler, key_row_coder, None,
            serialized_fn.state_cache_size,
            serialized_fn.map_state_read_cache_size,
            serialized_fn.map_state_write_cache_size)
        return beam_operation_cls(name, spec, factory.counter_factory,
                                  factory.state_sampler, consumers,
                                  internal_operation_cls, keyed_state_backend)

    # Stateless operation: no keyed state backend needed.
    return beam_operation_cls(name, spec, factory.counter_factory,
                              factory.state_sampler, consumers,
                              internal_operation_cls)
Beispiel #5
0
    def __init__(self, state_handler, key_coder, state_cache_size):
        """Initialize the keyed state backend.

        :param state_handler: the underlying Beam state handler used for all
            state requests.
        :param key_coder: coder for the current key.
        :param state_cache_size: max number of internal states kept in the
            LRU cache before eviction.
        """
        self._state_handler = state_handler

        # The Cython-accelerated coders are only present in binary builds;
        # probe for them explicitly instead of swallowing every exception.
        try:
            from pyflink.fn_execution import coder_impl_fast  # noqa: F401
            is_fast = True
        except ImportError:
            is_fast = False
        if is_fast:
            # Fast path: wrap the key field coders in a FlattenRowCoder.
            from pyflink.fn_execution.coders import FlattenRowCoder
            self._key_coder_impl = FlattenRowCoder(
                key_coder._field_coders).get_impl()
        else:
            self._key_coder_impl = key_coder.get_impl()
        self._state_cache_size = state_cache_size
        self._all_states = {}
        # Evicted states are committed back to the runner on eviction.
        self._all_internal_states = LRUCache(self._state_cache_size, None)
        self._all_internal_states.set_on_evict(lambda k, v: v.commit())
        self._current_key = None
        self._encoded_current_key = None
Beispiel #6
0
 def test_flatten_row_coder(self):
     """Round-trip a row of alternating None/int fields through the coder."""
     field_count = 10
     bigint_coder = BigIntCoder()
     # The same field coder instance is reused for every field.
     row_coder = FlattenRowCoder([bigint_coder] * field_count)
     row = [None if i % 2 == 0 else i for i in range(field_count)]
     self.check_coder(row_coder, row)