Example #1
0
    class BasicStatefulDoFn(DoFn):
      """DoFn fixture declaring one bag state spec and two timer specs.

      ``process`` and both ``@on_timer`` callbacks only re-yield ``element``;
      the state and timer parameters are declared through default values but
      are never used inside the method bodies.
      """
      # Bag state named 'buffer', constructed with a BytesCoder.
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
      # Two watermark-domain timer specs; each one has a callback below.
      EXPIRY_TIMER = TimerSpec('expiry1', TimeDomain.WATERMARK)
      EXPIRY_TIMER_FAMILY = TimerSpec('expiry_family_1', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          buffer=DoFn.StateParam(BUFFER_STATE),
          timer1=DoFn.TimerParam(EXPIRY_TIMER),
          dynamic_timer=DoFn.TimerParam(EXPIRY_TIMER_FAMILY)):
        # Pass-through: the declared state/timer parameters are not touched.
        yield element

      # Callback registered for EXPIRY_TIMER.
      @on_timer(EXPIRY_TIMER)
      def expiry_callback(self, element, timer=DoFn.TimerParam(EXPIRY_TIMER)):
        yield element

      # Callback registered for EXPIRY_TIMER_FAMILY.
      @on_timer(EXPIRY_TIMER_FAMILY)
      def expiry_family_callback(
          self, element, dynamic_timer=DoFn.TimerParam(EXPIRY_TIMER_FAMILY)):
        yield element
Example #2
0
        class StatefulDoFnWithTimerWithTypo3(DoFn):
            # Fixture: process() requests two timers (EXPIRY_TIMER_1 and
            # EXPIRY_TIMER_2), but only EXPIRY_TIMER_1 gets a callback
            # registered through @on_timer.
            # NOTE(review): judging by the class name, the missing decorator
            # on on_expiry_2 is the intentional "typo" -- presumably used to
            # exercise validation of timers without a callback; confirm
            # against the test that instantiates this class.
            BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
            EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
            EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)

            def process(self,
                        element,
                        timer1=DoFn.TimerParam(EXPIRY_TIMER_1),
                        timer2=DoFn.TimerParam(EXPIRY_TIMER_2)):
                # Intentionally does nothing; only the signature matters here.
                pass

            @on_timer(EXPIRY_TIMER_1)
            def on_expiry_1(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
                yield 'expired1'

            # No @on_timer(EXPIRY_TIMER_2) here: this is a plain method.
            def on_expiry_2(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
                yield 'expired2'

            # Use a stable string value for matching.
            def __repr__(self):
                return 'StatefulDoFnWithTimerWithTypo3'
Example #3
0
class TestStatefulDoFn(DoFn):
  """An example stateful DoFn with state and timers.

  Declares two bag state specs and three watermark-domain timer specs.
  ``process`` re-yields its element; each ``@on_timer`` callback yields a
  fixed string ('expired1', 'expired2' or 'expired3'). None of the method
  bodies reads or writes the declared state or timers.
  """

  # State specs: 'buffer' uses BytesCoder, 'buffer2' uses VarIntCoder.
  BUFFER_STATE_1 = BagStateSpec('buffer', BytesCoder())
  BUFFER_STATE_2 = BagStateSpec('buffer2', VarIntCoder())
  # Timer specs, all in the WATERMARK time domain.
  EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
  EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)
  EXPIRY_TIMER_3 = TimerSpec('expiry3', TimeDomain.WATERMARK)

  # process() requests both states and timers 1 and 2 (not timer 3).
  def process(self, element, t=DoFn.TimestampParam,
              buffer_1=DoFn.StateParam(BUFFER_STATE_1),
              buffer_2=DoFn.StateParam(BUFFER_STATE_2),
              timer_1=DoFn.TimerParam(EXPIRY_TIMER_1),
              timer_2=DoFn.TimerParam(EXPIRY_TIMER_2)):
    yield element

  # Callback for EXPIRY_TIMER_1; requests window, timestamp and key
  # parameters plus the first state and all three timers.
  @on_timer(EXPIRY_TIMER_1)
  def on_expiry_1(self,
                  window=DoFn.WindowParam,
                  timestamp=DoFn.TimestampParam,
                  key=DoFn.KeyParam,
                  buffer=DoFn.StateParam(BUFFER_STATE_1),
                  timer_1=DoFn.TimerParam(EXPIRY_TIMER_1),
                  timer_2=DoFn.TimerParam(EXPIRY_TIMER_2),
                  timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
    yield 'expired1'

  # Callback for EXPIRY_TIMER_2; requests the second state and timers 2 and 3.
  @on_timer(EXPIRY_TIMER_2)
  def on_expiry_2(self,
                  buffer=DoFn.StateParam(BUFFER_STATE_2),
                  timer_2=DoFn.TimerParam(EXPIRY_TIMER_2),
                  timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
    yield 'expired2'

  # Callback for EXPIRY_TIMER_3; requests both states and timer 3.
  @on_timer(EXPIRY_TIMER_3)
  def on_expiry_3(self,
                  buffer_1=DoFn.StateParam(BUFFER_STATE_1),
                  buffer_2=DoFn.StateParam(BUFFER_STATE_2),
                  timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
    yield 'expired3'
Example #4
0
    class HashJoinStatefulDoFn(DoFn):
      """Pairs up values using a bag state and an 'unmatched' timer.

      Each element is unpacked as ``(key, value)``. When the bag state is
      empty the value is buffered and the timer is set to 100; otherwise a
      ``Record<key,first_buffered,value>`` bytes string is emitted and both
      the state and the timer are cleared. If the timer fires, the single
      buffered value is emitted as ``Unmatched<value>``.
      """
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
      UNMATCHED_TIMER = TimerSpec('unmatched', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          state=DoFn.StateParam(BUFFER_STATE),
          timer=DoFn.TimerParam(UNMATCHED_TIMER)):
        key, value = element
        buffered = list(state.read())
        if buffered:
          # A partner is already waiting: emit the joined record, then reset.
          yield b'Record<%s,%s,%s>' % (key, buffered[0], value)
          state.clear()
          timer.clear()
          return
        # Nothing buffered yet: remember this value and arm the timer.
        state.add(value)
        timer.set(100)

      @on_timer(UNMATCHED_TIMER)
      def expiry_callback(self, state=DoFn.StateParam(BUFFER_STATE)):
        pending = list(state.read())
        # Exactly one value is expected to be waiting when the timer fires.
        assert len(pending) == 1, pending
        state.clear()
        yield b'Unmatched<%s>' % (pending[0],)
Example #5
0
    class StatefulDoFnWithTimerWithTypo2(DoFn):
      # Fixture: two timer callbacks are defined under the same method name.
      # The second `def on_expiry_1` rebinds the name in the class body, so
      # the class attribute `on_expiry_1` ends up being the EXPIRY_TIMER_2
      # callback (the one yielding 'expired2').
      # NOTE(review): presumably used to check that this mistake is detected;
      # confirm against the test that uses this class.
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
      EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
      EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)

      def process(self, element,
                  timer1=DoFn.TimerParam(EXPIRY_TIMER_1),
                  timer2=DoFn.TimerParam(EXPIRY_TIMER_2)):
        # Intentionally does nothing; only the signature matters here.
        pass

      @on_timer(EXPIRY_TIMER_1)
      def on_expiry_1(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
        yield 'expired1'

      # Note that we mistakenly reuse the "on_expiry_1" name; this is valid
      # syntactically in Python.
      @on_timer(EXPIRY_TIMER_2)
      def on_expiry_1(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
        yield 'expired2'

      # Use a stable string value for matching.
      def __repr__(self):
        return 'StatefulDoFnWithTimerWithTypo2'
Example #6
0
 def test_param_construction(self):
     """StateParam and TimerParam raise ValueError for the wrong spec kind."""
     # A TimerSpec handed to StateParam must be rejected.
     self.assertRaises(
         ValueError,
         lambda: DoFn.StateParam(TimerSpec('timer', TimeDomain.WATERMARK)))
     # A BagStateSpec handed to TimerParam must be rejected.
     self.assertRaises(
         ValueError,
         lambda: DoFn.TimerParam(BagStateSpec('elements', BytesCoder())))
Example #7
0
from apache_beam.runners.worker import bundle_processor
from apache_beam.transforms import trigger
from apache_beam.transforms import window
from apache_beam.transforms.window import GlobalWindow
from apache_beam.transforms.window import GlobalWindows
from apache_beam.utils import proto_utils
from apache_beam.utils import windowed_value

if TYPE_CHECKING:
    from apache_beam.coders.coder_impl import CoderImpl
    from apache_beam.runners.portability.fn_api_runner import worker_handlers
    from apache_beam.runners.portability.fn_api_runner.translations import DataSideInput
    from apache_beam.transforms.window import BoundedWindow

# Computed once at import time: the value returned by
# GlobalWindows.windowed_value(b''), nested-encoded by the impl of a
# WindowedValueCoder built from BytesCoder and GlobalWindowCoder.
ENCODED_IMPULSE_VALUE = (
    WindowedValueCoder(BytesCoder(), GlobalWindowCoder())
    .get_impl()
    .encode_nested(GlobalWindows.windowed_value(b'')))

# Every URN registered in WindowFn._known_urns except the pickled-WindowFn URN.
SAFE_WINDOW_FNS = set(window.WindowFn._known_urns.keys()) - {
    python_urns.PICKLED_WINDOWFN
}


class Buffer(Protocol):
    """Structural type describing a buffer of ``bytes`` items.

    The members visible here are ``__iter__`` (yielding ``bytes``) and
    ``append`` (accepting one ``bytes`` item). The method bodies are
    placeholders; only the signatures matter.
    """
    def __iter__(self):
        # type: () -> Iterator[bytes]
        # Placeholder body: implementers provide the actual iteration.
        pass

    def append(self, item):
        # type: (bytes) -> None
        # Placeholder body: implementers provide the actual storage.
        pass
Example #8
0
    class BadStatefulDoFn1(DoFn):
      # Fixture: process() declares two parameters (b1 and b2) whose defaults
      # both wrap the same BUFFER_STATE spec.
      # NOTE(review): per the class name this duplication is presumably the
      # intentionally invalid part; confirm against the enclosing test.
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())

      def process(self, element, b1=DoFn.StateParam(BUFFER_STATE),
                  b2=DoFn.StateParam(BUFFER_STATE)):
        yield element
Example #9
0
def _encode_str(str_obj):
    """Wrap a string in a ConfigValue as length-prefixed UTF-8 bytes.

    The string is UTF-8 encoded, then encoded with
    ``LengthPrefixCoder(BytesCoder())``; the result becomes the payload of a
    ``ConfigValue`` whose ``coder_urn`` is ``['beam:coder:bytes:v1']``.
    """
    utf8_bytes = str_obj.encode('utf-8')
    payload = LengthPrefixCoder(BytesCoder()).encode(utf8_bytes)
    return ConfigValue(coder_urn=['beam:coder:bytes:v1'], payload=payload)
Example #10
0
def _encode_list(list_obj):
    """Wrap an iterable of strings in a ConfigValue.

    Each item is UTF-8 encoded; the resulting list is encoded with
    ``IterableCoder(LengthPrefixCoder(BytesCoder()))`` and stored as the
    payload of a ``ConfigValue`` whose ``coder_urn`` lists the iterable URN
    followed by the bytes URN.
    """
    utf8_items = [item.encode('utf-8') for item in list_obj]
    payload = IterableCoder(LengthPrefixCoder(BytesCoder())).encode(utf8_items)
    return ConfigValue(
        coder_urn=['beam:coder:iterable:v1', 'beam:coder:bytes:v1'],
        payload=payload)