Example #1
0
        class DynamicTimerDoFn(DoFn):
            """Sets three tagged timers on each of two watermark timer specs.

            Each timer callback yields a ``(tag, ts)`` pair built from the
            injected dynamic-timer tag and timestamp parameters.
            """
            EMIT_TIMER_FAMILY1 = TimerSpec(
                'emit_family_1', TimeDomain.WATERMARK)
            EMIT_TIMER_FAMILY2 = TimerSpec(
                'emit_family_2', TimeDomain.WATERMARK)

            def process(
                    self,
                    element,
                    emit1=DoFn.TimerParam(EMIT_TIMER_FAMILY1),
                    emit2=DoFn.TimerParam(EMIT_TIMER_FAMILY2)):
                """Set the six tagged timers; ``element`` itself is not read."""
                # (timer, timestamp, tag), listed in the order they are set.
                schedule = (
                    (emit1, 10, 'emit11'),
                    (emit1, 20, 'emit12'),
                    (emit1, 30, 'emit13'),
                    (emit2, 30, 'emit21'),
                    (emit2, 20, 'emit22'),
                    (emit2, 10, 'emit23'),
                )
                for timer, fire_at, tag in schedule:
                    timer.set(fire_at, dynamic_timer_tag=tag)

            @on_timer(EMIT_TIMER_FAMILY1)
            def emit_callback(
                    self,
                    ts=DoFn.TimestampParam,
                    tag=DoFn.DynamicTimerTagParam):
                """Yield ``(tag, ts)`` for a timer of the first spec."""
                fired = (tag, ts)
                yield fired

            @on_timer(EMIT_TIMER_FAMILY2)
            def emit_callback_2(
                    self,
                    ts=DoFn.TimestampParam,
                    tag=DoFn.DynamicTimerTagParam):
                """Yield ``(tag, ts)`` for a timer of the second spec."""
                fired = (tag, ts)
                yield fired
Example #2
0
    class StatefulDoFnWithTimerWithTypo2(DoFn):
      """Fixture DoFn that defines two timer callbacks under one method name.

      Both decorated functions are called ``on_expiry_1``; the second ``def``
      rebinds that class attribute, so once the class body has executed the
      name refers to the function decorated with ``EXPIRY_TIMER_2``.
      """
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
      EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
      EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          timer1=DoFn.TimerParam(EXPIRY_TIMER_1),
          timer2=DoFn.TimerParam(EXPIRY_TIMER_2)):
        """Declare both timers as parameters; the body does nothing."""
        pass

      @on_timer(EXPIRY_TIMER_1)
      def on_expiry_1(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
        """Yield ``'expired1'``."""
        yield 'expired1'

      # Note that we mistakenly reuse the "on_expiry_1" name; this is valid
      # syntactically in Python.
      @on_timer(EXPIRY_TIMER_2)
      def on_expiry_1(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):  # pylint: disable=function-redefined
        """Yield ``'expired2'``."""
        yield 'expired2'

      # Use a stable string value for matching.
      def __repr__(self):
        return 'StatefulDoFnWithTimerWithTypo2'
Example #3
0
    class TimerEmittingStatefulDoFn(DoFn):
      """Sets three watermark timers; each callback yields its own label."""
      EMIT_TIMER_1 = TimerSpec('emit1', TimeDomain.WATERMARK)
      EMIT_TIMER_2 = TimerSpec('emit2', TimeDomain.WATERMARK)
      EMIT_TIMER_3 = TimerSpec('emit3', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          timer1=DoFn.TimerParam(EMIT_TIMER_1),
          timer2=DoFn.TimerParam(EMIT_TIMER_2),
          timer3=DoFn.TimerParam(EMIT_TIMER_3)):
        """Set timer1, timer2 and timer3 to 10, 20 and 30 respectively."""
        for timer, fire_at in ((timer1, 10), (timer2, 20), (timer3, 30)):
          timer.set(fire_at)

      @on_timer(EMIT_TIMER_1)
      def emit_callback_1(self):
        """Yield the label for the first timer."""
        label = 'timer1'
        yield label

      @on_timer(EMIT_TIMER_2)
      def emit_callback_2(self):
        """Yield the label for the second timer."""
        label = 'timer2'
        yield label

      @on_timer(EMIT_TIMER_3)
      def emit_callback_3(self):
        """Yield the label for the third timer."""
        label = 'timer3'
        yield label
Example #4
0
    class SetStateClearingStatefulDoFn(beam.DoFn):
      """Collects values in a set state that one timer resets and one emits.

      ``process`` sets the clear timer to 100 and the emit timer to 1000. The
      clear callback empties the set and adds ``'different-value'``; the emit
      callback yields whatever the set then contains.
      """

      SET_STATE = SetStateSpec('buffer', StrUtf8Coder())
      EMIT_TIMER = TimerSpec('emit_timer', TimeDomain.WATERMARK)
      CLEAR_TIMER = TimerSpec('clear_timer', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          set_state=beam.DoFn.StateParam(SET_STATE),
          emit_timer=beam.DoFn.TimerParam(EMIT_TIMER),
          clear_timer=beam.DoFn.TimerParam(CLEAR_TIMER)):
        """Add ``element[1]`` to the set and set both timers."""
        set_state.add(element[1])
        clear_timer.set(100)
        emit_timer.set(1000)

      @on_timer(EMIT_TIMER)
      def emit_values(self, set_state=beam.DoFn.StateParam(SET_STATE)):
        """Yield every item currently read from the set state."""
        for item in set_state.read():
          yield item

      @on_timer(CLEAR_TIMER)
      def clear_values(self, set_state=beam.DoFn.StateParam(SET_STATE)):
        """Clear the set state, then add a single replacement value."""
        set_state.clear()
        set_state.add('different-value')
Example #5
0
    class BagStateClearingStatefulDoFn(beam.DoFn):
      """Collects values in a bag state that one timer clears and one emits.

      ``process`` sets the clear timer to 100 and the emit timer to 1000. The
      emit callback yields the bag contents followed by ``'extra'``.
      """

      BAG_STATE = BagStateSpec('bag_state', StrUtf8Coder())
      EMIT_TIMER = TimerSpec('emit_timer', TimeDomain.WATERMARK)
      CLEAR_TIMER = TimerSpec('clear_timer', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          bag_state=beam.DoFn.StateParam(BAG_STATE),
          emit_timer=beam.DoFn.TimerParam(EMIT_TIMER),
          clear_timer=beam.DoFn.TimerParam(CLEAR_TIMER)):
        """Add ``element[1]`` to the bag and set both timers."""
        bag_state.add(element[1])
        clear_timer.set(100)
        emit_timer.set(1000)

      @on_timer(EMIT_TIMER)
      def emit_values(self, bag_state=beam.DoFn.StateParam(BAG_STATE)):
        """Yield each item read from the bag, then the literal ``'extra'``."""
        for item in bag_state.read():
          yield item
        yield 'extra'

      @on_timer(CLEAR_TIMER)
      def clear_values(self, bag_state=beam.DoFn.StateParam(BAG_STATE)):
        """Clear the bag state."""
        bag_state.clear()
Example #6
0
  def test_spec_construction(self):
    """Check which spec and param constructor calls succeed and which raise."""
    # Bag state: a valid construction, then a non-string name.
    BagStateSpec('statename', VarIntCoder())
    self.assertRaises(TypeError, BagStateSpec, 123, VarIntCoder())

    # Combining value state: bad name, then a combine_fn that is a bare object.
    CombiningValueStateSpec('statename', VarIntCoder(), TopCombineFn(10))
    self.assertRaises(
        TypeError,
        CombiningValueStateSpec,
        123,
        VarIntCoder(),
        TopCombineFn(10))
    self.assertRaises(
        TypeError,
        CombiningValueStateSpec,
        'statename',
        VarIntCoder(),
        object())

    # Set state: bad name, then a coder that is a bare object.
    SetStateSpec('setstatename', VarIntCoder())
    self.assertRaises(TypeError, SetStateSpec, 123, VarIntCoder())
    self.assertRaises(TypeError, SetStateSpec, 'setstatename', object())

    # Read-modify-write state: bad name, then a coder that is a bare object.
    ReadModifyWriteStateSpec('valuestatename', VarIntCoder())
    self.assertRaises(TypeError, ReadModifyWriteStateSpec, 123, VarIntCoder())
    self.assertRaises(
        TypeError, ReadModifyWriteStateSpec, 'valuestatename', object())

    # TODO: add more spec tests
    # A state spec handed to TimerParam must be rejected.
    self.assertRaises(
        ValueError, DoFn.TimerParam, BagStateSpec('elements', BytesCoder()))

    # Timer specs: both time domains construct; an unknown domain raises.
    TimerSpec('timer', TimeDomain.WATERMARK)
    TimerSpec('timer', TimeDomain.REAL_TIME)
    self.assertRaises(ValueError, TimerSpec, 'timer', 'bogus_time_domain')
    # A timer spec handed to StateParam must be rejected.
    self.assertRaises(
        ValueError, DoFn.StateParam, TimerSpec('timer', TimeDomain.WATERMARK))
Example #7
0
def _pardo_group_into_batches(
    input_coder, batch_size, max_buffering_duration_secs, clock=time.time):
  """Build a stateful DoFn that buffers elements and emits them in batches.

  Args:
    input_coder: coder passed to both the element bag state and the count
      combining state.
    batch_size: buffered-element count at which the batch is flushed.
    max_buffering_duration_secs: when greater than zero, a real-time timer is
      set this many seconds after the first element of a batch arrives.
    clock: zero-argument callable returning the current time; defaults to
      ``time.time``.

  Returns:
    An instance of the locally defined ``_GroupIntoBatchesDoFn``.
  """
  ELEMENT_STATE = BagStateSpec('values', input_coder)
  COUNT_STATE = CombiningValueStateSpec('count', input_coder, CountCombineFn())
  WINDOW_TIMER = TimerSpec('window_end', TimeDomain.WATERMARK)
  BUFFERING_TIMER = TimerSpec('buffering_end', TimeDomain.REAL_TIME)

  class _GroupIntoBatchesDoFn(DoFn):
    """Buffers elements in state and flushes them as ``(key, values)``."""
    def process(
        self,
        element,
        window=DoFn.WindowParam,
        element_state=DoFn.StateParam(ELEMENT_STATE),
        count_state=DoFn.StateParam(COUNT_STATE),
        window_timer=DoFn.TimerParam(WINDOW_TIMER),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
      """Buffer ``element`` and flush once ``batch_size`` is reached."""
      # Allowed lateness not supported in Python SDK
      # https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
      window_timer.set(window.end)
      element_state.add(element)
      count_state.add(1)
      buffered = count_state.read()
      if buffered == 1 and max_buffering_duration_secs > 0:
        # First element of a batch: start the buffering clock, but only when
        # a limit was configured.
        # pylint: disable=deprecated-method
        buffering_timer.set(clock() + max_buffering_duration_secs)
      if buffered < batch_size:
        return None
      return self.flush_batch(element_state, count_state, buffering_timer)

    @on_timer(WINDOW_TIMER)
    def on_window_timer(
        self,
        element_state=DoFn.StateParam(ELEMENT_STATE),
        count_state=DoFn.StateParam(COUNT_STATE),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
      """Flush whatever is buffered when the window timer fires."""
      return self.flush_batch(element_state, count_state, buffering_timer)

    @on_timer(BUFFERING_TIMER)
    def on_buffering_timer(
        self,
        element_state=DoFn.StateParam(ELEMENT_STATE),
        count_state=DoFn.StateParam(COUNT_STATE),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
      """Flush whatever is buffered when the buffering timer fires."""
      return self.flush_batch(element_state, count_state, buffering_timer)

    def flush_batch(self, element_state, count_state, buffering_timer):
      """Yield one ``(key, values)`` pair and reset state; no-op when empty.

      The key is taken from the first buffered ``(key, value)`` pair.
      """
      pending = list(element_state.read())
      if pending:
        batch_key, _ = pending[0]
        values = [value for _, value in pending]
        element_state.clear()
        count_state.clear()
        buffering_timer.clear()
        yield batch_key, values

  return _GroupIntoBatchesDoFn()
Example #8
0
    class BadStatefulDoFn4(DoFn):
      """Fixture DoFn whose timer callback takes ``EXPIRY_TIMER_2`` twice.

      NOTE(review): the "Bad" name suggests validation is expected to reject
      this class -- confirm against the test that uses it.
      """
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
      EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
      EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)

      @on_timer(EXPIRY_TIMER_1)
      def expiry_callback(self, element, t1=DoFn.TimerParam(EXPIRY_TIMER_2),
                          t2=DoFn.TimerParam(EXPIRY_TIMER_2)):
        """Yield ``element``; ``t1`` and ``t2`` both wrap ``EXPIRY_TIMER_2``."""
        yield element
Example #9
0
class TestStatefulDoFn(DoFn):
  """An example stateful DoFn with state and timers."""

  # Two bag states with different coders.
  BUFFER_STATE_1 = BagStateSpec('buffer', BytesCoder())
  BUFFER_STATE_2 = BagStateSpec('buffer2', VarIntCoder())
  # Four watermark-domain timer specs; the last one is consumed together with
  # DynamicTimerTagParam in its callback.
  EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
  EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)
  EXPIRY_TIMER_3 = TimerSpec('expiry3', TimeDomain.WATERMARK)
  EXPIRY_TIMER_FAMILY = TimerSpec('expiry_family', TimeDomain.WATERMARK)

  def process(
      self,
      element,
      t=DoFn.TimestampParam,
      buffer_1=DoFn.StateParam(BUFFER_STATE_1),
      buffer_2=DoFn.StateParam(BUFFER_STATE_2),
      timer_1=DoFn.TimerParam(EXPIRY_TIMER_1),
      timer_2=DoFn.TimerParam(EXPIRY_TIMER_2),
      dynamic_timer=DoFn.TimerParam(EXPIRY_TIMER_FAMILY)):
    """Yield ``element`` unchanged; state and timer params are only declared."""
    yield element

  @on_timer(EXPIRY_TIMER_1)
  def on_expiry_1(
      self,
      window=DoFn.WindowParam,
      timestamp=DoFn.TimestampParam,
      key=DoFn.KeyParam,
      buffer=DoFn.StateParam(BUFFER_STATE_1),
      timer_1=DoFn.TimerParam(EXPIRY_TIMER_1),
      timer_2=DoFn.TimerParam(EXPIRY_TIMER_2),
      timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
    """Yield ``'expired1'``; none of the declared parameters are read."""
    yield 'expired1'

  @on_timer(EXPIRY_TIMER_2)
  def on_expiry_2(
      self,
      buffer=DoFn.StateParam(BUFFER_STATE_2),
      timer_2=DoFn.TimerParam(EXPIRY_TIMER_2),
      timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
    """Yield ``'expired2'``; none of the declared parameters are read."""
    yield 'expired2'

  @on_timer(EXPIRY_TIMER_3)
  def on_expiry_3(
      self,
      buffer_1=DoFn.StateParam(BUFFER_STATE_1),
      buffer_2=DoFn.StateParam(BUFFER_STATE_2),
      timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
    """Yield ``'expired3'``; none of the declared parameters are read."""
    yield 'expired3'

  @on_timer(EXPIRY_TIMER_FAMILY)
  def on_expiry_family(
      self,
      dynamic_timer=DoFn.TimerParam(EXPIRY_TIMER_FAMILY),
      dynamic_timer_tag=DoFn.DynamicTimerTagParam):
    """Yield the injected tag paired with ``'expired_dynamic_timer'``."""
    yield (dynamic_timer_tag, 'expired_dynamic_timer')
Example #10
0
File: util.py Project: xmarker/beam
def _pardo_group_into_batches(batch_size, input_coder):
  """Build a stateful DoFn that buffers elements and emits them as lists.

  Args:
    batch_size: buffered-element count at which a batch is emitted.
    input_coder: coder passed to both the element bag state and the count
      combining state.

  Returns:
    An instance of the locally defined ``_GroupIntoBatchesDoFn``.
  """
  ELEMENT_STATE = BagStateSpec('values', input_coder)
  COUNT_STATE = CombiningValueStateSpec('count', input_coder, CountCombineFn())
  EXPIRY_TIMER = TimerSpec('expiry', TimeDomain.WATERMARK)

  class _GroupIntoBatchesDoFn(DoFn):
    """Buffers elements in bag state and yields them as list batches."""

    def process(
        self,
        element,
        window=DoFn.WindowParam,
        element_state=DoFn.StateParam(ELEMENT_STATE),
        count_state=DoFn.StateParam(COUNT_STATE),
        expiry_timer=DoFn.TimerParam(EXPIRY_TIMER)):
      """Buffer ``element``; yield the batch once ``batch_size`` is reached."""
      # Allowed lateness not supported in Python SDK
      # https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
      expiry_timer.set(window.end)
      element_state.add(element)
      count_state.add(1)
      if count_state.read() >= batch_size:
        pending = list(element_state.read())
        yield pending
        element_state.clear()
        count_state.clear()

    @on_timer(EXPIRY_TIMER)
    def expiry(
        self,
        element_state=DoFn.StateParam(ELEMENT_STATE),
        count_state=DoFn.StateParam(COUNT_STATE)):
      """Yield any remaining buffered elements, then reset both states."""
      leftover = list(element_state.read())
      if leftover:
        yield leftover
        element_state.clear()
        count_state.clear()

  return _GroupIntoBatchesDoFn()
Example #11
0
class BigBagDoFn(DoFn):
    """Buffers every value in a bag state and tallies it on a window timer.

    The timer callback only logs the number of values and their total
    length; it emits nothing.
    """
    VALUES_STATE = BagStateSpec('values', BytesCoder())
    END_OF_WINDOW_TIMER = TimerSpec('end_of_window', TimeDomain.WATERMARK)

    def process(
            self,
            element: Tuple[str, bytes],
            window=DoFn.WindowParam,
            values_state=DoFn.StateParam(VALUES_STATE),
            end_of_window_timer=DoFn.TimerParam(END_OF_WINDOW_TIMER)):
        """Add the element's value to the bag and set the timer to window.end."""
        logging.info('start process.')
        _unused_key, payload = element
        end_of_window_timer.set(window.end)
        values_state.add(payload)
        logging.info('end process.')

    @on_timer(END_OF_WINDOW_TIMER)
    def end_of_window(self, values_state=DoFn.StateParam(VALUES_STATE)):
        """Iterate the bag once, logging the item count and summed lengths."""
        logging.info('start end_of_window.')

        item_total = 0
        byte_total = 0
        for chunk in values_state.read():
            item_total += 1
            byte_total += len(chunk)

        logging.info('read_count: %s, read_bytes: %s', item_total, byte_total)
        logging.info('end end_of_window.')
Example #12
0
        class HashJoinStatefulDoFn(DoFn):
            """Pairs up values arriving in state and reports unmatched ones.

            The first value seen is buffered and a timer is set; a later value
            is emitted together with the buffered one and the state and timer
            are cleared. If the timer fires first, the buffered value is
            emitted as unmatched.
            """
            BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
            UNMATCHED_TIMER = TimerSpec('unmatched', TimeDomain.WATERMARK)

            def process(
                    self,
                    element,
                    state=DoFn.StateParam(BUFFER_STATE),
                    timer=DoFn.TimerParam(UNMATCHED_TIMER)):
                """Buffer the value, or yield a ``Record<...>`` on a match."""
                key, value = element
                buffered = list(state.read())
                if buffered:
                    first_value = buffered[0]
                    yield b'Record<%s,%s,%s>' % (key, first_value, value)
                    state.clear()
                    timer.clear()
                else:
                    state.add(value)
                    timer.set(100)

            @on_timer(UNMATCHED_TIMER)
            def expiry_callback(self, state=DoFn.StateParam(BUFFER_STATE)):
                """Clear the state and yield ``Unmatched<...>`` for its value."""
                pending = list(state.read())
                assert len(pending) == 1, pending
                state.clear()
                yield b'Unmatched<%s>' % (pending[0], )
Example #13
0
        class GenerateRecords(beam.DoFn):
            """Emits ``'value'`` from a re-arming real-time timer.

            The timer callback stops re-arming, and yields nothing, once the
            counted number of emissions equals ``total_records``.
            """

            EMIT_TIMER = TimerSpec('emit_timer', TimeDomain.REAL_TIME)
            COUNT_STATE = CombiningValueStateSpec(
                'count_state', VarIntCoder(), CountCombineFn())

            def __init__(self, frequency, total_records):
                """Store the timer offset and the emission limit."""
                self.frequency = frequency
                self.total_records = total_records

            def process(
                    self,
                    element,
                    emit_timer=beam.DoFn.TimerParam(EMIT_TIMER)):
                """Set the timer to ``frequency`` and yield ``element[1]``."""
                # Processing time timers should be set on ABSOLUTE TIME.
                emit_timer.set(self.frequency)
                yield element[1]

            @on_timer(EMIT_TIMER)
            def emit_values(
                    self,
                    emit_timer=beam.DoFn.TimerParam(EMIT_TIMER),
                    count_state=beam.DoFn.StateParam(COUNT_STATE)):
                """Count one emission, re-arm the timer and yield ``'value'``."""
                emitted_so_far = count_state.read() or 0
                if self.total_records != emitted_so_far:
                    count_state.add(1)
                    # Processing time timers should be set on ABSOLUTE TIME.
                    emit_timer.set(emitted_so_far + 1 + self.frequency)
                    yield 'value'
Example #14
0
      class StatefulDoFnWithTimerWithTypo1(DoFn):  # pylint: disable=unused-variable
        """Fixture DoFn with two callbacks both decorated for EXPIRY_TIMER_1.

        ``EXPIRY_TIMER_2`` is declared but no callback is decorated with it.
        """
        BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
        EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
        EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)

        def process(self, element):
          """Do nothing with the element."""
          pass

        @on_timer(EXPIRY_TIMER_1)
        def on_expiry_1(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
          """Yield ``'expired1'``."""
          yield 'expired1'

        # Note that we mistakenly associate this with the first timer.
        @on_timer(EXPIRY_TIMER_1)
        def on_expiry_2(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
          """Yield ``'expired2'``."""
          yield 'expired2'
Example #15
0
        class BadStatefulDoFn2(DoFn):
            """Fixture DoFn whose ``process`` takes the same timer spec twice.

            NOTE(review): the "Bad" name suggests validation is expected to
            reject this class -- confirm against the test that uses it.
            """
            TIMER = TimerSpec('timer', TimeDomain.WATERMARK)

            def process(self,
                        element,
                        t1=DoFn.TimerParam(TIMER),
                        t2=DoFn.TimerParam(TIMER)):
                """Yield ``element``; ``t1`` and ``t2`` both wrap ``TIMER``."""
                yield element
Example #16
0
        class BadStatefulDoFn5(DoFn):
            """Fixture DoFn whose ``process`` takes one timer spec twice.

            NOTE(review): the "Bad" name suggests validation is expected to
            reject this class -- confirm against the test that uses it.
            """
            EXPIRY_TIMER_FAMILY = TimerSpec('dynamic_timer',
                                            TimeDomain.WATERMARK)

            def process(self,
                        element,
                        dynamic_timer_1=DoFn.TimerParam(EXPIRY_TIMER_FAMILY),
                        dynamic_timer_2=DoFn.TimerParam(EXPIRY_TIMER_FAMILY)):
                """Yield ``element``; both params wrap ``EXPIRY_TIMER_FAMILY``."""
                yield element
Example #17
0
    class TimerEmittingStatefulDoFn(DoFn):
      """Sets one watermark timer and reports its timestamp and window."""
      EMIT_TIMER_1 = TimerSpec('emit1', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          timer1=DoFn.TimerParam(EMIT_TIMER_1)):
        """Set the timer to 10; ``element`` itself is not read."""
        timer1.set(10)

      @on_timer(EMIT_TIMER_1)
      def emit_callback_1(
          self, window=DoFn.WindowParam, ts=DoFn.TimestampParam):
        """Yield ``('timer1', ts, window.start, window.end)`` as integers."""
        fired_at = int(ts)
        window_start = int(window.start)
        window_end = int(window.end)
        yield ('timer1', fired_at, window_start, window_end)
Example #18
0
    class DynamicTimerDoFn(DoFn):
      """Sets one timer without a tag and one with an empty-string tag."""
      EMIT_TIMER_FAMILY = TimerSpec('emit', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          emit=DoFn.TimerParam(EMIT_TIMER_FAMILY)):
        """Call ``emit.set`` twice: untagged at 10, then tagged ``''`` at 20."""
        # (timestamp, keyword arguments) for each call, in call order.
        calls = (
            (10, {}),
            (20, {'dynamic_timer_tag': ''}),
        )
        for fire_at, extra in calls:
          emit.set(fire_at, **extra)

      @on_timer(EMIT_TIMER_FAMILY)
      def emit_callback(
          self,
          ts=DoFn.TimestampParam,
          tag=DoFn.DynamicTimerTagParam):
        """Yield ``(tag, ts)`` from the injected tag and timestamp."""
        fired = (tag, ts)
        yield fired
Example #19
0
    class BasicStatefulDoFn(DoFn):
      """Fixture DoFn declaring one bag state and one watermark timer."""
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
      EXPIRY_TIMER = TimerSpec('expiry1', TimeDomain.WATERMARK)

      def process(self, element, buffer=DoFn.StateParam(BUFFER_STATE),
                  timer1=DoFn.TimerParam(EXPIRY_TIMER)):
        """Yield ``element`` unchanged; state and timer are only declared."""
        yield element

      @on_timer(EXPIRY_TIMER)
      def expiry_callback(self, element, timer=DoFn.TimerParam(EXPIRY_TIMER)):
        """Yield the ``element`` argument this callback is given."""
        yield element
Example #20
0
    class StatefulDoFnWithTimerWithTypo3(DoFn):
      """Fixture DoFn where ``on_expiry_2`` has no ``@on_timer`` decorator.

      ``process`` takes both timers, but only ``EXPIRY_TIMER_1`` has a
      decorated callback.
      """
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
      EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
      EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)

      def process(self, element,
                  timer1=DoFn.TimerParam(EXPIRY_TIMER_1),
                  timer2=DoFn.TimerParam(EXPIRY_TIMER_2)):
        """Declare both timers as parameters; the body does nothing."""
        pass

      @on_timer(EXPIRY_TIMER_1)
      def on_expiry_1(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
        """Yield ``'expired1'``."""
        yield 'expired1'

      # Plain method: unlike on_expiry_1 it is not decorated with @on_timer.
      def on_expiry_2(self, buffer_state=DoFn.StateParam(BUFFER_STATE)):
        """Yield ``'expired2'``."""
        yield 'expired2'

      # Use a stable string value for matching.
      def __repr__(self):
        return 'StatefulDoFnWithTimerWithTypo3'
Example #21
0
        class EmitTwoEvents(DoFn):
            """Yields a 'set' event per element and a 'clear' event per timer."""
            EMIT_CLEAR_SET_TIMER = TimerSpec('emitclear', TimeDomain.WATERMARK)

            def process(
                    self,
                    element,
                    emit=DoFn.TimerParam(EMIT_CLEAR_SET_TIMER)):
                """Yield ``('1', 'set')``, then set the timer to 1."""
                set_event = ('1', 'set')
                yield set_event
                emit.set(1)

            @on_timer(EMIT_CLEAR_SET_TIMER)
            def emit_clear(self):
                """Yield ``('1', 'clear')``."""
                clear_event = ('1', 'clear')
                yield clear_event
Example #22
0
class TestStatefulDoFn(DoFn):
    """An example stateful DoFn with state and timers."""

    # Two bag states with different coders.
    BUFFER_STATE_1 = BagStateSpec('buffer', BytesCoder())
    BUFFER_STATE_2 = BagStateSpec('buffer2', VarIntCoder())
    # Three watermark-domain timers; EXPIRY_TIMER_3 is taken only by the
    # timer callbacks, not by process().
    EXPIRY_TIMER_1 = TimerSpec('expiry1', TimeDomain.WATERMARK)
    EXPIRY_TIMER_2 = TimerSpec('expiry2', TimeDomain.WATERMARK)
    EXPIRY_TIMER_3 = TimerSpec('expiry3', TimeDomain.WATERMARK)

    def process(self,
                element,
                t=DoFn.TimestampParam,
                buffer_1=DoFn.StateParam(BUFFER_STATE_1),
                buffer_2=DoFn.StateParam(BUFFER_STATE_2),
                timer_1=DoFn.TimerParam(EXPIRY_TIMER_1),
                timer_2=DoFn.TimerParam(EXPIRY_TIMER_2)):
        """Yield ``element`` unchanged; state and timers are only declared."""
        yield element

    @on_timer(EXPIRY_TIMER_1)
    def on_expiry_1(self,
                    buffer=DoFn.StateParam(BUFFER_STATE_1),
                    timer_1=DoFn.TimerParam(EXPIRY_TIMER_1),
                    timer_2=DoFn.TimerParam(EXPIRY_TIMER_2),
                    timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
        """Yield ``'expired1'``; none of the declared parameters are read."""
        yield 'expired1'

    @on_timer(EXPIRY_TIMER_2)
    def on_expiry_2(self,
                    buffer=DoFn.StateParam(BUFFER_STATE_2),
                    timer_2=DoFn.TimerParam(EXPIRY_TIMER_2),
                    timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
        """Yield ``'expired2'``; none of the declared parameters are read."""
        yield 'expired2'

    @on_timer(EXPIRY_TIMER_3)
    def on_expiry_3(self,
                    buffer_1=DoFn.StateParam(BUFFER_STATE_1),
                    buffer_2=DoFn.StateParam(BUFFER_STATE_2),
                    timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
        """Yield ``'expired3'``; none of the declared parameters are read."""
        yield 'expired3'
Example #23
0
    class SimpleTestSetStatefulDoFn(DoFn):
      """Collects values in a set state and emits them sorted on a timer."""
      BUFFER_STATE = SetStateSpec('buffer', VarIntCoder())
      EXPIRY_TIMER = TimerSpec('expiry', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          buffer=DoFn.StateParam(BUFFER_STATE),
          timer1=DoFn.TimerParam(EXPIRY_TIMER)):
        """Add the element's value to the set and set the timer to 20."""
        _, item = element
        buffer.add(item)
        timer1.set(20)

      @on_timer(EXPIRY_TIMER)
      def expiry_callback(self, buffer=DoFn.StateParam(BUFFER_STATE)):
        """Yield one sorted list of everything read from the set state."""
        ordered = sorted(buffer.read())
        yield ordered
Example #24
0
        class DynamicTimerDoFn(DoFn):
            """Sets three tagged emit timers and one untagged gc timer."""
            EMIT_TIMER_FAMILY = TimerSpec('emit', TimeDomain.WATERMARK)
            GC_TIMER = TimerSpec('gc', TimeDomain.WATERMARK)

            def process(
                    self,
                    element,
                    emit=DoFn.TimerParam(EMIT_TIMER_FAMILY),
                    gc=DoFn.TimerParam(GC_TIMER)):
                """Set emit timers at 10/20/30 and the gc timer at 40."""
                tagged = ((10, 'emit1'), (20, 'emit2'), (30, 'emit3'))
                for fire_at, tag in tagged:
                    emit.set(fire_at, dynamic_timer_tag=tag)
                gc.set(40)

            @on_timer(EMIT_TIMER_FAMILY)
            def emit_callback(
                    self,
                    ts=DoFn.TimestampParam,
                    tag=DoFn.DynamicTimerTagParam):
                """Yield ``(tag, ts)`` from the injected tag and timestamp."""
                fired = (tag, ts)
                yield fired

            @on_timer(GC_TIMER)
            def gc(self, ts=DoFn.TimestampParam):
                """Yield ``('gc', ts)``."""
                fired = ('gc', ts)
                yield fired
Example #25
0
    class SimpleTestStatefulDoFn(DoFn):
      """Buffers ``b'A'``-prefixed values and emits them joined on a timer."""
      BUFFER_STATE = BagStateSpec('buffer', BytesCoder())
      EXPIRY_TIMER = TimerSpec('expiry', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          buffer=DoFn.StateParam(BUFFER_STATE),
          timer1=DoFn.TimerParam(EXPIRY_TIMER)):
        """Add ``b'A'`` plus the latin1-encoded value; set the timer to 20."""
        _, item = element
        encoded = str(item).encode('latin1')
        buffer.add(b'A' + encoded)
        timer1.set(20)

      @on_timer(EXPIRY_TIMER)
      def expiry_callback(
          self,
          buffer=DoFn.StateParam(BUFFER_STATE),
          timer=DoFn.TimerParam(EXPIRY_TIMER)):
        """Yield the buffered byte strings, sorted and concatenated."""
        ordered = sorted(buffer.read())
        yield b''.join(ordered)
Example #26
0
class CountAndSchedule(beam.DoFn):
    """Counts elements and reports the count each time the timer fires.

    ``process`` increments a counter and sets the timer to the end of the
    minute containing the element's timestamp. The timer callback yields the
    count, resets it to zero and re-arms the timer 60 later.
    """

    COUNTER = BagStateSpec('counter', VarIntCoder())
    SCHEDULED_TIMESTAMP = BagStateSpec('nextSchedule', VarIntCoder())
    TIMER = TimerSpec('timer', TimeDomain.WATERMARK)

    def process(self,
                element,
                timestamp=beam.DoFn.TimestampParam,
                timer=beam.DoFn.TimerParam(TIMER),
                counter=beam.DoFn.StateParam(COUNTER),
                next_schedule=beam.DoFn.StateParam(SCHEDULED_TIMESTAMP),
                *args,
                **kwargs):
        """Increment the stored count and schedule the next tick."""
        # An empty bag means nothing has been counted yet.
        stored = list(counter.read())
        current_count, = stored or [0]
        counter.clear()
        counter.add(current_count + 1)

        # Truncate to the start of the minute, then step to its end.
        minute_start = timestamp.to_utc_datetime().replace(
            second=0, microsecond=0)
        minute_end = minute_start + timedelta(minutes=1)

        next_tick = calendar.timegm(minute_end.timetuple())
        timer.set(next_tick)

        next_schedule.clear()
        next_schedule.add(next_tick)

    @on_timer(TIMER)
    def timer_ticked(self,
                     timer=beam.DoFn.TimerParam(TIMER),
                     counter=beam.DoFn.StateParam(COUNTER),
                     next_schedule=beam.DoFn.StateParam(SCHEDULED_TIMESTAMP)):
        """Yield the count for this tick, reset it, and re-arm the timer."""
        print("TICKTICK")
        # Both bags are expected to hold exactly one value.
        current_count, = counter.read()
        this_tick, = next_schedule.read()

        following_tick = this_tick + 60

        next_schedule.clear()
        next_schedule.add(following_tick)

        counter.clear()
        counter.add(0)

        timer.clear()
        timer.set(following_tick)

        report = {'count': current_count, 'timestamp': this_tick}
        yield report
Example #27
0
    class SimpleTestStatefulDoFn(DoFn):
      """Combines values into a list state and emits them joined on a timer."""
      BUFFER_STATE = CombiningValueStateSpec(
          'buffer', IterableCoder(VarIntCoder()), ToListCombineFn())
      EXPIRY_TIMER = TimerSpec('expiry1', TimeDomain.WATERMARK)

      def process(
          self,
          element,
          buffer=DoFn.StateParam(BUFFER_STATE),
          timer1=DoFn.TimerParam(EXPIRY_TIMER)):
        """Add the element's value to the state and set the timer to 20."""
        _, item = element
        buffer.add(item)
        timer1.set(20)

      @on_timer(EXPIRY_TIMER)
      def expiry_callback(
          self,
          buffer=DoFn.StateParam(BUFFER_STATE),
          timer=DoFn.TimerParam(EXPIRY_TIMER)):
        """Yield the sorted state contents as one concatenated string."""
        ordered = sorted(buffer.read())
        yield ''.join(map(str, ordered))
class BagInStateOutputAfterTimer(beam.DoFn):
    """Stores grouped values in a set state and re-keys them on a timer."""

    SET_STATE = SetStateSpec('buffer', VarIntCoder())
    EMIT_TIMER = TimerSpec('emit_timer', TimeDomain.WATERMARK)

    def process(
            self,
            element,
            set_state=beam.DoFn.StateParam(SET_STATE),
            emit_timer=beam.DoFn.TimerParam(EMIT_TIMER)):
        """Add every value of the element to the set; set the timer to 1."""
        _, grouped = element
        for item in grouped:
            set_state.add(item)
        emit_timer.set(1)

    @on_timer(EMIT_TIMER)
    def emit_values(self, set_state=beam.DoFn.StateParam(SET_STATE)):
        """Return the stored values, each paired with a random key in 0..1000."""
        keyed = []
        for item in set_state.read():
            keyed.append((random.randint(0, 1000), item))
        return keyed
Example #29
0
        class DynamicTimerDoFn(DoFn):
            """Sets or clears tagged timers depending on ``element[1]``."""
            EMIT_TIMER_FAMILY = TimerSpec('emit', TimeDomain.WATERMARK)

            def process(
                    self,
                    element,
                    emit=DoFn.TimerParam(EMIT_TIMER_FAMILY)):
                """Apply the 'set' or 'clear' timer sequence; emit nothing."""
                action = element[1]
                if action == 'set':
                    for fire_at, tag in ((10, 'emit1'), (20, 'emit2')):
                        emit.set(fire_at, dynamic_timer_tag=tag)
                if action == 'clear':
                    # Set, clear, then set again under the same tag.
                    emit.set(30, dynamic_timer_tag='emit3')
                    emit.clear(dynamic_timer_tag='emit3')
                    emit.set(40, dynamic_timer_tag='emit3')
                return []

            @on_timer(EMIT_TIMER_FAMILY)
            def emit_callback(
                    self,
                    ts=DoFn.TimestampParam,
                    tag=DoFn.DynamicTimerTagParam):
                """Yield ``(tag, ts)`` from the injected tag and timestamp."""
                fired = (tag, ts)
                yield fired
Example #30
0
class TimerExample(beam.DoFn):
    """Passes elements through and sets a real-time expiry timer for each.

    Each processed element sets the timer to the current wall-clock time plus
    ``EXPIRY_TIMER_DURATION_SECONDS``. The timer callback only logs.
    """
    EXPIRY_TIMER = TimerSpec('expiry', beam.TimeDomain.REAL_TIME)
    # Offset, in seconds, added to time.time() when the timer is set.
    EXPIRY_TIMER_DURATION_SECONDS = 5

    def process(self,
                elem,
                timestamp=beam.DoFn.TimestampParam,
                expiry_timer=beam.DoFn.TimerParam(EXPIRY_TIMER)):
        """Log the element, set the expiry timer, and yield the element.

        Args:
            elem: a ``(key, msg)`` pair.
            timestamp: injected timestamp; used only in the log line.
            expiry_timer: injected timer for ``EXPIRY_TIMER``.

        Yields:
            ``elem`` unchanged.
        """
        (key, msg) = elem
        expiration = time.time() + TimerExample.EXPIRY_TIMER_DURATION_SECONDS
        logging.info('Current element (%s, %s, %s) => Setting the timer to %s',
                     timestamp.to_utc_datetime(), key, msg,
                     datetime.fromtimestamp(expiration))
        expiry_timer.set(expiration)
        yield elem

    @on_timer(EXPIRY_TIMER)
    def expiry(self):
        """Log that the timer fired; emits nothing."""
        # Pass the value as a logging argument, as process() does, so the
        # message is only formatted when the record is actually emitted.
        logging.info('Timer expired after %s seconds',
                     TimerExample.EXPIRY_TIMER_DURATION_SECONDS)