def process(
        self,
        element,
        timer1=DoFn.TimerParam(EMIT_TIMER_1),
        timer2=DoFn.TimerParam(EMIT_TIMER_2),
        timer3=DoFn.TimerParam(EMIT_TIMER_3)):
    """Set the three injected timers to 10, 20 and 30, in that order.

    The incoming element is not inspected and nothing is emitted.
    """
    settings = ((timer1, 10), (timer2, 20), (timer3, 30))
    for timer, value in settings:
        timer.set(value)
def process(
        self,
        element,
        buffer=DoFn.StateParam(BUFFER_STATE),
        timer1=DoFn.TimerParam(EXPIRY_TIMER),
        dynamic_timer=DoFn.TimerParam(EXPIRY_TIMER_FAMILY)):
    """Pass the element through unchanged.

    The state and timer parameters are declared in the signature but are
    not touched by the body.
    """
    yield element
def process(
        self,
        element,
        t=DoFn.TimestampParam,
        buffer_1=DoFn.StateParam(BUFFER_STATE_1),
        buffer_2=DoFn.StateParam(BUFFER_STATE_2),
        timer_1=DoFn.TimerParam(EXPIRY_TIMER_1),
        timer_2=DoFn.TimerParam(EXPIRY_TIMER_2)):
    """Pass the element through unchanged.

    The timestamp, both state parameters and both timer parameters are
    declared in the signature only; the body does not use them.
    """
    yield element
def process(
        self,
        element,
        emit=DoFn.TimerParam(EMIT_TIMER_FAMILY),
        gc=DoFn.TimerParam(GC_TIMER)):
    """Set three tagged timers on ``emit`` and then set ``gc`` to 40.

    Tags 'emit1', 'emit2' and 'emit3' are set to 10, 20 and 30
    respectively, in that order. Nothing is emitted.
    """
    tagged_settings = ((10, 'emit1'), (20, 'emit2'), (30, 'emit3'))
    for value, tag in tagged_settings:
        emit.set(value, dynamic_timer_tag=tag)
    gc.set(40)
def on_expiry_1(
        self,
        window=DoFn.WindowParam,
        timestamp=DoFn.TimestampParam,
        key=DoFn.KeyParam,
        buffer=DoFn.StateParam(BUFFER_STATE_1),
        timer_1=DoFn.TimerParam(EXPIRY_TIMER_1),
        timer_2=DoFn.TimerParam(EXPIRY_TIMER_2),
        timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
    """Yield the single string 'expired1'.

    The window, timestamp, key, state and timer parameters are declared in
    the signature but are not read by the body.
    """
    yield 'expired1'
def process(
        self,
        element,
        emit1=DoFn.TimerParam(EMIT_TIMER_FAMILY1),
        emit2=DoFn.TimerParam(EMIT_TIMER_FAMILY2)):
    """Set three tagged timers on each of ``emit1`` and ``emit2``.

    ``emit1`` gets 10/20/30 for tags 'emit11'/'emit12'/'emit13'; ``emit2``
    then gets 30/20/10 for tags 'emit21'/'emit22'/'emit23'. Nothing is
    emitted.
    """
    # One row per call, listed in the exact order the calls are made.
    calls = (
        (emit1, 10, 'emit11'),
        (emit1, 20, 'emit12'),
        (emit1, 30, 'emit13'),
        (emit2, 30, 'emit21'),
        (emit2, 20, 'emit22'),
        (emit2, 10, 'emit23'),
    )
    for timer, value, tag in calls:
        timer.set(value, dynamic_timer_tag=tag)
def test_spec_construction(self):
    """Check which spec/param constructions succeed and which raise.

    Each step is a pair ``(expected_error, thunk)``. ``None`` means the
    construction must succeed; otherwise the thunk must raise the given
    exception type. Steps run in the listed order.
    """
    steps = (
        (None, lambda: BagStateSpec('statename', VarIntCoder())),
        (TypeError, lambda: BagStateSpec(123, VarIntCoder())),
        (None,
         lambda: CombiningValueStateSpec(
             'statename', VarIntCoder(), TopCombineFn(10))),
        (TypeError,
         lambda: CombiningValueStateSpec(
             123, VarIntCoder(), TopCombineFn(10))),
        (TypeError,
         lambda: CombiningValueStateSpec(
             'statename', VarIntCoder(), object())),
        (None, lambda: SetStateSpec('setstatename', VarIntCoder())),
        (TypeError, lambda: SetStateSpec(123, VarIntCoder())),
        (TypeError, lambda: SetStateSpec('setstatename', object())),
        (None,
         lambda: ReadModifyWriteStateSpec('valuestatename', VarIntCoder())),
        (TypeError, lambda: ReadModifyWriteStateSpec(123, VarIntCoder())),
        (TypeError,
         lambda: ReadModifyWriteStateSpec('valuestatename', object())),
        # TODO: add more spec tests
        (ValueError,
         lambda: DoFn.TimerParam(BagStateSpec('elements', BytesCoder()))),
        (None, lambda: TimerSpec('timer', TimeDomain.WATERMARK)),
        (None, lambda: TimerSpec('timer', TimeDomain.REAL_TIME)),
        (ValueError, lambda: TimerSpec('timer', 'bogus_time_domain')),
        (ValueError,
         lambda: DoFn.StateParam(TimerSpec('timer', TimeDomain.WATERMARK))),
    )
    for expected_error, construct in steps:
        if expected_error is None:
            construct()
        else:
            with self.assertRaises(expected_error):
                construct()
def process(self, element, emit=DoFn.TimerParam(EMIT_TIMER_FAMILY)):
    """Set or clear tagged timers depending on ``element[1]``.

    When ``element[1]`` is 'set', tags 'emit1', 'emit2' and 'emit3' are set
    to 10, 20 and 30. When it is 'clear', the 'emit3' tag is cleared. Any
    other value does nothing. Nothing is emitted.
    """
    action = element[1]
    if action == 'set':
        for value, tag in ((10, 'emit1'), (20, 'emit2'), (30, 'emit3')):
            emit.set(value, dynamic_timer_tag=tag)
    if action == 'clear':
        emit.clear(dynamic_timer_tag='emit3')
def on_buffering_timer(
        self,
        element_state=DoFn.StateParam(ELEMENT_STATE),
        count_state=DoFn.StateParam(COUNT_STATE),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
    """Delegate to ``self.flush_batch`` and return whatever it returns.

    The element state, count state and buffering timer are forwarded
    positionally, in that order.
    """
    flushed = self.flush_batch(element_state, count_state, buffering_timer)
    return flushed
def process(
        self,
        element,
        buffer=DoFn.StateParam(BUFFER_STATE),
        timer1=DoFn.TimerParam(EXPIRY_TIMER)):
    """Add the element's value to ``buffer`` and set ``timer1`` to 20.

    ``element`` is unpacked as a two-item pair; the first item is ignored.
    Nothing is emitted.
    """
    _, payload = element
    buffer.add(payload)
    timer1.set(20)
def process(
        self,
        element,
        window=DoFn.WindowParam,
        element_state=DoFn.StateParam(ELEMENT_STATE),
        count_state=DoFn.StateParam(COUNT_STATE),
        window_timer=DoFn.TimerParam(WINDOW_TIMER),
        buffering_timer=DoFn.TimerParam(BUFFERING_TIMER)):
    """Buffer one element and flush once ``batch_size`` is reached.

    Sets ``window_timer`` to ``window.end``, adds the element to
    ``element_state`` and increments ``count_state``. Relies on
    ``batch_size``, ``max_buffering_duration_secs`` and ``clock`` from the
    enclosing scope.

    Returns:
        The result of ``self.flush_batch(...)`` when the count has reached
        ``batch_size``; otherwise ``None``.
    """
    # Allowed lateness not supported in Python SDK
    # https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
    window_timer.set(window.end)
    element_state.add(element)
    count_state.add(1)
    buffered = count_state.read()

    if buffered == 1 and max_buffering_duration_secs > 0:
        # First element of the batch: start the buffering countdown, but
        # only when a limit was configured.
        buffering_timer.set(clock() + max_buffering_duration_secs)

    if buffered >= batch_size:
        return self.flush_batch(element_state, count_state, buffering_timer)
    return None
def process(
        self,
        element,
        state=DoFn.StateParam(BUFFER_STATE),
        timer=DoFn.TimerParam(UNMATCHED_TIMER)):
    """Pair the element's value with a previously stored value, if any.

    ``element`` is unpacked as ``(key, value)``. If ``state`` already holds
    values, a bytes record built from the key, the first stored value and
    the new value is yielded, then ``state`` and ``timer`` are cleared.
    Otherwise the value is stored and ``timer`` is set to 100.
    """
    key, value = element
    stored = list(state.read())
    if stored:
        yield b'Record<%s,%s,%s>' % (key, stored[0], value)
        state.clear()
        timer.clear()
    else:
        state.add(value)
        timer.set(100)
def process(
        self,
        element,
        window=DoFn.WindowParam,
        element_state=DoFn.StateParam(ELEMENT_STATE),
        count_state=DoFn.StateParam(COUNT_STATE),
        expiry_timer=DoFn.TimerParam(EXPIRY_TIMER)):
    """Buffer one element and emit a batch once ``batch_size`` is reached.

    Sets ``expiry_timer`` to ``window.end``, adds the element to
    ``element_state`` and increments ``count_state``. When the count read
    back is at least ``batch_size`` (taken from the enclosing scope), the
    buffered elements are yielded as one list and both states are cleared.
    """
    # Allowed lateness not supported in Python SDK
    # https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
    expiry_timer.set(window.end)
    element_state.add(element)
    count_state.add(1)
    if count_state.read() >= batch_size:
        yield list(element_state.read())
        element_state.clear()
        count_state.clear()
def process(
        self,
        element,
        timer1=DoFn.TimerParam(EXPIRY_TIMER_1),
        timer2=DoFn.TimerParam(EXPIRY_TIMER_2)):
    """Do nothing; only the signature declares the two timer parameters."""
def expiry_callback(
        self,
        element,
        t1=DoFn.TimerParam(EXPIRY_TIMER_2),
        t2=DoFn.TimerParam(EXPIRY_TIMER_2)):
    """Yield the element unchanged.

    NOTE(review): both ``t1`` and ``t2`` bind ``EXPIRY_TIMER_2``. This is
    presumably deliberate, to exercise duplicate-timer validation; confirm
    against the surrounding test before changing it.
    """
    yield element
def process(
        self,
        element,
        t1=DoFn.TimerParam(TIMER),
        t2=DoFn.TimerParam(TIMER)):
    """Yield the element unchanged.

    NOTE(review): both ``t1`` and ``t2`` bind the same ``TIMER`` spec. This
    is presumably a deliberate fixture for duplicate-parameter validation;
    confirm against the surrounding test before changing it.
    """
    yield element
def expiry_callback(
        self,
        element,
        timer=DoFn.TimerParam(EXPIRY_TIMER)):
    """Yield the element unchanged; ``timer`` is declared but not used."""
    yield element
def test_param_construction(self):
    """Check that mismatched spec/param pairings raise ``ValueError``.

    ``DoFn.StateParam`` is given a ``TimerSpec`` and ``DoFn.TimerParam`` is
    given a ``BagStateSpec``; each construction must raise.
    """
    mismatched = (
        lambda: DoFn.StateParam(TimerSpec('timer', TimeDomain.WATERMARK)),
        lambda: DoFn.TimerParam(BagStateSpec('elements', BytesCoder())),
    )
    for construct in mismatched:
        with self.assertRaises(ValueError):
            construct()
def process(
        self,
        element,
        timer1=DoFn.TimerParam(EMIT_TIMER_1)):
    """Set ``timer1`` to 10; the element is not inspected or emitted."""
    timer1.set(10)
def process(
        self,
        element,
        emit=DoFn.TimerParam(EMIT_CLEAR_SET_TIMER)):
    """Yield ``('1', 'set')`` and afterwards set ``emit`` to 1.

    Because this is a generator, the timer is only set once the consumer
    resumes iteration past the yielded pair.
    """
    marker = ('1', 'set')
    yield marker
    emit.set(1)
def on_expiry_family(
        self,
        dynamic_timer=DoFn.TimerParam(EXPIRY_TIMER_FAMILY),
        dynamic_timer_tag=DoFn.DynamicTimerTagParam):
    """Yield the received tag paired with 'expired_dynamic_timer'.

    ``dynamic_timer`` is declared in the signature but not used.
    """
    expired = (dynamic_timer_tag, 'expired_dynamic_timer')
    yield expired
def expiry_family_callback(
        self,
        element,
        dynamic_timer=DoFn.TimerParam(EXPIRY_TIMER_FAMILY)):
    """Yield the element unchanged; ``dynamic_timer`` is not used."""
    yield element
def process(
        self,
        element,
        emit=DoFn.TimerParam(EMIT_TIMER_FAMILY)):
    """Set ``emit`` twice: once with no tag, once with an empty-string tag.

    The first call sets 10 without ``dynamic_timer_tag``; the second sets
    20 with ``dynamic_timer_tag=''``. Nothing is emitted.
    """
    # First call deliberately omits the tag argument.
    emit.set(10)
    # Second call passes the empty string explicitly.
    emit.set(20, dynamic_timer_tag='')
def process(
        self,
        element,
        dynamic_timer_1=DoFn.TimerParam(EXPIRY_TIMER_FAMILY),
        dynamic_timer_2=DoFn.TimerParam(EXPIRY_TIMER_FAMILY)):
    """Yield the element unchanged.

    NOTE(review): both timer parameters bind ``EXPIRY_TIMER_FAMILY``. This
    is presumably a deliberate fixture for duplicate-parameter validation;
    confirm against the surrounding test before changing it.
    """
    yield element
def on_expiry_3(
        self,
        buffer_1=DoFn.StateParam(BUFFER_STATE_1),
        buffer_2=DoFn.StateParam(BUFFER_STATE_2),
        timer_3=DoFn.TimerParam(EXPIRY_TIMER_3)):
    """Yield the single string 'expired3'.

    The two state parameters and the timer parameter are declared in the
    signature but are not read by the body.
    """
    yield 'expired3'
def expiry_callback(
        self,
        buffer=DoFn.StateParam(BUFFER_STATE),
        timer=DoFn.TimerParam(EXPIRY_TIMER)):
    """Yield the buffered values, sorted and concatenated into one string.

    Values read from ``buffer`` are sorted, converted with ``str`` and
    joined with no separator. ``timer`` is declared but not used.
    """
    ordered = sorted(buffer.read())
    yield ''.join(map(str, ordered))