def _test_stream_events_before_target(self, target_timestamp):
    """Pulls at most one recorded event from every stream that lags the target.

    A stream is read only while its most recently tracked time is strictly
    less than ``target_timestamp``. Reading a single element per stream per
    call keeps memory bounded, since the full set of events may not fit into
    memory.

    Args:
      target_timestamp: streams whose tracked time is already at or past this
        value are skipped.

    Returns:
      A list of ``(tag, record, stream_time)`` tuples, one for each stream
      that produced an element during this call.
    """
    collected = []
    for tag, reader in self._readers.items():
        # Streams that have already caught up with the target are left alone
        # so that we do not pull everything into memory.
        if self._stream_times[tag] >= target_timestamp:
            continue

        try:
            event = next(reader).recorded_event
        except StopIteration:
            # This stream is exhausted; it contributes nothing this round.
            continue

        # A processing-time event moves this stream's clock forward by the
        # recorded advance duration.
        if event.HasField('processing_time_event'):
            advance = timestamp.Duration(
                micros=event.processing_time_event.advance_duration)
            self._stream_times[tag] += advance

        collected.append((tag, event, self._stream_times[tag]))
    return collected
def deferred_status(self):
    # type: () -> Optional[Tuple[Any, timestamp.Duration]]
    """Returns deferred work which is produced by ``defer_remainder()``.

    When there is a self-checkpoint performed, the system needs to fulfill the
    DelayedBundleApplication with deferred_work for a ProcessBundleResponse.
    The system calls this API to get the deferred residual together with the
    remaining delay, to help the runner schedule the future work.

    The delay is computed on every call from the stored values and the stored
    values are never overwritten, so calling this method repeatedly does not
    subtract the elapsed time more than once.

    Returns:
      ``(deferred_residual, time_delay)`` if there is any residual, else
      ``None``. ``time_delay`` is a ``timestamp.Duration`` whenever the stored
      watermark is unset, a ``timestamp.Timestamp`` or a
      ``timestamp.Duration``; any other stored value is returned unchanged.
    """
    if not self._deferred_residual:
        return None

    watermark = self._deferred_watermark
    if not watermark:
        # Nothing was requested: resume without any delay.
        delay = timestamp.Duration()
    elif isinstance(watermark, timestamp.Timestamp):
        # An absolute time was provided: the delay is the distance between
        # that time and the moment deferred_status() is called.
        delay = watermark - timestamp.Timestamp.now()
    elif isinstance(watermark, timestamp.Duration):
        # A relative delay was provided: subtract the time already spent
        # since defer_remainder() was called.
        elapsed = timestamp.Timestamp.now() - self._deferred_timestamp
        delay = watermark - elapsed
    else:
        delay = watermark
    return self._deferred_residual, delay
def test_self_checkpoint_with_relative_time(self):
    # Defers the remainder with a relative delay, waits, and verifies that
    # deferred_status() reports a Duration reduced by the time spent waiting.
    tracker = iobase.ThreadsafeRestrictionTracker(
        OffsetRestrictionTracker(OffsetRange(0, 10)))
    tracker.defer_remainder(timestamp.Duration(100))
    time.sleep(2)
    _, remaining_delay = tracker.deferred_status()
    # Dedicated assertions report the offending value on failure instead of
    # the opaque "False is not true".
    self.assertIsInstance(remaining_delay, timestamp.Duration)
    # The expectation = 100 - 2 - some_delta
    self.assertLessEqual(remaining_delay, 98)
def test_api_expose(self):
    # Exercises the methods that RestrictionTrackerView forwards to the
    # underlying thread-safe tracker.
    tracker = iobase.ThreadsafeRestrictionTracker(
        OffsetRestrictionTracker(OffsetRange(0, 10)))
    view = iobase.RestrictionTrackerView(tracker)

    # The view reports the restriction the tracker was built with.
    self.assertEqual(view.current_restriction(), OffsetRange(0, 10))

    # Claim the first position, then defer everything that is left.
    self.assertTrue(view.try_claim(0))
    view.defer_remainder()

    # With no delay requested, the residual comes back with a zero Duration.
    residual, delay = tracker.deferred_status()
    self.assertEqual(residual, OffsetRange(1, 10))
    self.assertEqual(delay, timestamp.Duration())
def test_self_checkpoint_with_absolute_time(self):
    # Defers the remainder until an absolute point in time, waits, and
    # verifies that deferred_status() converts it into a remaining Duration.
    tracker = iobase.ThreadsafeRestrictionTracker(
        OffsetRestrictionTracker(OffsetRange(0, 10)))
    now = timestamp.Timestamp.now()
    schedule_time = now + timestamp.Duration(100)
    # Dedicated assertions report the offending value on failure instead of
    # the opaque "False is not true".
    self.assertIsInstance(schedule_time, timestamp.Timestamp)
    tracker.defer_remainder(schedule_time)
    time.sleep(2)
    _, remaining_delay = tracker.deferred_status()
    self.assertIsInstance(remaining_delay, timestamp.Duration)
    # The expectation =
    # schedule_time - the time when deferred_status is called - some_delta
    self.assertLessEqual(remaining_delay, 98)
def from_runner_api(proto, element_coder):
    """Builds the event object that corresponds to a TestStream event proto.

    Timestamps and durations read from the proto are multiplied by 1000
    before being passed as ``micros`` (presumably the proto stores
    milliseconds -- confirm against the proto definition).

    Args:
      proto: message holding one of ``element_event``, ``watermark_event`` or
        ``processing_time_event``.
      element_coder: coder used to decode each encoded element payload.

    Returns:
      An ``ElementEvent``, ``WatermarkEvent`` or ``ProcessingTimeEvent``.

    Raises:
      ValueError: if none of the known event fields is set on ``proto``.
    """
    if proto.HasField('element_event'):
        timestamped_values = []
        for encoded in proto.element_event.elements:
            value = element_coder.decode(encoded.encoded_element)
            event_time = timestamp.Timestamp(micros=1000 * encoded.timestamp)
            timestamped_values.append(TimestampedValue(value, event_time))
        return ElementEvent(timestamped_values)

    if proto.HasField('watermark_event'):
        new_watermark = timestamp.Timestamp(
            micros=1000 * proto.watermark_event.new_watermark)
        return WatermarkEvent(new_watermark)

    if proto.HasField('processing_time_event'):
        advance_by = timestamp.Duration(
            micros=1000 * proto.processing_time_event.advance_duration)
        return ProcessingTimeEvent(advance_by)

    raise ValueError(
        'Unknown TestStream Event type: %s' % proto.WhichOneof('event'))