Code example #1
  def read(self, tail=True):
    # type: (bool) -> Any

    """Reads the elements currently recorded."""

    # Get the cache manager and wait until the file exists.
    cache_manager = ie.current_env().get_cache_manager(self._pipeline)

    # Retrieve the coder for the particular PCollection which will be used to
    # decode elements read from cache.
    coder = cache_manager.load_pcoder('full', self._cache_key)

    # Read the elements from the cache.
    # Import limiters here to prevent a circular import.
    from apache_beam.runners.interactive.options.capture_limiters import CountLimiter
    from apache_beam.runners.interactive.options.capture_limiters import ProcessingTimeLimiter
    reader, _ = cache_manager.read('full', self._cache_key, tail=tail)

    # Because a single TestStreamFileRecord can yield multiple elements, we
    # limit the count again here in the to_element_list call.
    #
    # There are two ways of exiting this loop: either a limiter was triggered or
    # all elements from the cache were read. In the latter situation, it may be
    # the case that the pipeline was still running. Thus, another invocation of
    # `read` will yield new elements.
    count_limiter = CountLimiter(self._n)
    time_limiter = ProcessingTimeLimiter(self._duration_secs)
    limiters = (count_limiter, time_limiter)
    for e in utils.to_element_list(reader,
                                   coder,
                                   include_window_info=True,
                                   n=self._n,
                                   include_time_events=True):

      # From to_element_list we get either TestStreamPayload.Events (when
      # include_time_events is set) or decoded elements from the reader. Count
      # only the decoded elements so that we can break early.
      if isinstance(e, TestStreamPayload.Event):
        time_limiter.update(e)
      else:
        count_limiter.update(e)
        yield e

      if any(l.is_triggered() for l in limiters):
        break

    # A limiter being triggered means that we have fulfilled the user's request.
    # This implies that reading from the cache again won't yield any new
    # elements. The same holds when the user pipeline has terminated.
    if any(l.is_triggered()
           for l in limiters) or ie.current_env().is_terminated(self._pipeline):
      self._done = True
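
For reference, both limiters used in example #1 share a small protocol: update(e) observes a value and is_triggered() reports whether reading should stop. The sketch below illustrates that protocol; it is a minimal illustration of the assumed shape, not the verbatim code in apache_beam.runners.interactive.options.capture_limiters.

from apache_beam.portability.api.beam_runner_api_pb2 import TestStreamPayload


class Limiter(object):
  """Sketch of the limiter protocol: observe values, say when to stop."""
  def update(self, e):
    raise NotImplementedError

  def is_triggered(self):
    raise NotImplementedError


class CountLimiter(Limiter):
  """Triggers once max_count values have been observed."""
  def __init__(self, max_count):
    self._max_count = max_count
    self._count = 0

  def update(self, e):
    self._count += 1

  def is_triggered(self):
    return self._count >= self._max_count


class ProcessingTimeLimiter(Limiter):
  """Triggers once advanced processing time reaches max_duration_secs."""
  def __init__(self, max_duration_secs):
    self._max_duration_us = max_duration_secs * 1e6
    self._elapsed_us = 0

  def update(self, e):
    # Only processing-time events advance the clock. Anything else is
    # ignored, which is why example #1 routes decoded elements to the
    # CountLimiter and TestStreamPayload.Events to this limiter.
    if isinstance(e, TestStreamPayload.Event) and e.HasField(
        'processing_time_event'):
      self._elapsed_us += e.processing_time_event.advance_duration

  def is_triggered(self):
    return self._elapsed_us >= self._max_duration_us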
Code example #2
File: capture_limiters_test.py Project: zhoufek/beam
  def test_processing_time_limiter(self):
    limiter = ProcessingTimeLimiter(max_duration_secs=2)

    e = beam_runner_api_pb2.TestStreamPayload.Event()
    e.processing_time_event.advance_duration = int(1 * 1e6)
    limiter.update(e)
    self.assertFalse(limiter.is_triggered())

    e = beam_runner_api_pb2.TestStreamPayload.Event()
    e.processing_time_event.advance_duration = int(2 * 1e6)
    limiter.update(e)
    self.assertTrue(limiter.is_triggered())
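
The unit mismatch is what this test exercises: ProcessingTimeLimiter takes its budget in seconds, while advance_duration on the event is measured in microseconds. The first update advances 1 s (still under the 2 s budget); the second advances 2 s more, crossing it. A pure-Python restatement of that arithmetic:

MICROS_PER_SEC = 10**6

budget_us = 2 * MICROS_PER_SEC   # ProcessingTimeLimiter(max_duration_secs=2)
elapsed_us = 0

elapsed_us += int(1 * 1e6)       # first event advances 1s; 1s < 2s, not triggered
assert elapsed_us < budget_us

elapsed_us += int(2 * 1e6)       # second event advances 2s; 3s >= 2s, triggered
assert elapsed_us >= budget_us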
Code example #3
    def test_processing_time_limiter(self):
        limiter = ProcessingTimeLimiter(max_duration_secs=2)

        r = TestStreamFileRecord()
        r.recorded_event.processing_time_event.advance_duration = int(1 * 1e6)
        limiter.update(r)
        self.assertFalse(limiter.is_triggered())

        r = TestStreamFileRecord()
        r.recorded_event.processing_time_event.advance_duration = int(2 * 1e6)
        limiter.update(r)
        self.assertTrue(limiter.is_triggered())
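
Compared with example #2, this variant drives the limiter with TestStreamFileRecords instead of raw TestStreamPayload.Events, which implies that update first unwraps the record's recorded_event field. A minimal sketch of that unwrapping, assuming the limiter accepts both types (the helper name and the beam_interactive_api_pb2 module path are assumptions, not confirmed by the snippets above):

from apache_beam.portability.api.beam_interactive_api_pb2 import TestStreamFileRecord
from apache_beam.portability.api.beam_runner_api_pb2 import TestStreamPayload


def processing_time_advance_us(e):
  """Returns the processing-time advance in microseconds, or 0 if e has none."""
  # TestStreamFileRecord wraps the underlying event in recorded_event.
  if isinstance(e, TestStreamFileRecord):
    e = e.recorded_event
  if isinstance(e, TestStreamPayload.Event) and e.HasField(
      'processing_time_event'):
    return e.processing_time_event.advance_duration
  return 0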
Code example #4
    def test_single_reader_with_processing_time_limiter(self):
        """Tests that we expect to see all the correctly emitted TestStreamPayloads.
    """
        CACHED_PCOLLECTION_KEY = repr(CacheKey('arbitrary_key', '', '', ''))

        values = (FileRecordsBuilder(tag=CACHED_PCOLLECTION_KEY)
                  .advance_processing_time(1e-6)
                  .add_element(element=0, event_time_secs=0)
                  .advance_processing_time(1)
                  .add_element(element=1, event_time_secs=1)
                  .advance_processing_time(1)
                  .add_element(element=2, event_time_secs=2)
                  .advance_processing_time(1)
                  .add_element(element=3, event_time_secs=2)
                  .advance_processing_time(1)
                  .add_element(element=4, event_time_secs=2)
                  .build()) # yapf: disable

        cache = StreamingCache(cache_dir=None)
        cache.write(values, CACHED_PCOLLECTION_KEY)

        reader, _ = cache.read(CACHED_PCOLLECTION_KEY,
                               limiters=[ProcessingTimeLimiter(2)])
        coder = coders.FastPrimitivesCoder()
        events = list(reader)

        # Units here are in microseconds.
        # Expects the result to be the prefix of the original values whose
        # accumulated processing time stays under the limiter's duration.
        expected = [
            TestStreamPayload.Event(
                processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                    advance_duration=1)),
            TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode(0), timestamp=0)
                    ],
                    tag=CACHED_PCOLLECTION_KEY)),
            TestStreamPayload.Event(
                processing_time_event=TestStreamPayload.Event.AdvanceProcessingTime(
                    advance_duration=1 * 10**6)),
            TestStreamPayload.Event(
                element_event=TestStreamPayload.Event.AddElements(
                    elements=[
                        TestStreamPayload.TimestampedElement(
                            encoded_element=coder.encode(1),
                            timestamp=1 * 10**6)
                    ],
                    tag=CACHED_PCOLLECTION_KEY)),
        ]
        self.assertSequenceEqual(events, expected)
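
Why only the first two of the five elements appear in expected: the limiter stops the read once the accumulated processing-time advances reach the 2 s budget. The advances are 1e-6 s followed by 1 s four times, so the running total crosses 2 s on the third advance and elements 2 through 4 are never emitted. A quick check of that arithmetic:

advances_secs = [1e-6, 1, 1, 1, 1]
total_secs = 0.0
emitted = []
for i, advance in enumerate(advances_secs):
  total_secs += advance
  if total_secs >= 2:        # ProcessingTimeLimiter(2) triggers here
    break
  emitted.append(i)          # element i is read after this advance
assert emitted == [0, 1]     # matches the two element events in expected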
Code example #5
    def read(self, tail=True):
        # type: (bool) -> Any
        """Reads the elements currently recorded."""

        # Get the cache manager and wait until the file exists.
        cache_manager = ie.current_env().get_cache_manager(self._pipeline)

        # Retrieve the coder for the particular PCollection which will be used to
        # decode elements read from cache.
        coder = cache_manager.load_pcoder('full', self._cache_key)

        # Read the elements from the cache.
        limiters = [
            CountLimiter(self._n),
            ProcessingTimeLimiter(self._duration_secs)
        ]
        reader, _ = cache_manager.read('full', self._cache_key, tail=tail)

        # Because a single TestStreamFileRecord can yield multiple elements, we
        # limit the count again here in the to_element_list call.
        #
        # There are two ways of exiting this loop: either a limiter was triggered or
        # all elements from the cache were read. In the latter situation, it may be
        # the case that the pipeline was still running. Thus, another invocation of
        # `read` will yield new elements.
        for e in utils.to_element_list(reader,
                                       coder,
                                       include_window_info=True,
                                       n=self._n):
            for l in limiters:
                l.update(e)

            yield e

            if any(l.is_triggered() for l in limiters):
                break

        # A limiter being triggered means that we have fulfilled the user's request.
        # This implies that reading from the cache again won't yield any new
        # elements. The same holds when the user pipeline has terminated.
        if any(l.is_triggered()
               for l in limiters) or ie.current_env().is_terminated(
                   self._pipeline):
            self._done = True
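
As a usage sketch, a caller drains the generator and, while the pipeline is still running and no limiter has fired, may call read again to pick up newly recorded elements. The name recording below is a hypothetical instance of the class that defines read; it is not a name from the snippets above.

# recording is a hypothetical instance of the class defining read() above.
for element in recording.read(tail=False):
  print(element)

# If self._done was not set (no limiter fired and the pipeline is still
# running), a later call may yield elements recorded in the meantime.
more = list(recording.read(tail=False))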