Ejemplo n.º 1
0
  def test_read_n(self):
    """Check that a stream yields at most 'max_n' elements from the cache."""

    num_cached = 5
    self.mock_result.set_state(PipelineState.DONE)
    self.cache.write(list(range(num_cached)), 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    # The final limit is larger than the number of cached elements: reading
    # must still return, yielding everything that was written to the cache.
    for limit in (1, 2, 5, 10):
      stream = ElementStream(
          self.pcoll, '', self.cache_key, max_n=limit, max_duration_secs=1)
      expected = list(range(min(limit, num_cached)))
      self.assertEqual(list(stream.read()), expected)
      self.assertTrue(stream.is_done())
    def test_read_duration(self):
        """Check that reading is bounded by the stream's 'duration' limit."""

        # Record five elements, advancing the processing time by 1 before
        # each of them. Pairs are (element, event_time_secs).
        builder = FileRecordsBuilder(tag=self.cache_key)
        for element, event_time in ((0, 0), (1, 1), (2, 3), (3, 4), (4, 5)):
            builder = builder.advance_processing_time(1).add_element(
                element=element, event_time_secs=event_time)
        values = builder.build()

        self.mock_result.set_state(PipelineState.DONE)
        self.cache.write(values, 'full', self.cache_key)
        self.cache.save_pcoder(None, 'full', self.cache_key)

        def get_elements(events):
            """Decode the elements held by the TestStreamFileRecord events.

            Events of any other type are skipped; the elements inside a record
            are stored encoded and are decoded with a FastPrimitivesCoder.
            """
            coder = coders.FastPrimitivesCoder()
            decoded = []
            for record in events:
                if isinstance(record, TestStreamFileRecord):
                    element_event = record.recorded_event.element_event
                    if element_event:
                        decoded.extend([
                            coder.decode(item.encoded_element)
                            for item in element_event.elements
                        ])
            return decoded

        # Progressively larger durations must expose progressively more of
        # the cached elements.
        expectations = (
            (1, [0]),
            (2, [0, 1]),
            (10, [0, 1, 2, 3, 4]),
        )
        for duration, expected in expectations:
            stream = ElementStream(self.pcoll,
                                   '',
                                   self.cache_key,
                                   max_n=100,
                                   max_duration_secs=duration)
            self.assertSequenceEqual(get_elements(stream.read()), expected)
Ejemplo n.º 3
0
    def test_read_duration(self):
        """Check that reading is bounded by the stream's 'duration' limit."""
        def as_windowed_value(element):
            # Wrap the raw element the way it is stored in the records below.
            windowed = WindowedValue(element, 0, [])
            return WindowedValueHolder(windowed)

        # Record five elements, advancing the processing time by 1 before
        # each of them. Pairs are (element, event_time_secs).
        builder = FileRecordsBuilder(tag=self.cache_key)
        for element, event_time in ((0, 0), (1, 1), (2, 3), (3, 4), (4, 5)):
            builder = builder.advance_processing_time(1).add_element(
                element=as_windowed_value(element),
                event_time_secs=event_time)
        records = builder.build()

        # Only the recorded events of the TestStreamFileRecord entries are
        # written to the cache; any other built entry is dropped.
        values = []
        for record in records:
            if isinstance(record,
                          beam_interactive_api_pb2.TestStreamFileRecord):
                values.append(record.recorded_event)

        self.mock_result.set_state(PipelineState.DONE)
        self.cache.write(values, 'full', self.cache_key)
        self.cache.save_pcoder(coders.FastPrimitivesCoder(), 'full',
                               self.cache_key)

        # Progressively larger durations must expose progressively more of
        # the cached elements.
        expectations = (
            (1, [0]),
            (2, [0, 1]),
            (10, [0, 1, 2, 3, 4]),
        )
        for duration, expected in expectations:
            stream = ElementStream(self.pcoll,
                                   '',
                                   self.cache_key,
                                   max_n=100,
                                   max_duration_secs=duration)
            observed = [item.value for item in stream.read()]
            self.assertSequenceEqual(observed, expected)
Ejemplo n.º 4
0
  def test_done_if_terminated(self):
    """Check that a stream reports done once the job has terminated."""

    self.cache.write(['expected'], 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    stream = ElementStream(
        self.pcoll, '', self.cache_key, max_n=100, max_duration_secs=10)
    self.assertFalse(stream.is_done())

    # First pass: neither limiter is hit, so the stream must stay open after
    # the read.
    first_pass = list(stream.read(tail=False))
    self.assertEqual(first_pass[0], 'expected')
    self.assertFalse(stream.is_done())

    # Second pass: the pipeline state is now DONE, so after reading again the
    # stream has nothing new to yield and must report itself as done.
    self.mock_result.set_state(PipelineState.DONE)
    second_pass = list(stream.read(tail=False))
    self.assertEqual(second_pass[0], 'expected')
    self.assertTrue(stream.is_done())
Ejemplo n.º 5
0
  def test_read(self):
    """Check that reading yields the cached element and finishes the stream."""

    self.mock_result.set_state(PipelineState.DONE)
    self.cache.write(['expected'], 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    stream = ElementStream(
        self.pcoll, '', self.cache_key, max_n=1, max_duration_secs=1)

    # The stream is only marked as done after it has been read.
    self.assertFalse(stream.is_done())
    elements = list(stream.read())
    self.assertEqual(elements[0], 'expected')
    self.assertTrue(stream.is_done())