Exemple #1
0
    def test_read_duration(self):
        """Check that reads are bounded by the stream's ``max_duration_secs``.

        Five elements are written to the cache, each preceded by a one-unit
        advance of processing time. Streams created with duration limits of
        1, 2 and 10 are then expected to yield one, two and all five elements
        respectively.
        """
        def wrap(payload):
            # Wrap every raw value the same way before it goes into the cache.
            return WindowedValueHolder(WindowedValue(payload, 0, []))

        # (element value, event time in seconds) for each cached element.
        timeline = ((0, 0), (1, 1), (2, 3), (3, 4), (4, 5))

        builder = FileRecordsBuilder(tag=self.cache_key)
        for payload, event_time in timeline:
            builder = builder.advance_processing_time(1)
            builder = builder.add_element(element=wrap(payload),
                                          event_time_secs=event_time)
        records = builder.build()

        # Only the TestStreamFileRecord entries carry a recorded event.
        events = [
            record.recorded_event for record in records
            if isinstance(record, TestStreamFileRecord)
        ]

        self.mock_result.set_state(PipelineState.DONE)
        self.cache.write(events, 'full', self.cache_key)
        self.cache.save_pcoder(coders.FastPrimitivesCoder(), 'full',
                               self.cache_key)

        # Read progressively longer durations from the same cache.
        expectations = (
            (1, [0]),
            (2, [0, 1]),
            (10, [0, 1, 2, 3, 4]),
        )
        for duration_secs, expected in expectations:
            stream = ElementStream(self.pcoll,
                                   '',
                                   self.cache_key,
                                   max_n=100,
                                   max_duration_secs=duration_secs)
            observed = [e.value for e in stream.read()]
            self.assertSequenceEqual(observed, expected)
  def test_done_if_terminated(self):
    """Check that the stream reports done once the job has terminated.

    A single element is cached and read twice without tailing: once before
    and once after the mocked pipeline result is moved to the DONE state.
    The stream must report not-done before the state change and done
    after it.
    """
    self.cache.write(['expected'], 'full', self.cache_key)
    self.cache.save_pcoder(None, 'full', self.cache_key)

    stream = ElementStream(self.pcoll,
                           '',
                           self.cache_key,
                           max_n=100,
                           max_duration_secs=10)

    def first_element():
      # Drain a non-tailing read and return its first element.
      return list(stream.read(tail=False))[0]

    self.assertFalse(stream.is_done())
    self.assertEqual(first_element(), 'expected')

    # Neither limiter was hit, so the stream must still be open.
    self.assertFalse(stream.is_done())

    self.mock_result.set_state(PipelineState.DONE)
    self.assertEqual(first_element(), 'expected')

    # With the underlying pipeline terminated no new elements can arrive,
    # so the stream is expected to be done now.
    self.assertTrue(stream.is_done())
Exemple #3
0
    def test_read_n(self):
        """Check that reads are bounded by the stream's ``max_n``.

        The cache holds the integers 0 through 4. For each ``max_n`` a fresh
        stream is created, read in full, and then expected to report done.
        """
        self.mock_result.set_state(PipelineState.DONE)
        self.cache.write(list(range(5)), 'full', self.cache_key)
        self.cache.save_pcoder(None, 'full', self.cache_key)

        everything = list(range(5))
        cases = (
            (1, [0]),
            (2, [0, 1]),
            (5, everything),
            # Asking for more elements than the cache holds must still
            # return, yielding only what is available.
            (10, everything),
        )
        for limit, expected in cases:
            stream = ElementStream(self.pcoll,
                                   '',
                                   self.cache_key,
                                   max_n=limit,
                                   max_duration_secs=1)
            self.assertEqual(list(stream.read()), expected)
            self.assertTrue(stream.is_done())