Ejemplo n.º 1
0
 def test_basic_test_stream(self):
     # Build the stream one fluent step at a time; each call hands back the
     # stream that the next step continues from.
     stream = TestStream().advance_watermark_to(0)
     stream = stream.add_elements([
         'a',
         WindowedValue('b', 3, []),
         TimestampedValue('c', 6),
     ])
     stream = stream.advance_processing_time(10)
     stream = stream.advance_watermark_to(8)
     stream = stream.add_elements(['d'])
     stream = stream.advance_watermark_to_infinity()

     # Every element is expected back as a TimestampedValue: the bare values
     # carry the watermark in effect when they were added (0 for 'a', 8 for
     # 'd'), while 'b' and 'c' keep the timestamps they were given.
     first_batch = ElementEvent([
         TimestampedValue('a', 0),
         TimestampedValue('b', 3),
         TimestampedValue('c', 6),
     ])
     second_batch = ElementEvent([TimestampedValue('d', 8)])
     expected_events = [
         WatermarkEvent(0),
         first_batch,
         ProcessingTimeEvent(10),
         WatermarkEvent(8),
         second_batch,
         # Advancing to infinity is recorded as a MAX_TIMESTAMP watermark.
         WatermarkEvent(timestamp.MAX_TIMESTAMP),
     ]
     self.assertEqual(stream._events, expected_events)
Ejemplo n.º 2
0
  def test_stream_payload_to_events(payload, coder):
    """Converts a TestStream event proto into its Python event object.

    The fields are probed in a fixed order: element_event, watermark_event,
    then processing_time_event. The first one that is set decides the result.

    Args:
      payload: TestStream event proto to convert.
      coder: object whose decode() turns each encoded element back into its
        value.

    Returns:
      An ElementEvent, WatermarkEvent or ProcessingTimeEvent mirroring the
      field that was set on the proto.

    Raises:
      RuntimeError: if none of the three event fields is set.
    """
    if payload.HasField('element_event'):
      proto = payload.element_event
      decoded = []
      for encoded in proto.elements:
        value = coder.decode(encoded.encoded_element)
        # Element timestamps are passed to Timestamp as microseconds.
        event_time = Timestamp(micros=encoded.timestamp)
        decoded.append(TimestampedValue(value, event_time))
      return ElementEvent(timestamped_values=decoded, tag=proto.tag)

    if payload.HasField('watermark_event'):
      proto = payload.watermark_event
      new_watermark = Timestamp(micros=proto.new_watermark)
      return WatermarkEvent(new_watermark, tag=proto.tag)

    if payload.HasField('processing_time_event'):
      proto = payload.processing_time_event
      advance_by = Duration(micros=proto.advance_duration)
      return ProcessingTimeEvent(advance_by)

    # None of the known event kinds was present on the proto.
    message = 'Received a proto without the specified fields: {}'.format(
        payload)
    raise RuntimeError(message)
Ejemplo n.º 3
0
  def _test_stream_init(self):
    """Returns the sentinel event holding the TestStream watermark at -inf.

    The hold keeps the output watermarks of the output PCollections from
    advancing to +inf before their own watermark holds have been set.

    Returns:
      A one-element list with a WatermarkEvent at MIN_TIMESTAMP on the
      watermark control tag.
    """
    initial_hold = WatermarkEvent(
        timestamp.MIN_TIMESTAMP, _TestStream.WATERMARK_CONTROL_TAG)
    return [initial_hold]
Ejemplo n.º 4
0
  def _watermark_starts(self, output_tags):
    """Returns sentinel events holding each output's watermark at -inf.

    The output PCollections of a TestStream (fake unbounded sources) have
    their output watermarks controlled by watermark holds. Setting a hold per
    output lets the given events control each hold individually.

    Args:
      output_tags: iterable of output tags, one per output PCollection.

    Returns:
      A list with one WatermarkEvent at MIN_TIMESTAMP per tag, in the order
      the tags were given.
    """
    holds = []
    for tag in output_tags:
      holds.append(WatermarkEvent(timestamp.MIN_TIMESTAMP, tag))
    return holds
Ejemplo n.º 5
0
  def _test_stream_start(self):
    """Returns the sentinel event moving the TestStream hold to +inf.

    The output watermark is computed as a min over all input watermarks, so
    raising this hold to +inf lets the individual holds of the output
    PCollections modify their own output watermarks.

    Returns:
      A one-element list with a WatermarkEvent on the watermark control tag,
      placed one TIME_GRANULARITY below MAX_TIMESTAMP.
    """
    # One granularity step short of MAX_TIMESTAMP, not MAX_TIMESTAMP itself.
    just_below_max = timestamp.MAX_TIMESTAMP - timestamp.TIME_GRANULARITY
    release_hold = WatermarkEvent(
        just_below_max, _TestStream.WATERMARK_CONTROL_TAG)
    return [release_hold]
Ejemplo n.º 6
0
  def test_basic_execution(self):
    # Input: three untimestamped letters at watermark 0, five watermark steps
    # (2, 4, 6, 8, 10) each followed by one unit of processing time, then
    # three digits explicitly timestamped at 15.
    stream = (
        TestStream()
        .advance_watermark_to(0)
        .advance_processing_time(5)
        .add_elements(['a', 'b', 'c']))
    for watermark in range(2, 11, 2):
      stream = stream.advance_watermark_to(watermark)
      stream = stream.advance_processing_time(1)
    stream = stream.add_elements(
        [TimestampedValue(digit, 15) for digit in ('1', '2', '3')])

    pipeline = TestPipeline(options=StandardOptions(streaming=True))

    recorded = (
        pipeline
        | stream
        | ReverseTestStream(sample_resolution_sec=1, output_tag=None))

    # The recorded watermark events carry the input watermarks scaled by
    # 1,000,000 (2 is expected back as 2000000, and so on).
    letters = ElementEvent(
        [TimestampedValue(letter, 0) for letter in ('a', 'b', 'c')])
    digits = ElementEvent(
        [TimestampedValue(digit, 15) for digit in ('1', '2', '3')])
    ticks = []
    for watermark in range(2000000, 10000001, 2000000):
      ticks.append([ProcessingTimeEvent(1), WatermarkEvent(watermark)])

    expected = (
        [[ProcessingTimeEvent(5), WatermarkEvent(0)], [letters]] + ticks +
        [[digits]])

    assert_that(
        recorded, equal_to_per_window({beam.window.GlobalWindow(): expected}))

    pipeline.run()
Ejemplo n.º 7
0
 def _test_stream_stop(self):
   """Returns the sentinel event that closes the TestStream watermark.

   Returns:
     A one-element list with a WatermarkEvent at MAX_TIMESTAMP on the
     watermark control tag.
   """
   closing_event = WatermarkEvent(
       timestamp.MAX_TIMESTAMP, _TestStream.WATERMARK_CONTROL_TAG)
   return [closing_event]
Ejemplo n.º 8
0
 def _watermark_stops(self, output_tags):
   """Returns sentinel events that close the watermark of every output.

   Args:
     output_tags: iterable of output tags, one per output.

   Returns:
     A list with one WatermarkEvent at MAX_TIMESTAMP per tag, in the order
     the tags were given.
   """
   closing_events = []
   for tag in output_tags:
     closing_events.append(WatermarkEvent(timestamp.MAX_TIMESTAMP, tag))
   return closing_events
Ejemplo n.º 9
0
    def test_windowing(self):
        # Input: letters at watermark 0, five single-unit processing-time
        # advances, digits at watermark 5, then the watermark stepped one
        # unit at a time from 6 through 15 with a processing-time advance
        # after every step.
        stream = (
            TestStream()
            .advance_watermark_to(0)
            .add_elements(['a', 'b', 'c']))
        for _ in range(5):
            stream = stream.advance_processing_time(1)
        stream = (
            stream
            .advance_watermark_to(5)
            .add_elements(['1', '2', '3'])
            .advance_processing_time(1))
        for watermark in range(6, 16):
            stream = stream.advance_watermark_to(watermark)
            stream = stream.advance_processing_time(1)

        pipeline = TestPipeline(options=StandardOptions(streaming=True))

        grouped = (
            pipeline
            | stream
            | 'letter windows' >> beam.WindowInto(
                FixedWindows(5),
                accumulation_mode=trigger.AccumulationMode.DISCARDING)
            | 'letter with key' >> beam.Map(lambda x: ('k', x))
            | 'letter gbk' >> beam.GroupByKey()
            | ReverseTestStream(sample_resolution_sec=1, output_tag=None))

        def ticks(first, last):
            # One [processing-time, watermark] pair per whole unit in
            # [first, last], with the watermark scaled by 1,000,000.
            return [[ProcessingTimeEvent(1),
                     WatermarkEvent(unit * 1000000)]
                    for unit in range(first, last + 1)]

        # Each grouped batch is expected just below the end of its 5-unit
        # window (4.999999 and 9.999999).
        letters = ElementEvent(
            [TimestampedValue(('k', ['a', 'b', 'c']), 4.999999)])
        digits = ElementEvent(
            [TimestampedValue(('k', ['1', '2', '3']), 9.999999)])

        expected = (
            [[ProcessingTimeEvent(5), WatermarkEvent(4999998)], [letters]] +
            ticks(5, 9) + [[digits]] + ticks(10, 15))

        assert_that(
            grouped,
            equal_to_per_window({beam.window.GlobalWindow(): expected}))

        pipeline.run()