예제 #1
0
 def test_sliding_windows_assignment_fraction(self):
     """Check window assignment when size, period and offset are fractional."""
     sliding = SlidingWindows(size=3.5, period=2.5, offset=1.5)

     # 1.7 lies inside both [1.5, 5.0) and [-1.0, 2.5), so two windows are
     # expected, listed from the latest start to the earliest.
     overlapping = [IntervalWindow(1.5, 5.0), IntervalWindow(-1.0, 2.5)]
     self.assertEqual(overlapping, sliding.assign(context('v', 1.7)))

     # 3 lies only inside [1.5, 5.0), so a single window is expected.
     single = [IntervalWindow(1.5, 5.0)]
     self.assertEqual(single, sliding.assign(context('v', 3)))
예제 #2
0
파일: window_test.py 프로젝트: ziel/beam
 def test_sliding_windows_assignment(self):
   """Inputs 7, 8 and 11 must all be assigned the same three windows.

   The second argument to context() is presumably the element timestamp
   (the helper is defined outside this snippet).
   """
   sliding = SlidingWindows(size=15, period=5, offset=2)
   # Expected windows, ordered from the latest start to the earliest.
   three_windows = [
       IntervalWindow(7, 22),
       IntervalWindow(2, 17),
       IntervalWindow(-3, 12),
   ]
   for timestamp in (7, 8, 11):
     self.assertEqual(three_windows, sliding.assign(context('v', timestamp)))
예제 #3
0
 def test_sliding_windows_assignment(self):
   """Verify that 7, 8 and 11 are each placed in the same set of windows."""
   window_size = 15
   windowfn = SlidingWindows(size=window_size, period=5, offset=2)
   # Window starts from latest to earliest; each window spans `window_size`,
   # giving [7, 22), [2, 17) and [-3, 12).
   expected = [
       IntervalWindow(start, start + window_size) for start in (7, 2, -3)
   ]
   for value in (7, 8, 11):
     assigned = windowfn.assign(context('v', value))
     self.assertEqual(expected, assigned)
예제 #4
0
    def test_sdf_with_windowed_timestamped_input(self):
        """Run ExpandStrings over keyed, timestamped input in sliding windows.

        Each input number t becomes two elements, ('A', t) and ('B', t), both
        stamped with t and windowed by SlidingWindows(10, 5). Every expected
        string has three colon-separated fields; they appear to be key, value
        and window start (ExpandStrings is defined outside this snippet).
        """
        expected_result = [
            'A:1:-5', 'A:1:0',
            'A:3:-5', 'A:3:0',
            'A:5:0', 'A:5:5',
            'A:10:5', 'A:10:10',
            'B:1:-5', 'B:1:0',
            'B:3:-5', 'B:3:0',
            'B:5:0', 'B:5:5',
            'B:10:5', 'B:10:10',
        ]

        with TestPipeline(runner='DirectRunner') as p:
            # Fan every number out to an 'A' and a 'B' element stamped with it.
            timestamped = (
                p
                | beam.Create([1, 3, 5, 10])
                | beam.FlatMap(lambda t: [
                    TimestampedValue(('A', t), t),
                    TimestampedValue(('B', t), t)
                ]))
            windowed = timestamped | beam.WindowInto(
                SlidingWindows(10, 5),
                accumulation_mode=AccumulationMode.DISCARDING)
            result = windowed | beam.ParDo(ExpandStrings(record_window=True))

            assert_that(result, equal_to(expected_result))
예제 #5
0
 def test_windowfn_encoding(self):
     """Each WindowFn must compare equal after a runner API round trip."""
     window_fns = (
         GlobalWindows(),
         FixedWindows(37),
         SlidingWindows(2, 389),
         Sessions(5077),
     )
     for original in window_fns:
         # A fresh context per window fn, shared by the encode and decode steps.
         ctx = pipeline_context.PipelineContext()
         encoded = original.to_runner_api(ctx)
         decoded = WindowFn.from_runner_api(encoded, ctx)
         self.assertEqual(original, decoded)
예제 #6
0
파일: window_test.py 프로젝트: ziel/beam
 def test_sliding_windows(self):
   """Group timestamped values for one key under size-4, period-2 windows."""
   # Each entry pairs the reified "key @ window" label with its grouped values.
   expected = [
       ('key @ [-2.0, 2.0)', [1]),
       ('key @ [0.0, 4.0)', [1, 2, 3]),
       ('key @ [2.0, 6.0)', [2, 3]),
   ]
   with TestPipeline() as p:
     keyed = self.timestamped_key_values(p, 'key', 1, 2, 3)
     windowed = keyed | 'w' >> WindowInto(SlidingWindows(period=2, size=4))
     result = windowed | GroupByKey() | reify_windows
     assert_that(result, equal_to(expected))
예제 #7
0
파일: window_test.py 프로젝트: ziel/beam
 def test_windowing_encoding(self):
   """Each Windowing must compare equal after a runner API round trip."""
   windowings = (
       Windowing(GlobalWindows()),
       Windowing(
           FixedWindows(1, 3),
           AfterCount(6),
           accumulation_mode=AccumulationMode.ACCUMULATING),
       Windowing(
           SlidingWindows(10, 15, 21),
           AfterCount(28),
           timestamp_combiner=TimestampCombiner.OUTPUT_AT_LATEST,
           accumulation_mode=AccumulationMode.DISCARDING),
   )
   for original in windowings:
     # A fresh context per windowing, shared by the encode and decode steps.
     ctx = pipeline_context.PipelineContext()
     encoded = original.to_runner_api(ctx)
     decoded = Windowing.from_runner_api(encoded, ctx)
     self.assertEqual(original, decoded)
예제 #8
0
파일: window_test.py 프로젝트: ziel/beam
 def test_rewindow(self):
   """Rewindowing into fixed windows keeps the copies made by sliding windows."""
   # Every value 0..9 is expected three times, in two groups of five values.
   expected = [
       ('key', sorted([0, 1, 2, 3, 4] * 3)),
       ('key', sorted([5, 6, 7, 8, 9] * 3)),
   ]
   with TestPipeline() as p:
     # Each k becomes the value k stamped with timestamp k.
     timestamped = (
         p
         | Create([(k, k) for k in range(10)])
         | Map(lambda x_t1: TimestampedValue(x_t1[0], x_t1[1])))
     # Per the model, each element is now duplicated across
     # three windows. Rewindowing must preserve this duplication.
     slid = (
         timestamped
         | 'window' >> WindowInto(SlidingWindows(period=2, size=6)))
     rewindowed = (
         slid
         | 'rewindow' >> WindowInto(FixedWindows(5))
         | 'rewindow2' >> WindowInto(FixedWindows(5)))
     result = rewindowed | Map(lambda v: ('key', v)) | GroupByKey()
     assert_that(result, equal_to(expected))
예제 #9
0
  def test_window_param(self):
    """A DoFn asking for DoFn.WindowParam receives each element's window."""
    class TestDoFn(DoFn):
      """Pairs every element with the (start, end) bounds of its window."""
      def process(self, element, window=DoFn.WindowParam):
        bounds = (float(window.start), float(window.end))
        yield (element, bounds)

    # Each element is expected once per window that contains it.
    expected = [
        (1, (-5, 5)), (1, (0, 10)),
        (7, (0, 10)), (7, (5, 15)),
    ]

    pipeline = TestPipeline()
    # Each value is stamped with itself as its timestamp.
    timestamped = (
        pipeline
        | Create([1, 7])
        | Map(lambda x: TimestampedValue(x, x)))
    windowed = timestamped | WindowInto(windowfn=SlidingWindows(10, 5))
    pcoll = windowed | ParDo(TestDoFn())
    assert_that(pcoll, equal_to(expected))
    pipeline.run()
예제 #10
0
 def test_reshuffle_sliding_window(self):
   """Grouped output must be the same before and after a Reshuffle."""
   window_size = 2
   data = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 4)]
   # The grouped result is expected once per overlapping window.
   grouped_once = [(1, [1, 2, 4]), (2, [1, 2]), (3, [1])]
   expected_data = grouped_once * window_size

   pipeline = TestPipeline()
   windowed = (
       pipeline
       | beam.Create(data)
       | beam.WindowInto(SlidingWindows(size=window_size, period=1)))
   before_reshuffle = windowed | beam.GroupByKey()
   assert_that(
       before_reshuffle, equal_to(expected_data), label='before_reshuffle')

   # If Reshuffle applies the sliding window function a second time there
   # should be extra values for each key.
   after_reshuffle = before_reshuffle | beam.Reshuffle()
   assert_that(
       after_reshuffle, equal_to(expected_data), label='after reshuffle')
   pipeline.run()
예제 #11
0
def run():
    """Stream Pub/Sub JSON messages through sliding windows into BigQuery.

    The `resolution` option selects the subscription name and scales the
    window size (60 * resolution). Messages are decoded from UTF-8, parsed
    as JSON, windowed, passed through AddTimestamp and written to the
    'iex.quote' table.
    """
    schema = 'symbol:STRING,latest_price:FLOAT,window_end:TIMESTAMP,event_time:TIMESTAMP,resolution_minutes:INTEGER'
    subscription_prefix = 'projects/iex-stream/subscriptions/iex-aggregate-'

    pipeline_options = PipelineOptions(streaming=True)
    resolution = pipeline_options.view_as(MyOptions).resolution.get()

    with beam.Pipeline(options=pipeline_options) as p:
        subscription_id = subscription_prefix + str(resolution)

        # Source: raw bytes from Pub/Sub, decoded and parsed into objects.
        source = beam.io.ReadFromPubSub(
            subscription=subscription_id).with_output_types(bytes)
        lines = (
            p
            | source
            | 'decode' >> beam.Map(lambda x: x.decode('utf-8'))
            | beam.Map(json.loads))

        # Sink: window, run AddTimestamp, then write to BigQuery.
        windowed = lines | 'CreateWindow' >> beam.WindowInto(
            SlidingWindows(60 * resolution, 10, 5))
        stamped = windowed | 'AddWindowEndTimestamp' >> beam.ParDo(
            AddTimestamp(resolution=resolution))
        stamped | 'WriteToBigQuery' >> beam.io.WriteToBigQuery(
            'iex.quote', schema=schema)
예제 #12
0
    def test_sliding_windows_simple_watermark(self):
        """Check per-key, per-window grouping for sliding windows on a TestStream.

        The stream emits ('k1', v) and ('k2', v) elements while the watermark
        advances through 0, 1 and 2 and then to infinity. The elements are
        windowed, passed through the trigger manager DoFns, and the output
        is compared with the expected values for each (key, window) pair.
        """
        # yapf: disable
        test_stream = (
            TestStream()
              .advance_watermark_to(0)
              .add_elements([('k1', 1), ('k2', 1), ('k1', 1), ('k2', 1)])
              .add_elements([('k1', 1), ('k2', 1)])
              .advance_watermark_to(1)
              .add_elements([('k1', 2), ('k2', 2)])
              .add_elements([('k1', 2), ('k2', 2)])
              .advance_watermark_to(2)
              .add_elements([('k1', 3), ('k2', 3)])
              .add_elements([('k1', 3), ('k2', 3)])
              .advance_watermark_to_infinity())
        # yapf: enable

        # Sliding windows of size 2 and period 1. No trigger is passed to
        # Windowing, so whatever default it defines applies.
        windowing = Windowing(SlidingWindows(2, 1))

        with TestPipeline() as p:
            result = (
                p
                | test_stream
                | WindowInto(windowing.windowfn)
                | ParDo(trigger_manager._ReifyWindows())
                | ParDo(trigger_manager._GroupBundlesByKey())
                | ParDo(trigger_manager.GeneralTriggerManagerDoFn(windowing))
                |
                # Reduce each output to (key, window of the first value,
                # list of raw values) so it can be compared directly.
                Map(lambda elm:
                    (elm[0], elm[1][0].windows[0], [v.value for v in elm[1]])))
            assert_that(
                result,
                equal_to([
                    ('k1', IntervalWindow(-1, 1), [1, 1, 1]),
                    ('k2', IntervalWindow(-1, 1), [1, 1, 1]),
                    ('k1', IntervalWindow(0, 2), [1, 1, 1, 2, 2]),
                    ('k2', IntervalWindow(0, 2), [1, 1, 1, 2, 2]),
                    ('k1', IntervalWindow(1, 3), [2, 2, 3, 3]),
                    ('k2', IntervalWindow(1, 3), [2, 2, 3, 3]),
                    ('k1', IntervalWindow(2, 4), [3, 3]),
                    ('k2', IntervalWindow(2, 4), [3, 3]),
                ]))
예제 #13
0
 def test_sliding_windows_assignment_fraction_large_offset(self):
   """Check fractional window assignment when the offset exceeds the period."""
   sliding = SlidingWindows(size=3.5, period=2.5, offset=4.0)

   # 1.7 lies inside [1.5, 5.0) and [-1.0, 2.5); latest start is listed first.
   around_1_7 = [IntervalWindow(1.5, 5.0), IntervalWindow(-1.0, 2.5)]
   self.assertEqual(around_1_7, sliding.assign(context('v', 1.7)))

   # 4.5 lies inside [4.0, 7.5) and [1.5, 5.0).
   around_4_5 = [IntervalWindow(4.0, 7.5), IntervalWindow(1.5, 5.0)]
   self.assertEqual(around_4_5, sliding.assign(context('v', 4.5)))