Ejemplo n.º 1
0
    def test_sessions_watermark(self):
        # Checks AfterWatermark() over Sessions(10) in ACCUMULATING mode through
        # the run_trigger_simple helper.  The integers after the expected
        # mapping are forwarded positionally to that helper (presumably bundle
        # sizes to retry with -- confirm against run_trigger_simple).

        # Two elements at timestamps 1 and 2; a single output is expected for
        # IntervalWindow(1, 12).
        windowfn = Sessions(10)
        trigger = AfterWatermark()
        elements = [(1, 'a'), (2, 'b')]
        expected = {IntervalWindow(1, 12): [set('ab')]}
        self.run_trigger_simple(
            windowfn, trigger, AccumulationMode.ACCUMULATING, elements,
            expected, 1, 2)

        # Eight elements listed out of timestamp order; 'a'..'f' are expected
        # in IntervalWindow(1, 26) and 'y', 'z' in IntervalWindow(30, 40).
        windowfn = Sessions(10)
        trigger = AfterWatermark()
        elements = [
            (1, 'a'), (2, 'b'), (15, 'c'), (16, 'd'),
            (30, 'z'), (9, 'e'), (10, 'f'), (30, 'y'),
        ]
        expected = {
            IntervalWindow(1, 26): [set('abcdef')],
            IntervalWindow(30, 40): [set('yz')],
        }
        self.run_trigger_simple(
            windowfn, trigger, AccumulationMode.ACCUMULATING, elements,
            expected, 1, 2, 3, 4, 5, 6)
Ejemplo n.º 2
0
 def test_sessions_repeatedly_after_count(self):
     # Runs Repeatedly(AfterCount(2)) over Sessions(10) on the same five
     # elements once per accumulation mode.  Only the mode and the second
     # expected output differ between the two runs: set('abcde') when
     # ACCUMULATING, set('de') when DISCARDING.
     cases = (
         (AccumulationMode.ACCUMULATING, set('abcde')),
         (AccumulationMode.DISCARDING, set('de')),
     )
     for mode, second_output in cases:
         # Every argument is rebuilt on each pass so both runs receive fresh
         # objects, just as two separately written calls would.
         windowfn = Sessions(10)
         trigger = Repeatedly(AfterCount(2))
         elements = [(1, 'a'), (15, 'b'), (6, 'c'), (2, 'd'), (7, 'e')]
         expected = {IntervalWindow(1, 25): [set('abc'), second_output]}
         self.run_trigger_simple(
             windowfn, trigger, mode, elements, expected, 1, 3)
Ejemplo n.º 3
0
 def test_sessions_after_all(self):
     # Exercises AfterAll(AfterCount(n), AfterWatermark()) over Sessions(10)
     # in ACCUMULATING mode, via the run_trigger_simple helper.

     # n = 2, three elements, no late data: one output holding 'a', 'b', 'c'
     # is expected for IntervalWindow(1, 13).
     windowfn = Sessions(10)
     trigger = AfterAll(AfterCount(2), AfterWatermark())
     elements = [(1, 'a'), (2, 'b'), (3, 'c')]
     expected = {IntervalWindow(1, 13): [set('abc')]}
     self.run_trigger_simple(
         windowfn, trigger, AccumulationMode.ACCUMULATING, elements,
         expected, 1, 2)

     # n = 5, same three elements plus three late ones: the single expected
     # output contains 'x' and 'y' but not 'z'.
     windowfn = Sessions(10)
     trigger = AfterAll(AfterCount(5), AfterWatermark())
     elements = [(1, 'a'), (2, 'b'), (3, 'c')]
     expected = {IntervalWindow(1, 13): [set('abcxy')]}
     late_elements = [(1, 'x'), (2, 'y'), (3, 'z')]
     self.run_trigger_simple(
         windowfn, trigger, AccumulationMode.ACCUMULATING, elements,
         expected, 1, 2, late_data=late_elements)
Ejemplo n.º 4
0
 def test_sessions(self):
   # Windows six timestamped 'key' values with Sessions(10), groups them by
   # key, and checks the reified "key @ window" strings and sorted values.
   pipeline = Pipeline('DirectPipelineRunner')
   timestamps = (1, 2, 3, 20, 35, 27)
   keyed = self.timestamped_key_values(pipeline, 'key', *timestamps)

   # Same chain as a single piped expression, spelled out one step at a time.
   windowed = keyed | WindowInto('w', Sessions(10))
   grouped = windowed | GroupByKey()
   ordered = grouped | sort_values
   reified = ordered | reify_windows

   assert_that(
       reified,
       equal_to([
           ('key @ [1.0, 13.0)', [1, 2, 3]),
           ('key @ [20.0, 45.0)', [20, 27, 35]),
       ]))
   pipeline.run()
Ejemplo n.º 5
0
    def test_sessions_after_each(self):
        # Exercises AfterEach(AfterCount(2), AfterCount(3)) over Sessions(10)
        # in ACCUMULATING mode, first on its own and then wrapped in
        # Repeatedly.  Input pairs timestamps 0..9 with the letters 'a'..'j'.
        letters = 'abcdefghij'

        # Plain AfterEach: two outputs expected ('ab', then 'abcdef').
        windowfn = Sessions(10)
        trigger = AfterEach(AfterCount(2), AfterCount(3))
        # zip() is built fresh for each call; it is handed over as-is.
        elements = zip(range(10), letters)
        expected = {
            IntervalWindow(0, 11): [set('ab')],
            IntervalWindow(0, 15): [set('abcdef')],
        }
        self.run_trigger_simple(
            windowfn, trigger, AccumulationMode.ACCUMULATING, elements,
            expected, 2)

        # Repeatedly(AfterEach(...)): one further output ('abcdefgh') is
        # expected in addition to the two above.
        windowfn = Sessions(10)
        trigger = Repeatedly(AfterEach(AfterCount(2), AfterCount(3)))
        elements = zip(range(10), letters)
        expected = {
            IntervalWindow(0, 11): [set('ab')],
            IntervalWindow(0, 15): [set('abcdef')],
            IntervalWindow(0, 17): [set('abcdefgh')],
        }
        self.run_trigger_simple(
            windowfn, trigger, AccumulationMode.ACCUMULATING, elements,
            expected, 2)
Ejemplo n.º 6
0
 def test_sessions_after_count(self):
     # Exercises AfterCount(2) over Sessions(10) in ACCUMULATING mode: seven
     # elements are supplied and one output is expected in each of three
     # windows.  The trailing 1, 2, 3 are forwarded positionally to
     # run_trigger_simple (presumably bundle sizes -- confirm in the helper).
     windowfn = Sessions(10)
     trigger = AfterCount(2)
     elements = [
         (1, 'a'), (15, 'b'), (6, 'c'),
         (30, 's'), (31, 't'),
         (50, 'z'), (50, 'y'),
     ]
     expected = {
         IntervalWindow(1, 25): [set('abc')],
         IntervalWindow(30, 41): [set('st')],
         IntervalWindow(50, 60): [set('yz')],
     }
     self.run_trigger_simple(
         windowfn, trigger, AccumulationMode.ACCUMULATING, elements,
         expected, 1, 2, 3)
Ejemplo n.º 7
0
 def test_sessions_watermark_with_early_late(self):
     # Exercises AfterWatermark with an early trigger of AfterCount(2) and a
     # late trigger of AfterCount(1) over Sessions(10) in ACCUMULATING mode,
     # with three extra elements supplied through late_data.
     windowfn = Sessions(10)
     trigger = AfterWatermark(early=AfterCount(2), late=AfterCount(1))
     elements = [(1, 'a'), (15, 'b'), (7, 'c'), (30, 'd')]

     first_session_outputs = [
         set('abc'),  # early
         set('abc'),  # on time
         set('abcxy'),  # late
     ]
     second_session_outputs = [
         set('d'),  # on time
     ]
     merged_session_outputs = [
         set('abcdxyz'),  # late
     ]
     expected = {
         IntervalWindow(1, 25): first_session_outputs,
         IntervalWindow(30, 40): second_session_outputs,
         IntervalWindow(1, 40): merged_session_outputs,
     }

     late_elements = [(1, 'x'), (2, 'y'), (21, 'z')]
     self.run_trigger_simple(
         windowfn, trigger, AccumulationMode.ACCUMULATING, elements,
         expected, 2, late_data=late_elements)
Ejemplo n.º 8
0
  def test_sessions_merging(self):
    # Drives Sessions(10).merge() by hand: windows are assigned one timestamp
    # at a time, merged after each assignment, and the surviving windows are
    # compared against the expected IntervalWindows.
    session_fn = Sessions(10)

    def merge(*timestamps):
      # Returns the sorted windows left after assigning and merging each
      # timestamp in the order given.
      active = set()

      class TestMergeContext(WindowFn.MergeContext):
        # Merge context constructed over the shared `active` set; its
        # merge() callback applies each merge result back onto that set.

        def __init__(self):
          super(TestMergeContext, self).__init__(active)

        def merge(self, to_be_merged, merge_result):
          # discard() tolerates windows that are not (or no longer) tracked.
          for stale in to_be_merged:
            active.discard(stale)
          active.add(merge_result)

      assigned = [
          session_fn.assign(context(None, stamp, [])) for stamp in timestamps
      ]
      for new_windows in assigned:
        active.update(new_windows)
        session_fn.merge(TestMergeContext())
      # One extra merge pass after all assignments, as in the loop above.
      session_fn.merge(TestMergeContext())
      return sorted(active)

    # (expected windows, timestamps) pairs, checked in the order listed.
    cases = [
        ([IntervalWindow(2, 12)], (2,)),
        ([IntervalWindow(2, 12), IntervalWindow(19, 29)], (2, 19)),
        ([IntervalWindow(2, 19)], (2, 9)),
        ([IntervalWindow(2, 19)], (9, 2)),
        ([IntervalWindow(2, 19), IntervalWindow(19, 29)], (2, 9, 19)),
        ([IntervalWindow(2, 19), IntervalWindow(19, 29)], (19, 9, 2)),
        ([IntervalWindow(2, 25)], (2, 15, 10)),
    ]
    for expected_windows, stamps in cases:
      self.assertEqual(expected_windows, merge(*stamps))