Example #1
0
 def test_timestamped_with_combiners(self):
   # Verifies that per-key combiners aggregate per key *and* per window:
   # the values 0..9 are re-timestamped with their own value, keyed by
   # value // 5, and then summed / averaged.
   with TestPipeline() as pipeline:
     # Seed data: the pairs (0, 0) .. (9, 9).
     seed_pairs = pipeline | 'start' >> Create([(n, n) for n in range(10)])

     # Install a FixedWindows windowing function of size 5 (Beam documents
     # this size in seconds). At this point it does not spread the
     # elements over several windows: the timestamps coming out of Create
     # are not spaced 5 apart, so very likely they all share one window.
     windowed = seed_pairs | 'w' >> WindowInto(FixedWindows(5))

     # Re-emit each pair as a TimestampedValue: element 0 is the value and
     # element 1 is its timestamp. Map propagates the windowing function
     # from input to output, so the outputs now fall into different
     # size-5 windows.
     stamped = windowed | Map(
         lambda pair: TimestampedValue(pair[0], pair[1]))

     # Key every value by the index of its window (value // 5). A
     # PCollection gives no ordering guarantee, so the key is what makes
     # the expected results below deterministic.
     keyed = stamped | Map(lambda value: (value // 5, value))

     # CombinePerKey really combines per key *and* window, in the same way
     # that GroupByKey really groups per key and window.
     sum_per_window = keyed | CombinePerKey(sum)
     # Mean of the values, again per key and window.
     mean_per_window = keyed | combiners.Mean.PerKey()

     # Key 0 holds 0..4 and key 1 holds 5..9.
     expected_sums = [(0, 10), (1, 35)]
     expected_means = [(0, 2.0), (1, 7.0)]
     assert_that(
         sum_per_window, equal_to(expected_sums), label='assert:sum')
     assert_that(
         mean_per_window, equal_to(expected_means), label='assert:mean')
Example #2
0
 def expand(self, pcoll):
     """Reduce ``pcoll`` to the sampled rows for each primary-key value.

     Each row is paired with ``row[self.primary_key]``, the pairs are
     combined per key with ``SampleCombineFn(1)``, and the combined value
     of every key is flattened back into individual output elements.

     Args:
         pcoll: Input PCollection whose rows can be indexed with
             ``self.primary_key``.

     Returns:
         A PCollection holding the elements of each key's combined value,
         with the keys removed. Presumably this is one sampled row per
         key -- confirm against ``SampleCombineFn``.
     """
     # Emit a single (key, row) pair for every input row.
     keyed_rows = pcoll | 'Extract Primary Key' >> beam.FlatMap(
         lambda record: [(record[self.primary_key], record)])

     # Combine all rows that share a key using a sample of size 1.
     sampled = keyed_rows | 'Sample n=1 by Primary Key' >> CombinePerKey(
         SampleCombineFn(1))

     # pair[1] is the combined value for the key; FlatMap emits each of its
     # elements on its own, which discards the key.
     return sampled | 'Drop keys' >> beam.FlatMap(lambda pair: pair[1])