Beispiel #1
0
 def expand(self, pcoll):
     """Collect all elements of ``pcoll`` into one group and apply ``matcher``.

     The input is re-windowed into the global window, every element is
     paired with the key ``None``, and the pairs are grouped so that the
     values arrive at ``matcher`` as a single grouped iterable.

     Args:
       pcoll: the PCollection whose contents are handed to ``matcher``.

     Returns:
       The PCollection produced by mapping ``matcher`` over the grouped
       values.

     NOTE(review): a plain GroupByKey presumably emits nothing for an empty
     input, in which case ``matcher`` would never run -- confirm whether
     that is acceptable for callers.
     """
     return (pcoll
             | WindowInto(window.GlobalWindows())
             | "ToVoidKey" >> Map(lambda v: (None, v))
             | "Group" >> GroupByKey()
             # Index into the (key, values) pair rather than unpacking it in
             # the lambda signature: tuple parameters are a SyntaxError on
             # Python 3 (PEP 3113).
             | "UnKey" >> Map(lambda kv: kv[1])
             | "Match" >> Map(matcher))
Beispiel #2
0
    def expand(self, pcoll):
        """Group ``pcoll`` by key, then re-emit each pair as it came in.

        Each value is first wrapped together with its timestamp, the
        collection is re-windowed with ``_IdentityWindowFn`` (firing after
        every element, discarding mode), grouped by key, flattened back to
        individual ``(key, value)`` pairs and finally re-emitted with the
        recorded timestamp and the current window.  The windowing of the
        input is copied onto the result.

        NOTE(review): this looks like a per-key reshuffle -- confirm against
        the enclosing class.

        Args:
          pcoll: a PCollection of ``(key, value)`` pairs.

        Returns:
          A PCollection of ``(key, value)`` pairs carrying the windowing of
          ``pcoll``.
        """
        class ReifyTimestamps(DoFn):
            """Pairs the key with a TimestampedValue holding the value."""

            def process(self, element, timestamp=DoFn.TimestampParam):
                key = element[0]
                yield key, TimestampedValue(element[1], timestamp)

        class RestoreTimestamps(DoFn):
            """Unwraps the TimestampedValue and re-emits a WindowedValue."""

            def process(self, element, window=DoFn.WindowParam):
                key = element[0]
                stamped = element[1]
                # The current window is supplied explicitly because
                # _IdentityWindowFn would not know how to generate it.
                yield windowed_value.WindowedValue(
                    (key, stamped.value), stamped.timestamp, [window])

        original_windowing = pcoll.windowing

        # The linter is confused.
        # pylint: disable=abstract-class-instantiated
        reified = pcoll | ParDo(ReifyTimestamps())
        rewindowed = reified | 'IdentityWindow' >> WindowInto(
            _IdentityWindowFn(original_windowing.windowfn.get_window_coder()),
            trigger=AfterCount(1),
            accumulation_mode=AccumulationMode.DISCARDING,
            timestamp_combiner=TimestampCombiner.OUTPUT_AT_EARLIEST,
        )
        grouped = rewindowed | GroupByKey()
        # Turn each (key, iterable) back into one (key, item) pair per item.
        flattened = grouped | 'ExpandIterable' >> FlatMap(
            lambda keyed: [(keyed[0], item) for item in keyed[1]])
        restored = flattened | ParDo(RestoreTimestamps())

        # Make the output report the same windowing as the input.
        restored._windowing = original_windowing
        return restored
Beispiel #3
0
 def expand(self, pcoll):
     """Apply ``matcher`` to the full contents of ``pcoll``.

     The input is re-windowed into the global window and keyed by ``None``,
     then co-grouped with a one-element keyed collection so that a group
     exists -- and ``matcher`` runs -- even when ``pcoll`` is empty.

     Args:
       pcoll: the PCollection whose contents are handed to ``matcher``.

     Returns:
       None; the matching step is attached to the pipeline as a side effect.
     """
     # We must have at least a single element to ensure the matcher
     # code gets run even if the input pcollection is empty.
     keyed_singleton = pcoll.pipeline | Create([(None, None)])
     keyed_actual = (pcoll
                     | WindowInto(window.GlobalWindows())
                     | "ToVoidKey" >> Map(lambda v: (None, v)))
     _ = ((keyed_singleton, keyed_actual)
          | "Group" >> CoGroupByKey()
          # Each grouped element is (key, (singleton_values, actual_values)).
          # Index into it rather than unpacking in the lambda signature:
          # tuple parameters are a SyntaxError on Python 3 (PEP 3113).
          | "Unkey" >> Map(lambda k_values: k_values[1][1])
          | "Match" >> Map(matcher))
Beispiel #4
0
    def expand(self, pcoll):
      """Attach a step that runs ``matcher`` over the contents of ``pcoll``.

      When ``reify_windows`` is truthy the input first passes through
      ``ReifyTimestampWindow`` (defined elsewhere).  The elements are then
      moved into the global window, keyed by ``None`` and co-grouped with a
      one-element keyed collection before ``matcher`` is applied.

      Args:
        pcoll: the PCollection to check.

      Returns:
        None; the steps are added to the pipeline as a side effect.
      """
      source = pcoll
      if reify_windows:
        source = source | ParDo(ReifyTimestampWindow())

      # A single placeholder element guarantees that a group exists, so the
      # matcher code runs even if the input pcollection is empty.
      placeholder = source.pipeline | Create([(None, None)])

      in_global_window = source | WindowInto(window.GlobalWindows())
      void_keyed = in_global_window | "ToVoidKey" >> Map(
          lambda element: (None, element))

      grouped = (placeholder, void_keyed) | "Group" >> CoGroupByKey()
      # [1] is the tuple of per-input groups; its [1] entry is the group
      # that came from the second input, i.e. the actual values.
      actual_values = grouped | "Unkey" >> Map(lambda keyed: keyed[1][1])
      _ = actual_values | "Match" >> Map(matcher)
Beispiel #5
0
        def expand(self, pcoll):
            """Attach a step that runs ``matcher`` over the contents of ``pcoll``.

            When ``reify_windows`` is truthy the input first passes through
            ``ReifyTimestampWindow``.  Elements are windowed with
            ``custom_windowing`` if one is set, otherwise into the global
            window, keyed by ``None`` and co-grouped with a one-element
            keyed collection.  With custom windowing an extra ``AddWindow``
            step runs before ``matcher``.

            Args:
              pcoll: the PCollection to check.

            Returns:
              None; the steps are added to the pipeline as a side effect.
            """
            source = pcoll
            if reify_windows:
                source = source | ParDo(ReifyTimestampWindow())

            # NOTE(review): presumably present so that a group exists even
            # for an empty input -- confirm.
            placeholder = source.pipeline | Create([(None, None)])

            rewindowed = source | WindowInto(
                custom_windowing or window.GlobalWindows())
            void_keyed = rewindowed | "ToVoidKey" >> Map(
                lambda element: (None, element))

            grouped = (placeholder, void_keyed) | "Group" >> CoGroupByKey()
            # keyed[1] is the tuple of per-input groups; its [1] entry is
            # the group that came from the second input (void_keyed).
            values = grouped | "Unkey" >> Map(lambda keyed: keyed[1][1])

            if custom_windowing:
                # AddWindow is defined elsewhere; presumably it pairs the
                # values with their window -- TODO confirm.
                values = values | "AddWindow" >> ParDo(AddWindow())

            _ = values | "Match" >> Map(matcher)
Beispiel #6
0
        def expand(self, pcoll):
            """Attach a step that runs ``matcher`` over the contents of ``pcoll``.

            When ``reify_windows`` is truthy the input first passes through
            ``ReifyTimestampWindow``.  If ``use_global_window`` is truthy the
            elements are moved into the global window; otherwise their
            existing windowing is left alone and an ``AddWindow`` step runs
            before ``matcher``.

            Args:
              pcoll: the PCollection to check.

            Returns:
              None; the steps are added to the pipeline as a side effect.
            """
            source = pcoll
            if reify_windows:
                source = source | ParDo(ReifyTimestampWindow())

            placeholder = source.pipeline | Create([(None, None)])

            if use_global_window:
                source = source | WindowInto(window.GlobalWindows())

            void_keyed = source | "ToVoidKey" >> Map(
                lambda element: (None, element))

            # Co-grouping with the placeholder means the matcher always
            # runs, even if the PCollection is empty.
            grouped = (placeholder, void_keyed) | "Group" >> CoGroupByKey()
            # keyed[1] is the tuple of per-input groups; its [1] entry is
            # the group that came from the second input (void_keyed).
            values = grouped | "Unkey" >> Map(lambda keyed: keyed[1][1])

            if not use_global_window:
                # AddWindow is defined elsewhere; presumably it pairs the
                # values with their window -- TODO confirm.
                values = values | "AddWindow" >> ParDo(AddWindow())

            _ = values | "Match" >> Map(matcher)