def expand(self, pcoll):
    """Gather all elements of ``pcoll`` into one group and apply the matcher.

    The input is re-windowed into the global window, every element is keyed
    by ``None``, the keyed elements are grouped, the key is dropped again and
    the grouped values are handed to ``matcher`` (a name taken from the
    enclosing scope, not defined in this method).

    Args:
        pcoll: the collection whose contents should be checked.

    Returns:
        The output of the final "Match" step.
    """
    # NOTE(review): with a plain GroupByKey an empty input presumably yields
    # no group at all, in which case the matcher would never run -- confirm
    # whether that is acceptable for callers.
    return (
        pcoll
        | WindowInto(window.GlobalWindows())
        | "ToVoidKey" >> Map(lambda v: (None, v))
        | "Group" >> GroupByKey()
        # Tuple parameters in a lambda (``lambda (k, v): v``) are a syntax
        # error on Python 3, so index into the (key, values) pair instead.
        | "UnKey" >> Map(lambda kv: kv[1])
        | "Match" >> Map(matcher))
def expand(self, pcoll):
    """Group ``pcoll`` by key while carrying timestamps and windowing through.

    Each value is paired with its timestamp, the collection is re-windowed
    with ``_IdentityWindowFn`` (firing after every element, discarding
    accumulation mode), grouped by key, flattened back into individual
    key/value pairs and re-emitted as windowed values that carry the reified
    timestamp. The windowing of the input is put back on the result before
    it is returned.

    Args:
        pcoll: a collection of (key, value) pairs.

    Returns:
        The regrouped collection, with the input's windowing restored.
    """

    class ReifyTimestamps(DoFn):
        """Attach the element timestamp to the value of each pair."""

        def process(self, element, timestamp=DoFn.TimestampParam):
            key = element[0]
            yield key, TimestampedValue(element[1], timestamp)

    class RestoreTimestamps(DoFn):
        """Re-emit each pair with the timestamp that was reified earlier."""

        def process(self, element, window=DoFn.WindowParam):
            stamped = element[1]
            # Pass the current window since _IdentityWindowFn wouldn't know
            # how to generate it.
            yield windowed_value.WindowedValue(
                (element[0], stamped.value), stamped.timestamp, [window])

    windowing_saved = pcoll.windowing

    # The linter is confused.
    # pylint: disable=abstract-class-instantiated
    reified = pcoll | ParDo(ReifyTimestamps())
    rewindowed = reified | 'IdentityWindow' >> WindowInto(
        _IdentityWindowFn(windowing_saved.windowfn.get_window_coder()),
        trigger=AfterCount(1),
        accumulation_mode=AccumulationMode.DISCARDING,
        timestamp_combiner=TimestampCombiner.OUTPUT_AT_EARLIEST,
    )
    grouped = rewindowed | GroupByKey()
    flattened = grouped | 'ExpandIterable' >> FlatMap(
        lambda pair: [(pair[0], item) for item in pair[1]])
    result = flattened | ParDo(RestoreTimestamps())

    # Put the original windowing back on the output collection.
    result._windowing = windowing_saved
    return result
def expand(self, pcoll):
    """Apply the matcher to all elements of ``pcoll``, even when it is empty.

    The input is re-windowed into the global window and keyed by ``None``,
    then co-grouped with a one-element collection under the same key. The
    values that came from ``pcoll`` are extracted from each co-grouped result
    and passed to ``matcher`` (a name taken from the enclosing scope, not
    defined in this method).

    Args:
        pcoll: the collection whose contents should be checked.

    Returns:
        None; the resulting collection is deliberately discarded.
    """
    # We must have at least a single element to ensure the matcher
    # code gets run even if the input pcollection is empty.
    keyed_singleton = pcoll.pipeline | Create([(None, None)])
    keyed_actual = (
        pcoll
        | WindowInto(window.GlobalWindows())
        | "ToVoidKey" >> Map(lambda v: (None, v)))

    _ = ((keyed_singleton, keyed_actual)
         | "Group" >> CoGroupByKey()
         # Nested tuple parameters in a lambda are a syntax error on
         # Python 3. Each element is (key, (singleton_values, actual_values)),
         # so [1][1] selects the values that came from ``pcoll``.
         | "Unkey" >> Map(lambda kv: kv[1][1])
         | "Match" >> Map(matcher))
def expand(self, pcoll):
    """Apply the matcher to all elements of ``pcoll``, even when it is empty.

    When ``reify_windows`` (from the enclosing scope) is truthy, the input is
    first passed through ``ReifyTimestampWindow``. The elements are then
    placed in the global window, keyed by ``None`` and co-grouped with a
    one-element collection; the values that came from the input are handed
    to ``matcher``.

    Args:
        pcoll: the collection whose contents should be checked.

    Returns:
        None; the resulting collection is deliberately discarded.
    """
    actual = (pcoll | ParDo(ReifyTimestampWindow())) if reify_windows else pcoll

    # We must have at least a single element to ensure the matcher
    # code gets run even if the input pcollection is empty.
    seed = actual.pipeline | Create([(None, None)])

    globally_windowed = actual | WindowInto(window.GlobalWindows())
    keyed = globally_windowed | "ToVoidKey" >> Map(lambda v: (None, v))

    grouped = (seed, keyed) | "Group" >> CoGroupByKey()
    # Each grouped element is (key, (seed_values, actual_values)).
    values = grouped | "Unkey" >> Map(lambda pair: pair[1][1])
    _ = values | "Match" >> Map(matcher)
def expand(self, pcoll):
    """Apply the matcher to the elements of ``pcoll``, optionally per window.

    When ``reify_windows`` is truthy the input first goes through
    ``ReifyTimestampWindow``. The elements are windowed with
    ``custom_windowing`` if one is given and with the global window
    otherwise, keyed by ``None`` and co-grouped with a one-element
    collection. With custom windowing the grouped values additionally pass
    through ``AddWindow`` before reaching ``matcher``. ``reify_windows``,
    ``custom_windowing`` and ``matcher`` come from the enclosing scope.

    Args:
        pcoll: the collection whose contents should be checked.

    Returns:
        None; the resulting collection is not returned.
    """
    actual = (pcoll | ParDo(ReifyTimestampWindow())) if reify_windows else pcoll

    # The one-element collection guarantees a co-grouped result, so the
    # matcher runs even if the input is empty.
    seed = actual.pipeline | Create([(None, None)])

    windowed = actual | WindowInto(custom_windowing or window.GlobalWindows())
    keyed = windowed | "ToVoidKey" >> Map(lambda v: (None, v))

    grouped = (seed, keyed) | "Group" >> CoGroupByKey()
    # Each grouped element is (key, (seed_values, actual_values)).
    plain_actual = grouped | "Unkey" >> Map(lambda pair: pair[1][1])

    if custom_windowing:
        plain_actual = plain_actual | "AddWindow" >> ParDo(AddWindow())

    plain_actual = plain_actual | "Match" >> Map(matcher)
def expand(self, pcoll):
    """Apply the matcher to the elements of ``pcoll``, globally or per window.

    When ``reify_windows`` is truthy the input first goes through
    ``ReifyTimestampWindow``. If ``use_global_window`` is truthy the elements
    are moved into the global window; otherwise their windowing is left
    untouched and the grouped values pass through ``AddWindow`` before
    reaching ``matcher``. ``reify_windows``, ``use_global_window`` and
    ``matcher`` come from the enclosing scope.

    Args:
        pcoll: the collection whose contents should be checked.

    Returns:
        None; the resulting collection is not returned.
    """
    actual = (pcoll | ParDo(ReifyTimestampWindow())) if reify_windows else pcoll

    seed = actual.pipeline | Create([(None, None)])

    if use_global_window:
        actual = actual | WindowInto(window.GlobalWindows())

    keyed = actual | "ToVoidKey" >> Map(lambda v: (None, v))

    # This is a CoGroupByKey so that the matcher always runs, even if the
    # PCollection is empty.
    grouped = (seed, keyed) | "Group" >> CoGroupByKey()
    # Each grouped element is (key, (seed_values, actual_values)).
    plain_actual = grouped | "Unkey" >> Map(lambda pair: pair[1][1])

    if not use_global_window:
        plain_actual = plain_actual | "AddWindow" >> ParDo(AddWindow())

    plain_actual = plain_actual | "Match" >> Map(matcher)