def expand(self, pcoll):
  """Expands GroupByKey into reify/group/window steps.

  This code path is only used in the local direct runner; for Dataflow
  runner execution, the GroupByKey transform is expanded on the service.
  """
  input_type = pcoll.element_type
  if input_type is None:
    # No element type available: build the pipeline without type hints.
    return (
        pcoll
        | 'reify_windows' >> ParDo(self.ReifyWindows())
        | 'group_by_key' >> GroupByKeyOnly()
        | 'group_by_window' >> ParDo(
            self.GroupAlsoByWindow(pcoll.windowing)))

  # Initialize type-hints used below to enforce type-checking and to pass
  # downstream to further PTransforms.
  key_type, value_type = trivial_inference.key_value_types(input_type)
  typecoders.registry.verify_deterministic(
      typecoders.registry.get_coder(key_type),
      'GroupByKey operation "%s"' % self.label)

  reify_output_type = KV[key_type, typehints.WindowedValue[value_type]]
  gbk_input_type = (
      KV[key_type, Iterable[typehints.WindowedValue[value_type]]])
  gbk_output_type = KV[key_type, Iterable[value_type]]

  return (
      pcoll
      | 'reify_windows' >> ParDo(
          self.ReifyWindows()).with_output_types(reify_output_type)
      | 'group_by_key' >> GroupByKeyOnly().with_input_types(
          reify_output_type).with_output_types(gbk_input_type)
      | 'group_by_window' >> ParDo(
          self.GroupAlsoByWindow(pcoll.windowing)).with_input_types(
              gbk_input_type).with_output_types(gbk_output_type))
def build_map_unpack(state, arg):
  """Joins arg count maps from the stack into a single dict."""
  popped_key_types = []
  popped_value_types = []
  for _ in range(arg):
    constraint = state.stack.pop()
    if isinstance(constraint, typehints.Dict.DictConstraint):
      # Already a Dict hint: take its key/value types directly.
      k, v = constraint.key_type, constraint.value_type
    else:
      # Otherwise infer key/value types from the element type.
      k, v = key_value_types(element_type(constraint))
    popped_key_types.append(k)
    popped_value_types.append(v)
  state.stack.append(Dict[Union[popped_key_types], Union[popped_value_types]])
def dict_update(state, arg):
  """Merges the popped map's types into the dict at stack[-arg]."""
  other = state.stack.pop()
  base = state.stack[-arg]
  if isinstance(base, typehints.Dict.DictConstraint):
    base_kv = (base.key_type, base.value_type)
  else:
    # Unknown base constraint: fall back to Any for both components.
    base_kv = (Any, Any)
  if isinstance(other, typehints.Dict.DictConstraint):
    other_kv = (other.key_type, other.value_type)
  else:
    other_kv = key_value_types(element_type(other))
  state.stack[-arg] = Dict[
      union(base_kv[0], other_kv[0]), union(base_kv[1], other_kv[1])]
def expand(self, pcoll):
  """Expands GroupByKey into ReifyWindows/GroupByKey/GroupByWindow."""
  # Imported here to avoid circular dependencies.
  # pylint: disable=wrong-import-order, wrong-import-position
  from apache_beam.coders import typecoders

  input_type = pcoll.element_type
  if input_type is None:
    # The input_type is None, run the default
    return (
        pcoll
        | 'ReifyWindows' >> ParDo(beam.GroupByKey.ReifyWindows())
        | 'GroupByKey' >> _GroupByKeyOnly()
        | 'GroupByWindow' >> _GroupAlsoByWindow(pcoll.windowing))

  # Initialize type-hints used below to enforce type-checking and to
  # pass downstream to further PTransforms.
  key_type, value_type = trivial_inference.key_value_types(input_type)
  # Enforce the input to a GBK has a KV element type.
  pcoll.element_type = typehints.typehints.coerce_to_kv_type(
      pcoll.element_type)
  typecoders.registry.verify_deterministic(
      typecoders.registry.get_coder(key_type),
      'GroupByKey operation "%s"' % self.label)

  reify_output_type = typehints.KV[
      key_type, typehints.WindowedValue[value_type]]  # type: ignore[misc]
  gbk_input_type = typehints.KV[
      key_type,
      typehints.Iterable[
          typehints.WindowedValue[value_type]]]  # type: ignore[misc]
  gbk_output_type = typehints.KV[key_type, typehints.Iterable[value_type]]

  return (
      pcoll
      | 'ReifyWindows' >> ParDo(
          beam.GroupByKey.ReifyWindows()).with_output_types(
              reify_output_type)
      | 'GroupByKey' >> _GroupByKeyOnly().with_input_types(
          reify_output_type).with_output_types(gbk_input_type)
      | 'GroupByWindow' >> _GroupAlsoByWindow(
          pcoll.windowing).with_input_types(
              gbk_input_type).with_output_types(gbk_output_type))
def infer_output_type(self, input_type):
  """Maps KV[K, Iterable[WindowedValue[V]]] to Iterable[KV[K, Iterable[V]]]."""
  key_type, wv_iter_type = trivial_inference.key_value_types(input_type)
  # Unwrap Iterable[WindowedValue[V]] down to the plain value type V.
  plain_value_type = wv_iter_type.inner_type.inner_type
  return typehints.Iterable[
      typehints.KV[key_type, typehints.Iterable[plain_value_type]]]
def infer_output_type(self, input_type):
  """Maps KV[K, V] to the grouped type KV[K, Iterable[V]]."""
  k, v = trivial_inference.key_value_types(input_type)
  return typehints.KV[k, typehints.Iterable[v]]
def infer_output_type(self, input_type):
  """Maps KV[K, V] to the grouped type KV[K, Iterable[V]]."""
  k, v = trivial_inference.key_value_types(input_type)
  return KV[k, Iterable[v]]
def infer_output_type(self, input_type):
  """Maps KV[K, Iterable[WindowedValue[V]]] to Iterable[KV[K, Iterable[V]]]."""
  key_type, wv_iter_type = trivial_inference.key_value_types(input_type)
  # Strip Iterable[...] then WindowedValue[...] to recover V.
  plain_value_type = wv_iter_type.inner_type.inner_type
  return Iterable[KV[key_type, Iterable[plain_value_type]]]
def infer_output_type(self, input_type):
  """Maps KV[K, V] to Iterable[KV[K, WindowedValue[V]]]."""
  k, v = trivial_inference.key_value_types(input_type)
  return Iterable[KV[k, typehints.WindowedValue[v]]]