Esempio n. 1
0
def pickle_with_side_inputs(fn, tag_and_type=None):
    """Pickle ``fn`` bundled with its side-input description.

    The pickled payload is the 5-tuple
    ``(fn, args, {}, tags_and_types, windowing)`` where ``windowing`` is a
    ``core.Windowing`` built over ``window.GlobalWindows()``.

    Args:
        fn: the object to store in the first slot of the tuple.
        tag_and_type: optional value; when it is not ``None`` it becomes the
            single entry of ``tags_and_types`` and one
            ``util.ArgumentPlaceholder()`` is put in ``args`` alongside it.

    Returns:
        Whatever ``pickler.dumps`` returns for the tuple.
    """
    has_side_input = tag_and_type is not None
    # One placeholder argument per declared side input (zero or one here).
    args = [util.ArgumentPlaceholder()] if has_side_input else []
    tags_and_types = [tag_and_type] if has_side_input else []
    windowing = core.Windowing(window.GlobalWindows())
    return pickler.dumps((fn, args, {}, tags_and_types, windowing))
Esempio n. 2
0
  def apply(self, pcoll):
    """Combine all elements of ``pcoll`` using ``self.fn``.

    Each element is paired with the key ``None``, run through
    ``CombinePerKey`` with ``self.fn``/``self.args``/``self.kwargs``, and the
    key is then dropped again.  What is returned depends on
    ``self.has_defaults`` and ``self.as_view``:

    * neither set: the combined PCollection itself;
    * ``as_view`` set: a ``pvalue.AsSingleton`` view over the combined
      PCollection;
    * only ``has_defaults`` set: a new PCollection created from a single
      ``None`` element, mapped to the value of that singleton view.

    Args:
      pcoll: the input PCollection.

    Returns:
      The combined PCollection, a singleton view, or the default-injected
      PCollection, as described above.

    Raises:
      ValueError: if defaults are requested without ``as_view`` and
        ``pcoll.windowing.windowfn`` is not ``window.GlobalWindows()``.
    """
    def add_input_types(transform):
      # Forward the first declared positional input type hint, if any.
      type_hints = self.get_type_hints()
      if type_hints.input_types:
        return transform.with_input_types(type_hints.input_types[0][0])
      else:
        return transform

    # Index into the (key, value) pair instead of using a tuple parameter:
    # `lambda (k, v): v` is a SyntaxError on Python 3 (PEP 3113), while
    # indexing works on both Python 2 and Python 3.
    combined = (pcoll
            | add_input_types(Map('KeyWithVoid', lambda v: (None, v))
               .with_output_types(KV[None, pcoll.element_type]))
            | CombinePerKey('CombinePerKey', self.fn, *self.args, **self.kwargs)
            | Map('UnKey', lambda kv: kv[1]))

    if not self.has_defaults and not self.as_view:
      return combined

    if self.has_defaults:
      # The default is the result of combining an empty input.
      combine_fn = (
          self.fn if isinstance(self.fn, CombineFn)
          else CombineFn.from_callable(self.fn))
      default_value = combine_fn.apply([], *self.args, **self.kwargs)
    else:
      default_value = pvalue._SINGLETON_NO_DEFAULT  # pylint: disable=protected-access
    view = pvalue.AsSingleton(combined, default_value=default_value)
    if self.as_view:
      return view
    else:
      if pcoll.windowing.windowfn != window.GlobalWindows():
        raise ValueError(
            "Default values are not yet supported in CombineGlobally() if the "
            "output  PCollection is not windowed by GlobalWindows. "
            "Instead, use CombineGlobally().without_defaults() to output "
            "an empty PCollection if the input PCollection is empty, "
            "or CombineGlobally().as_singleton_view() to get the default "
            "output of the CombineFn if the input PCollection is empty.")
      def typed(transform):
        # TODO(robertwb): We should infer this.
        if combined.element_type:
          return transform.with_output_types(combined.element_type)
        else:
          return transform
      # A one-element PCollection is mapped to the view's value; `view` is
      # handed to Map as an extra argument (presumably a side input).
      return (pcoll.pipeline
              | Create('DoOnce', [None])
              | typed(Map('InjectDefault', lambda _, s: s, view)))
Esempio n. 3
0
 def __init__(self, windowfn, triggerfn=None, accumulation_mode=None,
              output_time_fn=None):
   """Store the windowing settings, filling in defaults where omitted.

   Args:
     windowfn: stored as ``self.windowfn``.
     triggerfn: stored as ``self.triggerfn``; ``DefaultTrigger()`` is used
       when ``None``.
     accumulation_mode: stored as ``self.accumulation_mode``; when ``None``
       it becomes ``AccumulationMode.DISCARDING``, which is only allowed if
       the trigger equals ``DefaultTrigger()``.
     output_time_fn: stored as ``self.output_time_fn``; any falsy value is
       replaced by ``OutputTimeFn.OUTPUT_AT_EOW``.

   Raises:
     ValueError: if ``accumulation_mode`` is ``None`` and the trigger does
       not equal ``DefaultTrigger()``.
   """
   # The names are imported at call time and published as module globals;
   # presumably this avoids a circular import -- TODO confirm.
   global AccumulationMode, DefaultTrigger
   # pylint: disable=g-import-not-at-top
   from google.cloud.dataflow.transforms.trigger import AccumulationMode, DefaultTrigger
   # pylint: enable=g-import-not-at-top
   if triggerfn is None:
     triggerfn = DefaultTrigger()
   if accumulation_mode is None:
     # Only the default trigger may rely on an implicit accumulation mode.
     if not triggerfn == DefaultTrigger():
       raise ValueError(
           'accumulation_mode must be provided for non-trivial triggers')
     accumulation_mode = AccumulationMode.DISCARDING

   self.windowfn = windowfn
   self.triggerfn = triggerfn
   self.accumulation_mode = accumulation_mode
   if not output_time_fn:
     output_time_fn = OutputTimeFn.OUTPUT_AT_EOW
   self.output_time_fn = output_time_fn

   # True only when every setting matches its default value.
   self._is_default = (
       self.windowfn == window.GlobalWindows()
       and self.triggerfn == DefaultTrigger()
       and self.accumulation_mode == AccumulationMode.DISCARDING
       and self.output_time_fn == OutputTimeFn.OUTPUT_AT_EOW)
Esempio n. 4
0
 def get_windowing(self, unused_inputs):
      """Return a ``core.Windowing`` over ``window.GlobalWindows()``.

      The ``unused_inputs`` argument is ignored.
      """
      global_windows = window.GlobalWindows()
      return core.Windowing(global_windows)