def visit_transform(self, applied_ptransform):
    """Mark the pipeline as unsupported by the FnApiRunner when required.

    Inspects the transform of ``applied_ptransform`` and sets
    ``self.supported_by_fnapi_runner`` to ``False`` when it uses a feature
    the FnApiRunner does not support. This method only ever sets the flag
    to ``False``; it never sets it back to ``True``.

    Args:
        applied_ptransform: The applied transform being visited. Only its
            ``transform`` attribute is read.
    """
    transform = applied_ptransform.transform
    # The FnApiRunner does not support streaming execution.
    if isinstance(transform, TestStream):
        self.supported_by_fnapi_runner = False
    # The FnApiRunner does not support reads from NativeSources.
    if (isinstance(transform, beam.io.Read) and
            isinstance(transform.source, NativeSource)):
        self.supported_by_fnapi_runner = False
    # The FnApiRunner does not support the use of _NativeWrites.
    if isinstance(transform, _NativeWrite):
        self.supported_by_fnapi_runner = False
    if isinstance(transform, beam.ParDo):
        dofn = transform.dofn
        # Build the signature once and reuse it for both checks below
        # instead of constructing it separately for each check.
        signature = DoFnSignature(dofn)
        # The FnApiRunner does not support execution of SplittableDoFns.
        if signature.is_splittable_dofn():
            self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support execution of DoFns with timers.
        if signature.has_timers():
            self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support execution of CombineFns with
        # deferred side inputs.
        if isinstance(dofn, CombineValuesDoFn):
            args, kwargs = transform.raw_side_inputs
            args_to_check = itertools.chain(args, kwargs.values())
            if any(isinstance(arg, ArgumentPlaceholder)
                   for arg in args_to_check):
                self.supported_by_fnapi_runner = False
def get_replacement_transform(self, ptransform):
    """Return the transform that should replace ``ptransform``.

    Args:
        ptransform: The transform to replace; asserted to be a ``ParDo``.

    Returns:
        ``SplittableParDo(ptransform)`` when the ``DoFnSignature`` of
        ``ptransform.fn`` reports a splittable DoFn, otherwise
        ``ptransform`` itself.
    """
    assert isinstance(ptransform, ParDo)
    if DoFnSignature(ptransform.fn).is_splittable_dofn():
        return SplittableParDo(ptransform)
    # Not splittable: keep the transform as it is.
    return ptransform
def get_replacement_transform(self, ptransform):
    """Pick the replacement for a ``ParDo`` transform.

    Args:
        ptransform: The transform to replace; asserted to be a ``ParDo``.

    Returns:
        A ``SplittableParDo`` wrapping ``ptransform`` if the signature of
        ``ptransform.fn`` is a splittable DoFn; ``ptransform`` unchanged
        otherwise.
    """
    assert isinstance(ptransform, ParDo)
    is_splittable = DoFnSignature(ptransform.fn).is_splittable_dofn()
    return SplittableParDo(ptransform) if is_splittable else ptransform
def test_dofn_validate_process_error(self):
    """DoFnSignature raises when ``process`` takes WindowParam twice."""
    class TwoWindowParamsDoFn(DoFn):
        def process(self, element, w1=DoFn.WindowParam, w2=DoFn.WindowParam):
            pass

    # Building the signature is what is expected to raise.
    with self.assertRaises(AssertionError):
        DoFnSignature(TwoWindowParamsDoFn())
def expand(self, pcoll):
    """Expand the wrapped ParDo into the splittable-DoFn processing steps.

    Applies the 'pair', 'split', 'explode' and 'random' ParDo steps to
    ``pcoll`` in that order and feeds the result to
    ``ProcessKeyedElements``.

    Args:
        pcoll: The input PCollection; its ``element_type`` and ``windowing``
            are read.

    Returns:
        The result of applying ``ProcessKeyedElements`` to the keyed
        elements.
    """
    ptransform = self._ptransform
    splittable_fn = ptransform.fn
    restriction_coder = DoFnSignature(splittable_fn).get_restriction_coder()
    element_coder = typecoders.registry.get_coder(pcoll.element_type)

    # Same chain as a single piped expression, one named stage per step.
    paired = pcoll | 'pair' >> ParDo(PairWithRestrictionFn(splittable_fn))
    split = paired | 'split' >> ParDo(SplitRestrictionFn(splittable_fn))
    exploded = split | 'explode' >> ParDo(ExplodeWindowsFn())
    keyed_elements = exploded | 'random' >> ParDo(RandomUniqueKeyFn())

    process_keyed = ProcessKeyedElements(
        splittable_fn,
        element_coder,
        restriction_coder,
        pcoll.windowing,
        ptransform.args,
        ptransform.kwargs,
        ptransform.side_inputs)
    return keyed_elements | process_keyed
def test_dofn_get_defaults_kwonly(self):
    """Keyword-only defaults of ``process`` are reported as defaults."""
    class KeywordOnlyDoFn(DoFn):
        def process(self, element, *, w=DoFn.WindowParam):
            pass

    process_method = DoFnSignature(KeywordOnlyDoFn()).process_method
    self.assertEqual(process_method.defaults, [DoFn.WindowParam])
def test_dofn_validate_finish_bundle_error(self):
    """DoFnSignature raises when ``finish_bundle`` takes a WindowParam."""
    class WindowedFinishBundleDoFn(DoFn):
        def process(self, element):
            pass

        def finish_bundle(self, w1=DoFn.WindowParam):
            pass

    # Building the signature is what is expected to raise.
    with self.assertRaises(AssertionError):
        DoFnSignature(WindowedFinishBundleDoFn())
def __init__(self, sdf, args_for_invoker, kwargs_for_invoker):
    """Store the DoFn, create its state tags and build its invoker.

    Args:
        sdf: The DoFn to wrap; kept on ``self.sdf`` and used to build the
            ``DoFnSignature`` for the invoker.
        args_for_invoker: Passed to the invoker as ``input_args``.
        kwargs_for_invoker: Passed to the invoker as ``input_kwargs``.
    """
    self.sdf = sdf

    # These start out unset.
    self._process_element_invoker = None
    self._step_context = None

    # State tags, one per named piece of state.
    self._element_tag = _ValueStateTag('element')
    self._restriction_tag = _ValueStateTag('restriction')
    self.watermark_hold_tag = _ValueStateTag('watermark_hold')

    sdf_signature = DoFnSignature(self.sdf)
    invoker_context = DoFnContext('unused_context')
    self.sdf_invoker = DoFnInvoker.create_invoker(
        sdf_signature,
        context=invoker_context,
        input_args=args_for_invoker,
        input_kwargs=kwargs_for_invoker)
def expand(self, pcoll):
    """Expand the wrapped ParDo into the splittable-DoFn processing steps.

    Obtains the restriction coder through a ``DoFnInvoker`` created with
    ``process_invocation=False``, applies the 'pair', 'split', 'explode'
    and 'random' ParDo steps to ``pcoll`` in that order, and feeds the
    result to ``ProcessKeyedElements``.

    Args:
        pcoll: The input PCollection; its ``element_type`` and ``windowing``
            are read.

    Returns:
        The result of applying ``ProcessKeyedElements`` to the keyed
        elements.
    """
    ptransform = self._ptransform
    splittable_fn = ptransform.fn
    restriction_invoker = DoFnInvoker.create_invoker(
        DoFnSignature(splittable_fn), process_invocation=False)

    element_coder = typecoders.registry.get_coder(pcoll.element_type)
    restriction_coder = restriction_invoker.invoke_restriction_coder()

    # Same chain as a single piped expression, one named stage per step.
    paired = pcoll | 'pair' >> ParDo(PairWithRestrictionFn(splittable_fn))
    split = paired | 'split' >> ParDo(SplitRestrictionFn(splittable_fn))
    exploded = split | 'explode' >> ParDo(ExplodeWindowsFn())
    keyed_elements = exploded | 'random' >> ParDo(RandomUniqueKeyFn())

    process_keyed = ProcessKeyedElements(
        splittable_fn,
        element_coder,
        restriction_coder,
        pcoll.windowing,
        ptransform.args,
        ptransform.kwargs)
    return keyed_elements | process_keyed
def __init__(self, sdf, args_for_invoker, kwargs_for_invoker):
    """Store the DoFn, create its state tags and build its invoker.

    Args:
        sdf: The DoFn to wrap; kept on ``self.sdf`` and used to build the
            ``DoFnSignature`` for the invoker.
        args_for_invoker: Passed to the invoker as ``input_args``.
        kwargs_for_invoker: Passed to the invoker as ``input_kwargs``.
    """
    self.sdf = sdf

    # These start out unset.
    self._process_element_invoker = None
    self._step_context = None

    # State tags, one per named piece of state.
    self._element_tag = _ReadModifyWriteStateTag('element')
    self._restriction_tag = _ReadModifyWriteStateTag('restriction')
    self._watermark_state_tag = _ReadModifyWriteStateTag(
        'watermark_estimator_state')
    self.watermark_hold_tag = _ReadModifyWriteStateTag('watermark_hold')

    # The same output processor is kept on the instance and handed to the
    # invoker.
    self._output_processor = _OutputProcessor()
    sdf_signature = DoFnSignature(self.sdf)
    invoker_context = DoFnContext('unused_context')
    self.sdf_invoker = DoFnInvoker.create_invoker(
        sdf_signature,
        context=invoker_context,
        output_processor=self._output_processor,
        input_args=args_for_invoker,
        input_kwargs=kwargs_for_invoker)
def test_unbounded_element_process_fn(self):
    """``is_unbounded_per_element`` follows the ``process`` decorator."""
    class UnboundedDoFn(DoFn):
        @DoFn.unbounded_per_element()
        def process(self, element):
            pass

    class BoundedDoFn(DoFn):
        def process(self, element):
            pass

    # Decorated process method: reported as unbounded per element.
    unbounded_signature = DoFnSignature(UnboundedDoFn())
    self.assertTrue(unbounded_signature.is_unbounded_per_element())

    # Plain process method: not reported as unbounded per element.
    bounded_signature = DoFnSignature(BoundedDoFn())
    self.assertFalse(bounded_signature.is_unbounded_per_element())
def matches(self, applied_ptransform):
    """Tell whether ``applied_ptransform`` is a ParDo of a splittable DoFn.

    Args:
        applied_ptransform: The node to test; asserted to be an
            ``AppliedPTransform``.

    Returns:
        ``False`` when the node's transform is not a ``ParDo``; otherwise
        the result of ``is_splittable_dofn()`` on the ``DoFnSignature`` of
        the transform's ``fn``.
    """
    assert isinstance(applied_ptransform, AppliedPTransform)
    transform = applied_ptransform.transform
    if not isinstance(transform, ParDo):
        # Return an explicit False rather than falling off the end of the
        # function (which yields None), so the predicate always answers
        # with a definite value.
        return False
    return DoFnSignature(transform.fn).is_splittable_dofn()
def _matcher(applied_ptransform):
    """Tell whether ``applied_ptransform`` is a ParDo of a splittable DoFn.

    Args:
        applied_ptransform: The node to test; asserted to be an
            ``AppliedPTransform``.

    Returns:
        ``False`` when the node's transform is not a ``ParDo``; otherwise
        the result of ``is_splittable_dofn()`` on the ``DoFnSignature`` of
        the transform's ``fn``.
    """
    assert isinstance(applied_ptransform, AppliedPTransform)
    transform = applied_ptransform.transform
    if not isinstance(transform, ParDo):
        # Return an explicit False rather than falling off the end of the
        # function (which yields None), so the predicate always answers
        # with a definite value.
        return False
    return DoFnSignature(transform.fn).is_splittable_dofn()
def __init__(self, do_fn):
    """Build and keep the ``DoFnSignature`` of ``do_fn``.

    Args:
        do_fn: The DoFn whose signature is stored on ``self._signature``.
    """
    signature = DoFnSignature(do_fn)
    self._signature = signature
def start_bundle(self):
    """Build a ``DoFnInvoker`` for ``self._do_fn`` and keep it.

    The invoker is created with ``process_invocation=False`` and stored on
    ``self._invoker``.
    """
    self._invoker = DoFnInvoker.create_invoker(
        DoFnSignature(self._do_fn), process_invocation=False)
def _validate_dofn(self, dofn):
    """Validate ``dofn`` by constructing its ``DoFnSignature``.

    Construction of DoFnSignature performs validation of the given DoFn.
    In particular, it ends up calling userstate._validate_stateful_dofn.
    That behavior is explicitly tested below in test_validate_dofn().

    Args:
        dofn: The DoFn to validate.
    """
    # The signature object is discarded; constructing it is the check.
    DoFnSignature(dofn)
def start_bundle(self):
    """Build a ``DoFnInvoker`` for ``self._do_fn`` and keep it.

    The invoker is created with a ``_NoneShallPassOutputProcessor`` as its
    output processor and ``process_invocation=False``, and is stored on
    ``self._invoker``.
    """
    do_fn_signature = DoFnSignature(self._do_fn)
    output_processor = _NoneShallPassOutputProcessor()
    self._invoker = DoFnInvoker.create_invoker(
        do_fn_signature,
        output_processor=output_processor,
        process_invocation=False)