Example #1
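DirectRunner TransformExecutor.call: a MetricsContainer is created for the transform's full label, evaluator.finish_bundle() runs under a ScopedMetricsContainer, and the cumulative physical metrics are committed in the finally block.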
    def call(self):
        self._call_count += 1
        assert self._call_count <= (1 +
                                    len(self._applied_transform.side_inputs))
        metrics_container = MetricsContainer(
            self._applied_transform.full_label)
        scoped_metrics_container = ScopedMetricsContainer(metrics_container)

        for side_input in self._applied_transform.side_inputs:
            if side_input not in self._side_input_values:
                has_result, value = (self._evaluation_context.
                                     get_value_or_schedule_after_output(
                                         side_input, self))
                if not has_result:
                    # Monitor task will reschedule this executor once the side input is
                    # available.
                    return
                self._side_input_values[side_input] = value

        side_input_values = [
            self._side_input_values[side_input]
            for side_input in self._applied_transform.side_inputs
        ]

        try:
            evaluator = self._transform_evaluator_registry.for_application(
                self._applied_transform, self._input_bundle, side_input_values,
                scoped_metrics_container)

            if self._input_bundle:
                for value in self._input_bundle.get_elements_iterable():
                    evaluator.process_element(value)

            with scoped_metrics_container:
                result = evaluator.finish_bundle()
                result.logical_metric_updates = metrics_container.get_cumulative()

            if self._evaluation_context.has_cache:
                for uncommitted_bundle in result.uncommitted_output_bundles:
                    self._evaluation_context.append_to_cache(
                        self._applied_transform, uncommitted_bundle.tag,
                        uncommitted_bundle.get_elements_iterable())
                undeclared_tag_values = result.undeclared_tag_values
                if undeclared_tag_values:
                    for tag, value in undeclared_tag_values.items():
                        self._evaluation_context.append_to_cache(
                            self._applied_transform, tag, value)

            self._completion_callback.handle_result(self._input_bundle, result)
            return result
        except Exception as e:  # pylint: disable=broad-except
            # format_exc() already includes the traceback, so exc_info is redundant.
            logging.warning('Task failed: %s', traceback.format_exc())
            self._completion_callback.handle_exception(e)
        finally:
            self._evaluation_context.metrics().commit_physical(
                self._input_bundle, metrics_container.get_cumulative())
            self._transform_evaluation_state.complete(self)
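The recurring pattern in these examples: build a MetricsContainer keyed by the step label, run the work inside a ScopedMetricsContainer so user-code metric updates land in that container, then read the cumulative updates off the container afterwards (Example #1 commits them via commit_physical). Below is a minimal, self-contained sketch of that pattern; run_bundle and the step/namespace names are illustrative, not taken from the examples above:

from apache_beam.metrics.execution import MetricsContainer, ScopedMetricsContainer
from apache_beam.metrics.metric import Metrics

def run_bundle():
    # Hypothetical work function: user code updates metrics against
    # whatever container is currently in scope.
    Metrics.counter('my.namespace', 'elements').inc()

metrics_container = MetricsContainer('my_step')
with ScopedMetricsContainer(metrics_container):
    run_bundle()

# After the scope exits, read the accumulated updates; Example #1
# passes these to commit_physical().
updates = metrics_container.get_cumulative()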
Example #2
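Worker Operation.__init__: each operation gets a MetricsContainer keyed by step name, wrapped in a ScopedMetricsContainer, alongside scoped start/process/finish sampling states.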
  def __init__(self, operation_name, spec, counter_factory, state_sampler):
    """Initializes a worker operation instance.

    Args:
      operation_name: The system name assigned by the runner for this
        operation.
      spec: An operation_specs.Worker* instance.
      counter_factory: The CounterFactory to use for our counters.
      state_sampler: The StateSampler for the current operation.
    """
    self.operation_name = operation_name
    self.spec = spec
    self.counter_factory = counter_factory
    self.consumers = collections.defaultdict(list)

    # These are overwritten in the legacy harness.
    self.step_name = operation_name
    self.metrics_container = MetricsContainer(self.step_name)
    self.scoped_metrics_container = ScopedMetricsContainer(
        self.metrics_container)

    self.state_sampler = state_sampler
    self.scoped_start_state = self.state_sampler.scoped_state(
        self.operation_name, 'start')
    self.scoped_process_state = self.state_sampler.scoped_state(
        self.operation_name, 'process')
    self.scoped_finish_state = self.state_sampler.scoped_state(
        self.operation_name, 'finish')
    # TODO(ccy): the '-abort' state can be added when the abort is supported in
    # Operations.
    self.receivers = []
Example #3
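CombineOperation.__init__: deserializes the combine fn into a PhasedCombineFnExecutor and installs an empty ScopedMetricsContainer, since combiners take no deferred side inputs.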
  def __init__(self, operation_name, spec, counter_factory, state_sampler):
    super(CombineOperation, self).__init__(
        operation_name, spec, counter_factory, state_sampler)
    # Combiners do not accept deferred side inputs (the ignored fourth
    # argument), so handling the extra args/kwargs is simpler here than for
    # the DoFns of ParDo.
    fn, args, kwargs = pickler.loads(self.spec.serialized_fn)[:3]
    self.phased_combine_fn = PhasedCombineFnExecutor(
        self.spec.phase, fn, args, kwargs)
    self.scoped_metrics_container = ScopedMetricsContainer()
Example #4
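Unit test for nested scoping: ScopedMetricsContainer switches MetricsEnvironment.current_container() on entry and restores the previous container on exit.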
    def test_scoped_container(self):
        c1 = MetricsContainer('mystep')
        c2 = MetricsContainer('myinternalstep')
        with ScopedMetricsContainer(c1):
            self.assertEqual(c1, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(2)

            with ScopedMetricsContainer(c2):
                self.assertEqual(c2, MetricsEnvironment.current_container())
                counter = Metrics.counter('ns', 'name')
                counter.inc(3)
                self.assertEqual(list(c2.get_cumulative().counters.items()),
                                 [(MetricKey('myinternalstep',
                                             MetricName('ns', 'name')), 3)])

            self.assertEqual(c1, MetricsEnvironment.current_container())
            counter = Metrics.counter('ns', 'name')
            counter.inc(4)
            self.assertEqual(
                list(c1.get_cumulative().counters.items()),
                [(MetricKey('mystep', MetricName('ns', 'name')), 6)])
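The expected totals follow from the nesting: c2 sees only the increment made while it is current (3), while c1 accumulates the increments from before and after the inner scope (2 + 4 = 6), confirming that exiting the inner ScopedMetricsContainer restores the outer container.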
Example #5
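A later Operation.__init__: NameContext replaces the bare step name, the MetricsContainer is handed to the scoped sampling states directly, and ScopedMetricsContainer survives only for Dataflow compatibility (see the BEAM-4094 TODO).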
    def __init__(self, name_context, spec, counter_factory, state_sampler):
        """Initializes a worker operation instance.

        Args:
          name_context: A NameContext instance or string (deprecated), with the
            name information for this operation.
          spec: An operation_specs.Worker* instance.
          counter_factory: The CounterFactory to use for our counters.
          state_sampler: The StateSampler for the current operation.
        """
        if isinstance(name_context, common.NameContext):
            # TODO(BEAM-4028): Clean this up once it's completely migrated.
            # We use the specific operation name that is used for metrics and state
            # sampling.
            self.name_context = name_context
        else:
            self.name_context = common.NameContext(name_context)

        # TODO(BEAM-4028): Remove following two lines. Rely on name context.
        self.operation_name = self.name_context.step_name
        self.step_name = self.name_context.logging_name()

        self.spec = spec
        self.counter_factory = counter_factory
        self.consumers = collections.defaultdict(list)

        # These are overwritten in the legacy harness.
        self.metrics_container = MetricsContainer(
            self.name_context.metrics_name())
        # TODO(BEAM-4094): Remove ScopedMetricsContainer after Dataflow no longer
        # depends on it.
        self.scoped_metrics_container = ScopedMetricsContainer()

        self.state_sampler = state_sampler
        self.scoped_start_state = self.state_sampler.scoped_state(
            self.name_context.metrics_name(),
            'start',
            metrics_container=self.metrics_container)
        self.scoped_process_state = self.state_sampler.scoped_state(
            self.name_context.metrics_name(),
            'process',
            metrics_container=self.metrics_container)
        self.scoped_finish_state = self.state_sampler.scoped_state(
            self.name_context.metrics_name(),
            'finish',
            metrics_container=self.metrics_container)
        # TODO(ccy): the '-abort' state can be added when the abort is supported in
        # Operations.
        self.receivers = []
Example #6
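DoFnRunner.__init__ in the invoker-based design: the runner accepts an optional scoped_metrics_container, falls back to an empty ScopedMetricsContainer, and delegates element processing to a DoFnInvoker.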
  def __init__(self,
               fn,
               args,
               kwargs,
               side_inputs,
               windowing,
               tagged_receivers=None,
               step_name=None,
               logging_context=None,
               state=None,
               scoped_metrics_container=None):
    """Initializes a DoFnRunner.

    Args:
      fn: user DoFn to invoke
      args: positional side input arguments (static and placeholder), if any
      kwargs: keyword side input arguments (static and placeholder), if any
      side_inputs: list of sideinput.SideInputMaps for deferred side inputs
      windowing: windowing properties of the output PCollection(s)
      tagged_receivers: a dict of tag name to Receiver objects
      step_name: the name of this step
      logging_context: a LoggingContext object
      state: handle for accessing DoFn state
      scoped_metrics_container: Context switcher for metrics container
    """
    # Need to support multiple iterations.
    side_inputs = list(side_inputs)

    from apache_beam.metrics.execution import ScopedMetricsContainer

    self.scoped_metrics_container = (
        scoped_metrics_container or ScopedMetricsContainer())
    self.step_name = step_name
    self.logging_context = logging_context or LoggingContext()
    self.context = DoFnContext(step_name, state=state)

    do_fn_signature = DoFnSignature(fn)

    # Optimize for the common case.
    main_receivers = tagged_receivers[None]
    output_processor = _OutputProcessor(
        windowing.windowfn, main_receivers, tagged_receivers)

    self.do_fn_invoker = DoFnInvoker.create_invoker(
        do_fn_signature, output_processor, self.context, side_inputs, args,
        kwargs)
Example #7
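TransformExecutor.call with bundle retry: the MetricsContainer and ScopedMetricsContainer are created once, then reused across up to _max_retries_per_bundle attempts of attempt_call.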
    def call(self):
        self._call_count += 1
        assert self._call_count <= (1 +
                                    len(self._applied_ptransform.side_inputs))
        metrics_container = MetricsContainer(
            self._applied_ptransform.full_label)
        scoped_metrics_container = ScopedMetricsContainer(metrics_container)

        for side_input in self._applied_ptransform.side_inputs:
            if side_input not in self._side_input_values:
                has_result, value = (self._evaluation_context.
                                     get_value_or_schedule_after_output(
                                         side_input, self))
                if not has_result:
                    # Monitor task will reschedule this executor once the side input is
                    # available.
                    return
                self._side_input_values[side_input] = value
        side_input_values = [
            self._side_input_values[side_input]
            for side_input in self._applied_ptransform.side_inputs
        ]

        while self._retry_count < self._max_retries_per_bundle:
            try:
                self.attempt_call(metrics_container, scoped_metrics_container,
                                  side_input_values)
                break
            except Exception as e:
                self._retry_count += 1
                logging.error(
                    'Exception at bundle %r:\n%s',
                    self._input_bundle, traceback.format_exc())
                if self._retry_count == self._max_retries_per_bundle:
                    logging.error('Giving up after %s attempts.',
                                  self._max_retries_per_bundle)
                    if self._retry_count == 1:
                        logging.info(
                            'Use the experimental flag --direct_runner_bundle_retry'
                            ' to retry failed bundles (up to %d times).',
                            TransformExecutor._MAX_RETRY_PER_BUNDLE)
                    self._completion_callback.handle_exception(self, e)

        self._evaluation_context.metrics().commit_physical(
            self._input_bundle, metrics_container.get_cumulative())
        self._transform_evaluation_state.complete(self)
Example #8
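TransformExecutor.call with windowed side inputs: the main input window is mapped onto each side input's window to compute how long to block, then bundles are retried as in Example #7.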
  def call(self):
    self._call_count += 1
    assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
    metrics_container = MetricsContainer(self._applied_ptransform.full_label)
    scoped_metrics_container = ScopedMetricsContainer(metrics_container)

    for side_input in self._applied_ptransform.side_inputs:
      # Find the projection of main's window onto the side input's window.
      window_mapping_fn = side_input._view_options().get(
          'window_mapping_fn', sideinputs._global_window_mapping_fn)
      main_onto_side_window = window_mapping_fn(self._latest_main_input_window)
      block_until = main_onto_side_window.end

      if side_input not in self._side_input_values:
        value = self._evaluation_context.get_value_or_block_until_ready(
            side_input, self, block_until)
        if not value:
          # Monitor task will reschedule this executor once the side input is
          # available.
          return
        self._side_input_values[side_input] = value
    side_input_values = [self._side_input_values[side_input]
                         for side_input in self._applied_ptransform.side_inputs]

    while self._retry_count < self._max_retries_per_bundle:
      try:
        self.attempt_call(metrics_container,
                          scoped_metrics_container,
                          side_input_values)
        break
      except Exception as e:
        self._retry_count += 1
        logging.error(
            'Exception at bundle %r:\n%s',
            self._input_bundle, traceback.format_exc())
        if self._retry_count == self._max_retries_per_bundle:
          logging.error('Giving up after %s attempts.',
                        self._max_retries_per_bundle)
          self._completion_callback.handle_exception(self, e)

    self._evaluation_context.metrics().commit_physical(
        self._input_bundle,
        metrics_container.get_cumulative())
    self._transform_evaluation_state.complete(self)
Example #9
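create_operation factory: after dispatching on the operation_specs.Worker* spec type, it uniformly attaches step_name, a MetricsContainer, and a ScopedMetricsContainer to the newly built operation.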
def create_operation(operation_name,
                     spec,
                     counter_factory,
                     step_name,
                     state_sampler,
                     test_shuffle_source=None,
                     test_shuffle_sink=None,
                     is_streaming=False):
    """Create Operation object for given operation specification."""
    if isinstance(spec, operation_specs.WorkerRead):
        if isinstance(spec.source, iobase.SourceBundle):
            op = ReadOperation(operation_name, spec, counter_factory,
                               state_sampler)
        else:
            from dataflow_worker.native_operations import NativeReadOperation
            op = NativeReadOperation(operation_name, spec, counter_factory,
                                     state_sampler)
    elif isinstance(spec, operation_specs.WorkerWrite):
        from dataflow_worker.native_operations import NativeWriteOperation
        op = NativeWriteOperation(operation_name, spec, counter_factory,
                                  state_sampler)
    elif isinstance(spec, operation_specs.WorkerCombineFn):
        op = CombineOperation(operation_name, spec, counter_factory,
                              state_sampler)
    elif isinstance(spec, operation_specs.WorkerPartialGroupByKey):
        op = create_pgbk_op(operation_name, spec, counter_factory,
                            state_sampler)
    elif isinstance(spec, operation_specs.WorkerDoFn):
        op = DoOperation(operation_name, spec, counter_factory, state_sampler)
    elif isinstance(spec, operation_specs.WorkerGroupingShuffleRead):
        from dataflow_worker.shuffle_operations import GroupedShuffleReadOperation
        op = GroupedShuffleReadOperation(operation_name,
                                         spec,
                                         counter_factory,
                                         state_sampler,
                                         shuffle_source=test_shuffle_source)
    elif isinstance(spec, operation_specs.WorkerUngroupedShuffleRead):
        from dataflow_worker.shuffle_operations import UngroupedShuffleReadOperation
        op = UngroupedShuffleReadOperation(operation_name,
                                           spec,
                                           counter_factory,
                                           state_sampler,
                                           shuffle_source=test_shuffle_source)
    elif isinstance(spec, operation_specs.WorkerInMemoryWrite):
        op = InMemoryWriteOperation(operation_name, spec, counter_factory,
                                    state_sampler)
    elif isinstance(spec, operation_specs.WorkerShuffleWrite):
        from dataflow_worker.shuffle_operations import ShuffleWriteOperation
        op = ShuffleWriteOperation(operation_name,
                                   spec,
                                   counter_factory,
                                   state_sampler,
                                   shuffle_sink=test_shuffle_sink)
    elif isinstance(spec, operation_specs.WorkerFlatten):
        op = FlattenOperation(operation_name, spec, counter_factory,
                              state_sampler)
    elif isinstance(spec, operation_specs.WorkerMergeWindows):
        from dataflow_worker.shuffle_operations import BatchGroupAlsoByWindowsOperation
        from dataflow_worker.shuffle_operations import StreamingGroupAlsoByWindowsOperation
        if is_streaming:
            op = StreamingGroupAlsoByWindowsOperation(operation_name, spec,
                                                      counter_factory,
                                                      state_sampler)
        else:
            op = BatchGroupAlsoByWindowsOperation(operation_name, spec,
                                                  counter_factory,
                                                  state_sampler)
    elif isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
        from dataflow_worker.shuffle_operations import ReifyTimestampAndWindowsOperation
        op = ReifyTimestampAndWindowsOperation(operation_name, spec,
                                               counter_factory, state_sampler)
    else:
        raise TypeError(
            'Expected an instance of operation_specs.Worker* class '
            'instead of %s' % (spec, ))
    op.step_name = step_name
    op.metrics_container = MetricsContainer(step_name)
    op.scoped_metrics_container = ScopedMetricsContainer(op.metrics_container)
    return op
Example #10
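A legacy DoFnRunner.__init__ supporting both NewDoFn and OldDoFn: for OldDoFn it curries side inputs into the process function, and it keeps a ScopedMetricsContainer for the metrics scope.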
    def __init__(
            self,
            fn,
            args,
            kwargs,
            side_inputs,
            windowing,
            context=None,
            tagged_receivers=None,
            logger=None,
            step_name=None,
            # Preferred alternative to logger
            # TODO(robertwb): Remove once all runners are updated.
            logging_context=None,
            # Preferred alternative to context
            # TODO(robertwb): Remove once all runners are updated.
            state=None,
            scoped_metrics_container=None):
        """Initializes a DoFnRunner.

        Args:
          fn: user DoFn to invoke
          args: positional side input arguments (static and placeholder), if any
          kwargs: keyword side input arguments (static and placeholder), if any
          side_inputs: list of sideinput.SideInputMaps for deferred side inputs
          windowing: windowing properties of the output PCollection(s)
          context: a DoFnContext to use (deprecated)
          tagged_receivers: a dict of tag name to Receiver objects
          logger: a logging module (deprecated)
          step_name: the name of this step
          logging_context: a LoggingContext object
          state: handle for accessing DoFn state
          scoped_metrics_container: Context switcher for metrics container
        """
        self.step_name = step_name
        self.window_fn = windowing.windowfn
        self.tagged_receivers = tagged_receivers
        self.scoped_metrics_container = (scoped_metrics_container
                                         or ScopedMetricsContainer())

        global_window = window.GlobalWindow()

        # Need to support multiple iterations.
        side_inputs = list(side_inputs)

        if logging_context:
            self.logging_context = logging_context
        else:
            self.logging_context = get_logging_context(logger,
                                                       step_name=step_name)

        # Optimize for the common case.
        self.main_receivers = as_receiver(tagged_receivers[None])

        # TODO(sourabh): Deprecate the use of context
        if state:
            assert context is None
            self.context = DoFnContext(self.step_name, state=state)
        else:
            assert context is not None
            self.context = context

        # TODO(Sourabhbajaj): Remove the usage of OldDoFn
        if isinstance(fn, core.NewDoFn):
            self.is_new_dofn = True

            # Stash values for use in new_dofn_process.
            self.side_inputs = side_inputs
            self.has_windowed_side_inputs = not all(si.is_globally_windowed()
                                                    for si in self.side_inputs)

            self.args = args if args else []
            self.kwargs = kwargs if kwargs else {}
            self.dofn = fn

        else:
            self.is_new_dofn = False
            self.has_windowed_side_inputs = False  # Set to True in one case below.
            if not args and not kwargs:
                self.dofn = fn
                self.dofn_process = fn.process
            else:
                if side_inputs and all(side_input.is_globally_windowed()
                                       for side_input in side_inputs):
                    args, kwargs = util.insert_values_in_args(
                        args, kwargs, [
                            side_input[global_window]
                            for side_input in side_inputs
                        ])
                    side_inputs = []
                if side_inputs:
                    self.has_windowed_side_inputs = True

                    def process(context):
                        w = context.windows[0]
                        cur_args, cur_kwargs = util.insert_values_in_args(
                            args, kwargs,
                            [side_input[w] for side_input in side_inputs])
                        return fn.process(context, *cur_args, **cur_kwargs)

                    self.dofn_process = process
                elif kwargs:
                    self.dofn_process = lambda context: fn.process(
                        context, *args, **kwargs)
                else:
                    self.dofn_process = lambda context: fn.process(
                        context, *args)

                class CurriedFn(core.DoFn):

                    start_bundle = staticmethod(fn.start_bundle)
                    process = staticmethod(self.dofn_process)
                    finish_bundle = staticmethod(fn.finish_bundle)

                self.dofn = CurriedFn()
Example #11
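DoFnRunner.__init__ from tedyu/beam's common.py: an intermediate version that resolves the deprecated context/state arguments and already delegates to DoFnInvoker.create_invoker.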
File: common.py Project: tedyu/beam
    def __init__(
            self,
            fn,
            args,
            kwargs,
            side_inputs,
            windowing,
            context=None,
            tagged_receivers=None,
            logger=None,
            step_name=None,
            # Preferred alternative to logger
            # TODO(robertwb): Remove once all runners are updated.
            logging_context=None,
            # Preferred alternative to context
            # TODO(robertwb): Remove once all runners are updated.
            state=None,
            scoped_metrics_container=None):
        """Initializes a DoFnRunner.

        Args:
          fn: user DoFn to invoke
          args: positional side input arguments (static and placeholder), if any
          kwargs: keyword side input arguments (static and placeholder), if any
          side_inputs: list of sideinput.SideInputMaps for deferred side inputs
          windowing: windowing properties of the output PCollection(s)
          context: a DoFnContext to use (deprecated)
          tagged_receivers: a dict of tag name to Receiver objects
          logger: a logging module (deprecated)
          step_name: the name of this step
          logging_context: a LoggingContext object
          state: handle for accessing DoFn state
          scoped_metrics_container: Context switcher for metrics container
        """
        self.scoped_metrics_container = (scoped_metrics_container
                                         or ScopedMetricsContainer())
        self.step_name = step_name

        # Need to support multiple iterations.
        side_inputs = list(side_inputs)

        if logging_context:
            self.logging_context = logging_context
        else:
            self.logging_context = get_logging_context(logger,
                                                       step_name=step_name)

        # TODO(sourabh): Deprecate the use of context
        if state:
            assert context is None
            context = DoFnContext(step_name, state=state)
        else:
            assert context is not None

        self.context = context

        do_fn_signature = DoFnSignature(fn)

        # Optimize for the common case.
        main_receivers = as_receiver(tagged_receivers[None])
        output_processor = OutputProcessor(windowing.windowfn, main_receivers,
                                           tagged_receivers)

        self.do_fn_invoker = DoFnInvoker.create_invoker(
            output_processor, do_fn_signature, context, side_inputs, args,
            kwargs)
Example #12
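DoFnRunner.__init__ with argument placeholders: globally windowed side inputs are filled in up front, and ArgPlaceholder positions for element/window/timestamp parameters are precomputed for per-element dispatch.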
    def __init__(
            self,
            fn,
            args,
            kwargs,
            side_inputs,
            windowing,
            context=None,
            tagged_receivers=None,
            logger=None,
            step_name=None,
            # Preferred alternative to logger
            # TODO(robertwb): Remove once all runners are updated.
            logging_context=None,
            # Preferred alternative to context
            # TODO(robertwb): Remove once all runners are updated.
            state=None,
            scoped_metrics_container=None):
        """Initializes a DoFnRunner.

        Args:
          fn: user DoFn to invoke
          args: positional side input arguments (static and placeholder), if any
          kwargs: keyword side input arguments (static and placeholder), if any
          side_inputs: list of sideinput.SideInputMaps for deferred side inputs
          windowing: windowing properties of the output PCollection(s)
          context: a DoFnContext to use (deprecated)
          tagged_receivers: a dict of tag name to Receiver objects
          logger: a logging module (deprecated)
          step_name: the name of this step
          logging_context: a LoggingContext object
          state: handle for accessing DoFn state
          scoped_metrics_container: Context switcher for metrics container
        """
        self.step_name = step_name
        self.window_fn = windowing.windowfn
        self.tagged_receivers = tagged_receivers
        self.scoped_metrics_container = (scoped_metrics_container
                                         or ScopedMetricsContainer())

        global_window = GlobalWindow()

        # Need to support multiple iterations.
        side_inputs = list(side_inputs)

        if logging_context:
            self.logging_context = logging_context
        else:
            self.logging_context = get_logging_context(logger,
                                                       step_name=step_name)

        # Optimize for the common case.
        self.main_receivers = as_receiver(tagged_receivers[None])

        # TODO(sourabh): Deprecate the use of context
        if state:
            assert context is None
            self.context = DoFnContext(self.step_name, state=state)
        else:
            assert context is not None
            self.context = context

        class ArgPlaceholder(object):
            def __init__(self, placeholder):
                self.placeholder = placeholder

        # Stash values for use in dofn_process.
        self.side_inputs = side_inputs
        self.has_windowed_inputs = not all(si.is_globally_windowed()
                                           for si in self.side_inputs)

        self.args = args if args else []
        self.kwargs = kwargs if kwargs else {}
        self.dofn = fn
        self.dofn_process = fn.process

        arguments, _, _, defaults = self.dofn.get_function_arguments('process')
        defaults = defaults if defaults else []
        self_in_args = int(self.dofn.is_process_bounded())

        self.use_simple_invoker = (not side_inputs and not args and not kwargs
                                   and not defaults)
        if self.use_simple_invoker:
            # As we're using the simple invoker we don't need to compute placeholders
            return

        self.has_windowed_inputs = (self.has_windowed_inputs
                                    or core.DoFn.WindowParam in defaults)

        # Try to prepare all the arguments that can just be filled in without
        # any additional work in the process function. Also cache all the
        # placeholders needed in the process function.

        # Fill in sideInputs if they are globally windowed
        if not self.has_windowed_inputs:
            self.args, self.kwargs = util.insert_values_in_args(
                args, kwargs, [si[global_window] for si in side_inputs])

        # Create placeholder for element parameter
        if core.DoFn.ElementParam not in defaults:
            args_to_pick = len(arguments) - len(defaults) - 1 - self_in_args
            final_args = ([ArgPlaceholder(core.DoFn.ElementParam)] +
                          self.args[:args_to_pick])
        else:
            args_to_pick = len(arguments) - len(defaults) - self_in_args
            final_args = self.args[:args_to_pick]

        # Fill the OtherPlaceholders for context, window or timestamp
        args = iter(self.args[args_to_pick:])
        for a, d in zip(arguments[-len(defaults):], defaults):
            if d == core.DoFn.ElementParam:
                final_args.append(ArgPlaceholder(d))
            elif d == core.DoFn.ContextParam:
                final_args.append(ArgPlaceholder(d))
            elif d == core.DoFn.WindowParam:
                final_args.append(ArgPlaceholder(d))
            elif d == core.DoFn.TimestampParam:
                final_args.append(ArgPlaceholder(d))
            elif d == core.DoFn.SideInputParam:
                # If no more args are present then the value must be passed via kwarg
                try:
                    final_args.append(next(args))
                except StopIteration:
                    if a not in self.kwargs:
                        raise ValueError(
                            "Value for side input %s not provided" % a)
            else:
                # If no more args are present then the value must be passed via kwarg
                try:
                    final_args.append(next(args))
                except StopIteration:
                    pass
        final_args.extend(list(args))
        self.args = final_args

        # Stash the list of placeholder positions for performance
        self.placeholders = [(i, x.placeholder)
                             for (i, x) in enumerate(self.args)
                             if isinstance(x, ArgPlaceholder)]
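The placeholder pass above runs once at construction time: globally windowed side inputs are substituted immediately, while element/window/timestamp slots are recorded as (index, placeholder) pairs, so per-element invocation only has to patch the stashed positions instead of re-inspecting the function signature.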