Exemple #1
0
def create(factory, transform_id, transform_proto, unused_parameter,
           consumers):
    return factory.augment_oldstyle_op(
        operations.FlattenOperation(
            transform_proto.unique_name,
            operation_specs.WorkerFlatten(
                None, [factory.get_only_output_coder(transform_proto)]),
            factory.counter_factory, factory.state_sampler),
        transform_proto.unique_name, consumers)
Exemple #2
0
  def create_execution_tree(self, descriptor):
    # TODO(vikasrk): Add an id field to Coder proto and use that instead.
    coders = {coder.function_spec.id: operation_specs.get_coder_from_spec(
        json.loads(unpack_function_spec_data(coder.function_spec)))
              for coder in descriptor.coders}

    counter_factory = counters.CounterFactory()
    # TODO(robertwb): Figure out the correct prefix to use for output counters
    # from StateSampler.
    state_sampler = statesampler.StateSampler(
        'fnapi-step%s-' % descriptor.id, counter_factory)
    consumers = collections.defaultdict(lambda: collections.defaultdict(list))
    ops_by_id = {}
    reversed_ops = []

    for transform in reversed(descriptor.primitive_transform):
      # TODO(robertwb): Figure out how to plumb through the operation name (e.g.
      # "s3") from the service through the FnAPI so that msec counters can be
      # reported and correctly plumbed through the service and the UI.
      operation_name = 'fnapis%s' % transform.id

      def only_element(iterable):
        element, = iterable
        return element

      if transform.function_spec.urn == DATA_OUTPUT_URN:
        target = beam_fn_api_pb2.Target(
            primitive_transform_reference=transform.id,
            name=only_element(transform.outputs.keys()))

        op = DataOutputOperation(
            operation_name,
            transform.step_name,
            consumers[transform.id],
            counter_factory,
            state_sampler,
            coders[only_element(transform.outputs.values()).coder_reference],
            target,
            self.data_channel_factory.create_data_channel(
                transform.function_spec))

      elif transform.function_spec.urn == DATA_INPUT_URN:
        target = beam_fn_api_pb2.Target(
            primitive_transform_reference=transform.id,
            name=only_element(transform.inputs.keys()))
        op = DataInputOperation(
            operation_name,
            transform.step_name,
            consumers[transform.id],
            counter_factory,
            state_sampler,
            coders[only_element(transform.outputs.values()).coder_reference],
            target,
            self.data_channel_factory.create_data_channel(
                transform.function_spec))

      elif transform.function_spec.urn == PYTHON_DOFN_URN:
        def create_side_input(tag, si):
          # TODO(robertwb): Extract windows (and keys) out of element data.
          return operation_specs.WorkerSideInputSource(
              tag=tag,
              source=SideInputSource(
                  self.state_handler,
                  beam_fn_api_pb2.StateKey(
                      function_spec_reference=si.view_fn.id),
                  coder=unpack_and_deserialize_py_fn(si.view_fn)))
        output_tags = list(transform.outputs.keys())
        spec = operation_specs.WorkerDoFn(
            serialized_fn=unpack_function_spec_data(transform.function_spec),
            output_tags=output_tags,
            input=None,
            side_inputs=[create_side_input(tag, si)
                         for tag, si in transform.side_inputs.items()],
            output_coders=[coders[transform.outputs[out].coder_reference]
                           for out in output_tags])

        op = operations.DoOperation(operation_name, spec, counter_factory,
                                    state_sampler)
        # TODO(robertwb): Move these to the constructor.
        op.step_name = transform.step_name
        for tag, op_consumers in consumers[transform.id].items():
          for consumer in op_consumers:
            op.add_receiver(
                consumer, output_tags.index(tag))

      elif transform.function_spec.urn == IDENTITY_DOFN_URN:
        op = operations.FlattenOperation(operation_name, None, counter_factory,
                                         state_sampler)
        # TODO(robertwb): Move these to the constructor.
        op.step_name = transform.step_name
        for tag, op_consumers in consumers[transform.id].items():
          for consumer in op_consumers:
            op.add_receiver(consumer, 0)

      elif transform.function_spec.urn == PYTHON_SOURCE_URN:
        source = load_compressed(unpack_function_spec_data(
            transform.function_spec))
        # TODO(vikasrk): Remove this once custom source is implemented with
        # splittable dofn via the data plane.
        spec = operation_specs.WorkerRead(
            iobase.SourceBundle(1.0, source, None, None),
            [WindowedValueCoder(source.default_output_coder())])
        op = operations.ReadOperation(operation_name, spec, counter_factory,
                                      state_sampler)
        op.step_name = transform.step_name
        output_tags = list(transform.outputs.keys())
        for tag, op_consumers in consumers[transform.id].items():
          for consumer in op_consumers:
            op.add_receiver(
                consumer, output_tags.index(tag))

      else:
        raise NotImplementedError

      # Record consumers.
      for _, inputs in transform.inputs.items():
        for target in inputs.target:
          consumers[target.primitive_transform_reference][target.name].append(
              op)

      reversed_ops.append(op)
      ops_by_id[transform.id] = op

    return list(reversed(reversed_ops)), ops_by_id