예제 #1
0
 def _infer_result_type(self, transform, inputs, result_pcollection):
     """Assign an element type to ``result_pcollection`` when it lacks one.

     Nothing happens unless all of the following hold: pipeline type
     checking is enabled in the ``TypeOptions`` view of the pipeline
     options, ``result_pcollection`` is a ``pvalue.PCollection``, and its
     current ``element_type`` is falsy or equal to ``typehints.Any``.

     Args:
       transform: the transform that produced ``result_pcollection``; its
         type hints and ``infer_output_type`` are consulted.
       inputs: sequence of the transform's inputs. The element type of the
         sole input is used when there is exactly one input; otherwise
         ``typehints.Any`` is used.
       result_pcollection: the output whose ``element_type`` may be set.
     """
     # TODO(robertwb): Multi-input, multi-output inference.
     type_options = self._options.view_as(TypeOptions)
     if type_options is None or not type_options.pipeline_type_check:
         return
     if not isinstance(result_pcollection, pvalue.PCollection):
         return

     existing_type = result_pcollection.element_type
     # TODO(robertwb): Ideally we'd do intersection here.
     if existing_type and not existing_type == typehints.Any:
         # A specific element type is already present; keep it.
         return

     if len(inputs) == 1:
         input_element_type = inputs[0].element_type
     else:
         input_element_type = typehints.Any

     hints = transform.get_type_hints()
     declared_output_type = hints.simple_output_type(transform.label)
     if not declared_output_type:
         # No declared output hint: let the transform infer one itself.
         result_pcollection.element_type = transform.infer_output_type(
             input_element_type)
         return

     declared_inputs = hints.input_types
     if declared_inputs and declared_inputs[0]:
         # Bind type variables in the declared output using the match
         # between the first declared input type and the actual input type.
         bindings = typehints.match_type_variables(
             declared_inputs[0][0], input_element_type)
         result_pcollection.element_type = typehints.bind_type_variables(
             declared_output_type, bindings)
     else:
         result_pcollection.element_type = declared_output_type
예제 #2
0
 def _infer_result_type(self, transform, inputs, result_pcollection):
   """Assign an element type to ``result_pcollection`` when it has none.

   Nothing happens unless pipeline type checking is enabled in the
   ``TypeOptions`` view of the pipeline options, ``result_pcollection`` is a
   ``pvalue.PCollection``, and its current ``element_type`` is falsy.

   Args:
     transform: the transform that produced ``result_pcollection``; its type
       hints and ``infer_output_type`` are consulted.
     inputs: sequence of the transform's inputs. The element type of the sole
       input is used when there is exactly one; otherwise ``typehints.Any``.
     result_pcollection: the output whose ``element_type`` may be set.
   """
   # TODO(robertwb): Multi-input, multi-output inference.
   # TODO(robertwb): Ideally we'd do intersection here.
   type_options = self._options.view_as(TypeOptions)
   if type_options is None or not type_options.pipeline_type_check:
     return
   if not isinstance(result_pcollection, pvalue.PCollection):
     return
   if result_pcollection.element_type:
     # An element type is already present; keep it.
     return

   if len(inputs) == 1:
     input_element_type = inputs[0].element_type
   else:
     input_element_type = typehints.Any

   hints = transform.get_type_hints()
   declared_output_type = hints.simple_output_type(transform.label)
   if not declared_output_type:
     # No declared output hint: let the transform infer one itself.
     result_pcollection.element_type = transform.infer_output_type(
         input_element_type)
     return

   declared_inputs = hints.input_types
   if declared_inputs and declared_inputs[0]:
     # Bind type variables in the declared output using the match between
     # the first declared input type and the actual input element type.
     bindings = typehints.match_type_variables(
         declared_inputs[0][0], input_element_type)
     result_pcollection.element_type = typehints.bind_type_variables(
         declared_output_type, bindings)
   else:
     result_pcollection.element_type = declared_output_type
예제 #3
0
파일: pipeline.py 프로젝트: scosenza/beam
 def _infer_result_type(self, transform, inputs, result_pcollection):
     """Fill in missing element types on the output of ``transform``.

     Does nothing when pipeline type checking is disabled. Otherwise:

     * a ``pvalue.PCollection`` whose ``element_type`` is falsy or equal to
       ``typehints.Any`` gets a type derived from the transform's type
       hints, or from ``transform.infer_output_type`` when no output hint
       is declared;
     * for a ``pvalue.DoOutputsTuple``, each contained output whose
       ``element_type`` is ``None`` is set to ``typehints.Any``.

     Args:
       transform: the transform that produced ``result_pcollection``.
       inputs: sequence of the transform's inputs. The element type of the
         sole input is used when there is exactly one; otherwise
         ``typehints.Any``.
       result_pcollection: the output value to annotate.
     """
     # TODO(robertwb): Multi-input inference.
     type_options = self._options.view_as(TypeOptions)
     if type_options is None or not type_options.pipeline_type_check:
         return

     needs_inference = False
     if isinstance(result_pcollection, pvalue.PCollection):
         existing_type = result_pcollection.element_type
         # TODO(robertwb): Ideally we'd do intersection here.
         needs_inference = (
             not existing_type or existing_type == typehints.Any)

     if needs_inference:
         # Single-input, single-output inference.
         if len(inputs) == 1:
             input_element_type = inputs[0].element_type
         else:
             input_element_type = typehints.Any

         hints = transform.get_type_hints()
         declared_output_type = hints.simple_output_type(transform.label)
         if not declared_output_type:
             result_pcollection.element_type = transform.infer_output_type(
                 input_element_type)
             return

         declared_inputs = hints.input_types
         if declared_inputs and declared_inputs[0]:
             # Bind type variables in the declared output using the match
             # between the first declared input type and the actual one.
             bindings = typehints.match_type_variables(
                 declared_inputs[0][0], input_element_type)
             result_pcollection.element_type = (
                 typehints.bind_type_variables(
                     declared_output_type, bindings))
         else:
             result_pcollection.element_type = declared_output_type
     elif isinstance(result_pcollection, pvalue.DoOutputsTuple):
         # Single-input, multi-output inference.
         # TODO(BEAM-4132): Add support for tagged type hints.
         #   https://github.com/apache/beam/pull/9810#discussion_r338765251
         for tagged_output in result_pcollection:
             if tagged_output.element_type is None:
                 tagged_output.element_type = typehints.Any
예제 #4
0
  def apply(self, transform, pvalueish=None, label=None):
    """Applies a custom transform using the pvalueish specified.

    Args:
      transform (~apache_beam.transforms.ptransform.PTransform): the
        :class:`~apache_beam.transforms.ptransform.PTransform` to apply.
      pvalueish (~apache_beam.pvalue.PCollection): the input for the
        :class:`~apache_beam.transforms.ptransform.PTransform` (typically a
        :class:`~apache_beam.pvalue.PCollection`).
      label (str): label of the
        :class:`~apache_beam.transforms.ptransform.PTransform`.

    Returns:
      The value returned by the runner's ``apply`` for ``transform`` and
      ``pvalueish``.

    Raises:
      ~exceptions.TypeError: if the transform object extracted from the
        argument list is not a
        :class:`~apache_beam.transforms.ptransform.PTransform`.
      ~exceptions.RuntimeError: if the full label of the transform has
        already been applied to this pipeline (the transform needs a unique
        label in order to be applied again).
      ~exceptions.NotImplementedError: if the inputs extracted from
        ``pvalueish`` cannot be turned into a tuple of PValues.
      TypeCheckError: if ``type_check_strictness`` is ``'ALL_REQUIRED'`` and
        the transform declares no output type hints.
    """
    if isinstance(transform, ptransform._NamedPTransform):
      return self.apply(transform.transform, pvalueish,
                        label or transform.label)

    if not isinstance(transform, ptransform.PTransform):
      raise TypeError("Expected a PTransform object, got %s" % transform)

    if label:
      # Fix self.label as it is inspected by some PTransform operations
      # (e.g. to produce error messages for type hint violations).
      # The swap happens before the ``try`` so ``old_label`` is always bound
      # by the time the ``finally`` clause restores it.
      old_label, transform.label = transform.label, label
      try:
        return self.apply(transform, pvalueish)
      finally:
        transform.label = old_label

    full_label = '/'.join([self._current_transform().full_label,
                           label or transform.label]).lstrip('/')
    if full_label in self.applied_labels:
      raise RuntimeError(
          'Transform "%s" does not have a stable unique label. '
          'This will prevent updating of pipelines. '
          'To apply a transform with a specified label write '
          'pvalue | "label" >> transform'
          % full_label)
    self.applied_labels.add(full_label)

    pvalueish, inputs = transform._extract_input_pvalues(pvalueish)
    try:
      inputs = tuple(inputs)
      for leaf_input in inputs:
        if not isinstance(leaf_input, pvalue.PValue):
          raise TypeError
    except TypeError:
      raise NotImplementedError(
          'Unable to extract PValue inputs from %s; either %s does not accept '
          'inputs of this format, or it does not properly override '
          '_extract_input_pvalues' % (pvalueish, transform))

    current = AppliedPTransform(
        self._current_transform(), transform, full_label, inputs)
    self._current_transform().add_part(current)
    self.transforms_stack.append(current)

    # Pop in a ``finally`` so that an exception raised while applying the
    # transform does not leave ``current`` on the transforms stack.
    try:
      type_options = self._options.view_as(TypeOptions)
      # Guard against a missing options view once, consistently for every
      # type-checking step below.
      type_check_enabled = (
          type_options is not None and type_options.pipeline_type_check)

      if type_check_enabled:
        transform.type_check_inputs(pvalueish)

      pvalueish_result = self.runner.apply(transform, pvalueish)

      if type_check_enabled:
        transform.type_check_outputs(pvalueish_result)

      for result in ptransform.get_nested_pvalues(pvalueish_result):
        assert isinstance(result, (pvalue.PValue, pvalue.DoOutputsTuple))

        # Make sure we set the producer only for a leaf node in the transform
        # DAG. This way we preserve the last transform of a composite
        # transform as being the real producer of the result.
        if result.producer is None:
          result.producer = current
        # TODO(robertwb): Multi-input, multi-output inference.
        # TODO(robertwb): Ideally we'd do intersection here.
        if (type_check_enabled
            and isinstance(result, pvalue.PCollection)
            and not result.element_type):
          input_element_type = (
              inputs[0].element_type
              if len(inputs) == 1
              else typehints.Any)
          type_hints = transform.get_type_hints()
          declared_output_type = type_hints.simple_output_type(
              transform.label)
          if declared_output_type:
            input_types = type_hints.input_types
            if input_types and input_types[0]:
              declared_input_type = input_types[0][0]
              result.element_type = typehints.bind_type_variables(
                  declared_output_type,
                  typehints.match_type_variables(declared_input_type,
                                                 input_element_type))
            else:
              result.element_type = declared_output_type
          else:
            result.element_type = transform.infer_output_type(
                input_element_type)

        assert isinstance(result.producer.inputs, tuple)
        current.add_output(result)

      if (type_options is not None and
          type_options.type_check_strictness == 'ALL_REQUIRED' and
          transform.get_type_hints().output_types is None):
        ptransform_name = '%s(%s)' % (transform.__class__.__name__, full_label)
        raise TypeCheckError('Pipeline type checking is enabled, however no '
                             'output type-hint was found for the '
                             'PTransform %s' % ptransform_name)

      current.update_input_refcounts()
    finally:
      self.transforms_stack.pop()
    return pvalueish_result
예제 #5
0
  def apply(self, transform, pvalueish=None, label=None):
    """Applies a custom transform using the pvalueish specified.

    Args:
      transform: the PTransform to apply.
      pvalueish: the input for the PTransform (typically a PCollection).
      label: label of the PTransform.

    Returns:
      The value returned by the runner's ``apply`` for ``transform`` and
      ``pvalueish``.

    Raises:
      TypeError: if the transform object extracted from the argument list is
        not a PTransform.
      RuntimeError: if the full label of the transform has already been
        applied to this pipeline (the transform needs a unique label in order
        to be applied again).
      NotImplementedError: if the inputs extracted from ``pvalueish`` cannot
        be turned into a tuple of PValues.
      TypeCheckError: if ``type_check_strictness`` is ``'ALL_REQUIRED'`` and
        the transform declares no output type hints.
    """
    if isinstance(transform, ptransform._NamedPTransform):
      return self.apply(transform.transform, pvalueish,
                        label or transform.label)

    if not isinstance(transform, ptransform.PTransform):
      raise TypeError("Expected a PTransform object, got %s" % transform)

    if label:
      # Fix self.label as it is inspected by some PTransform operations
      # (e.g. to produce error messages for type hint violations).
      # The swap happens before the ``try`` so ``old_label`` is always bound
      # by the time the ``finally`` clause restores it.
      old_label, transform.label = transform.label, label
      try:
        return self.apply(transform, pvalueish)
      finally:
        transform.label = old_label

    full_label = '/'.join([self._current_transform().full_label,
                           label or transform.label]).lstrip('/')
    if full_label in self.applied_labels:
      raise RuntimeError(
          'Transform "%s" does not have a stable unique label. '
          'This will prevent updating of pipelines. '
          'To apply a transform with a specified label write '
          'pvalue | "label" >> transform'
          % full_label)
    self.applied_labels.add(full_label)

    pvalueish, inputs = transform._extract_input_pvalues(pvalueish)
    try:
      inputs = tuple(inputs)
      for leaf_input in inputs:
        if not isinstance(leaf_input, pvalue.PValue):
          raise TypeError
    except TypeError:
      raise NotImplementedError(
          'Unable to extract PValue inputs from %s; either %s does not accept '
          'inputs of this format, or it does not properly override '
          '_extract_input_pvalues' % (pvalueish, transform))

    current = AppliedPTransform(
        self._current_transform(), transform, full_label, inputs)
    self._current_transform().add_part(current)
    self.transforms_stack.append(current)

    # Pop in a ``finally`` so that an exception raised while applying the
    # transform does not leave ``current`` on the transforms stack.
    try:
      type_options = self._options.view_as(TypeOptions)
      # Guard against a missing options view once, consistently for every
      # type-checking step below.
      type_check_enabled = (
          type_options is not None and type_options.pipeline_type_check)

      if type_check_enabled:
        transform.type_check_inputs(pvalueish)

      pvalueish_result = self.runner.apply(transform, pvalueish)

      if type_check_enabled:
        transform.type_check_outputs(pvalueish_result)

      for result in ptransform.GetPValues().visit(pvalueish_result):
        assert isinstance(result, (pvalue.PValue, pvalue.DoOutputsTuple))

        # Make sure we set the producer only for a leaf node in the transform
        # DAG. This way we preserve the last transform of a composite
        # transform as being the real producer of the result.
        if result.producer is None:
          result.producer = current
        # TODO(robertwb): Multi-input, multi-output inference.
        # TODO(robertwb): Ideally we'd do intersection here.
        if (type_check_enabled
            and isinstance(result, pvalue.PCollection)
            and not result.element_type):
          input_element_type = (
              inputs[0].element_type
              if len(inputs) == 1
              else typehints.Any)
          type_hints = transform.get_type_hints()
          declared_output_type = type_hints.simple_output_type(
              transform.label)
          if declared_output_type:
            input_types = type_hints.input_types
            if input_types and input_types[0]:
              declared_input_type = input_types[0][0]
              result.element_type = typehints.bind_type_variables(
                  declared_output_type,
                  typehints.match_type_variables(declared_input_type,
                                                 input_element_type))
            else:
              result.element_type = declared_output_type
          else:
            result.element_type = transform.infer_output_type(
                input_element_type)

        assert isinstance(result.producer.inputs, tuple)
        current.add_output(result)

      if (type_options is not None and
          type_options.type_check_strictness == 'ALL_REQUIRED' and
          transform.get_type_hints().output_types is None):
        ptransform_name = '%s(%s)' % (transform.__class__.__name__, full_label)
        raise TypeCheckError('Pipeline type checking is enabled, however no '
                             'output type-hint was found for the '
                             'PTransform %s' % ptransform_name)

      current.update_input_refcounts()
    finally:
      self.transforms_stack.pop()
    return pvalueish_result
예제 #6
0
 def match_type_variables(self, concrete_type):
     """Match this constraint's key type against ``concrete_type``'s.

     Args:
       concrete_type: the type to match against.

     Returns:
       The result of ``typehints.match_type_variables`` applied to this
       constraint's ``key_type`` and ``concrete_type.key_type`` when
       ``concrete_type`` is a ``ShardedKeyTypeConstraint``; otherwise an
       empty dict.
     """
     if not isinstance(concrete_type, ShardedKeyTypeConstraint):
         # Nothing to match against a type that is not a sharded-key
         # constraint.
         return {}
     other_key_type = concrete_type.key_type
     return typehints.match_type_variables(self.key_type, other_key_type)