Example #1
0
    def __init__(self,
                 input_config: Union[example_gen_pb2.Input, Dict[Text, Any]],
                 output_config: Optional[Union[example_gen_pb2.Output,
                                               Dict[Text, Any]]] = None,
                 custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                               Dict[Text, Any]]] = None,
                 output_data_format: Optional[int] = (
                     example_gen_pb2.FORMAT_TF_EXAMPLE),
                 example_artifacts: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct a QueryBasedExampleGen component.

        Args:
          input_config: An
            [example_gen_pb2.Input](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing input configuration. If any field is provided
            as a RuntimeParameter, input_config should be constructed as a dict
            with the same field names as Input proto message. _required_
          output_config: An
            [example_gen_pb2.Output](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing output configuration. If unset, the default
            splits will be labeled as 'train' and 'eval' with a distribution
            ratio of 2:1. If any field is provided as a RuntimeParameter,
            output_config should be constructed as a dict with the same field
            names as Output proto message.
          custom_config: An
            [example_gen_pb2.CustomConfig](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing custom configuration for ExampleGen. If any
            field is provided as a RuntimeParameter, custom_config should be
            constructed as a dict.
          output_data_format: Payload format of generated data in output
            artifact, one of example_gen_pb2.PayloadFormat enum. Defaults to
            example_gen_pb2.FORMAT_TF_EXAMPLE.
          example_artifacts: Channel of `standard_artifacts.Examples` for output
            train and eval examples. If unset, a new channel of that type is
            created.
          instance_name: Optional unique instance name. Required only if
            multiple ExampleGen components are declared in the same pipeline.

        Raises:
          ValueError: The output_data_format value must be defined in the
            example_gen_pb2.PayloadFormat proto.
        """
        # Configure outputs.
        output_config = output_config or utils.make_default_output_config(
            input_config)
        if not example_artifacts:
            example_artifacts = types.Channel(type=standard_artifacts.Examples)
        if output_data_format not in example_gen_pb2.PayloadFormat.values():
            # Adjacent literals are concatenated verbatim, so the first one
            # must end with a space to keep "in the" from fusing into "inthe".
            raise ValueError(
                'The value of output_data_format must be defined in '
                'the example_gen_pb2.PayloadFormat proto.')

        spec = QueryBasedExampleGenSpec(input_config=input_config,
                                        output_config=output_config,
                                        output_data_format=output_data_format,
                                        custom_config=custom_config,
                                        examples=example_artifacts)
        super(QueryBasedExampleGen, self).__init__(spec=spec,
                                                   instance_name=instance_name)
Example #2
0
    def __init__(self,
                 input_config: Union[example_gen_pb2.Input, Dict[Text, Any]],
                 output_config: Optional[Union[example_gen_pb2.Output,
                                               Dict[Text, Any]]] = None,
                 custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                               Dict[Text, Any]]] = None,
                 example_artifacts: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None,
                 enable_cache: Optional[bool] = None):
        """Construct a QueryBasedExampleGen component.

        Args:
          input_config: An
            [example_gen_pb2.Input](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing input configuration. If any field is provided
            as a RuntimeParameter, input_config should be constructed as a dict
            with the same field names as Input proto message. _required_
          output_config: An
            [example_gen_pb2.Output](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing output configuration. If unset, the default
            splits will be labeled as 'train' and 'eval' with a distribution
            ratio of 2:1. If any field is provided as a RuntimeParameter,
            output_config should be constructed as a dict with the same field
            names as Output proto message.
          custom_config: An
            [example_gen_pb2.CustomConfig](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing custom configuration for ExampleGen. If any
            field is provided as a RuntimeParameter, custom_config should be
            constructed as a dict.
          example_artifacts: Channel of `standard_artifacts.Examples` for output
            train and eval examples. If unset, a channel wrapping a single
            Examples artifact is created, with its split names derived from
            input_config and output_config.
          instance_name: Optional unique instance name. Required only if
            multiple ExampleGen components are declared in the same pipeline.
          enable_cache: Optional boolean to indicate if cache is enabled for
            the QueryBasedExampleGen component. If not specified, defaults to
            the value specified for pipeline's enable_cache parameter.
        """
        # Fall back to the default output configuration when none was given.
        if not output_config:
            output_config = utils.make_default_output_config(input_config)

        # No channel supplied: publish one Examples artifact that records the
        # encoded names of every split this component will produce.
        if not example_artifacts:
            default_examples = standard_artifacts.Examples()
            split_names = utils.generate_output_split_names(
                input_config, output_config)
            default_examples.split_names = artifact_utils.encode_split_names(
                split_names)
            example_artifacts = channel_utils.as_channel([default_examples])

        spec = QueryBasedExampleGenSpec(
            input_config=input_config,
            output_config=output_config,
            custom_config=custom_config,
            examples=example_artifacts,
        )
        base = super(_QueryBasedExampleGen, self)
        base.__init__(
            spec=spec,
            instance_name=instance_name,
            enable_cache=enable_cache,
        )
Example #3
0
  def __init__(self,
               input_config: example_gen_pb2.Input,
               output_config: Optional[example_gen_pb2.Output] = None,
               custom_config: Optional[example_gen_pb2.CustomConfig] = None,
               example_artifacts: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Construct a QueryBasedExampleGen component.

    Args:
      input_config: An
        [example_gen_pb2.Input](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
        instance, providing input configuration. _required_
      output_config: An
        [example_gen_pb2.Output](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
        instance, providing output configuration. If unset, the default splits
        will be labeled as 'train' and 'eval' with a distribution ratio of 2:1.
      custom_config: An
        [example_gen_pb2.CustomConfig](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
        instance, providing custom configuration for ExampleGen.
      example_artifacts: Channel of 'ExamplesPath' for output train and eval
        examples. If unset, a channel holding one `standard_artifacts.Examples`
        artifact per generated output split name is created.
      instance_name: Optional unique instance name. Required only if multiple
        ExampleGen components are declared in the same pipeline.
    """
    # Fall back to the default output configuration when none was given.
    if not output_config:
      output_config = utils.make_default_output_config(input_config)

    # No channel supplied: create one Examples artifact per output split.
    if not example_artifacts:
      split_names = utils.generate_output_split_names(input_config,
                                                      output_config)
      per_split_examples = [
          standard_artifacts.Examples(split=split_name)
          for split_name in split_names
      ]
      example_artifacts = channel_utils.as_channel(per_split_examples)

    spec = QueryBasedExampleGenSpec(input_config=input_config,
                                    output_config=output_config,
                                    custom_config=custom_config,
                                    examples=example_artifacts)
    base = super(_QueryBasedExampleGen, self)
    base.__init__(spec=spec, instance_name=instance_name)
Example #4
0
  def __init__(self,
               input_config: example_gen_pb2.Input,
               output_config: Optional[example_gen_pb2.Output] = None,
               custom_config: Optional[example_gen_pb2.CustomConfig] = None,
               component_name: Optional[Text] = 'ExampleGen',
               example_artifacts: Optional[types.Channel] = None,
               name: Optional[Text] = None):
    """Construct a QueryBasedExampleGen component.

    Args:
      input_config: An example_gen_pb2.Input instance, providing input
        configuration.
      output_config: An example_gen_pb2.Output instance, providing output
        configuration. If unset, default splits will be 'train' and 'eval' with
        size 2:1.
      custom_config: An optional example_gen_pb2.CustomConfig instance,
        providing custom configuration for executor.
      component_name: Name of the component, should be unique per component
        class. Default to 'ExampleGen', can be overwritten by sub-classes.
        NOTE(review): this constructor never reads the argument; confirm
        whether it is still meant to be forwarded anywhere.
      example_artifacts: Optional channel of 'ExamplesPath' for output train and
        eval examples. If unset, a channel holding one
        `standard_artifacts.Examples` artifact per generated output split name
        is created.
      name: Unique name for every component class instance.
    """
    # Fall back to the default output configuration when none was given.
    if not output_config:
      output_config = utils.make_default_output_config(input_config)

    # No channel supplied: create one Examples artifact per output split.
    if not example_artifacts:
      split_names = utils.generate_output_split_names(input_config,
                                                      output_config)
      per_split_examples = [
          standard_artifacts.Examples(split=split_name)
          for split_name in split_names
      ]
      example_artifacts = channel_utils.as_channel(per_split_examples)

    spec = QueryBasedExampleGenSpec(input_config=input_config,
                                    output_config=output_config,
                                    custom_config=custom_config,
                                    examples=example_artifacts)
    base = super(_QueryBasedExampleGen, self)
    base.__init__(spec=spec, name=name)