def __init__(
        self,
        input_base: Optional[Text] = None,
        input_config: Optional[Union[example_gen_pb2.Input,
                                     Dict[Text, Any]]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      Dict[Text, Any]]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      Dict[Text, Any]]] = None,
        range_config: Optional[Union[range_config_pb2.RangeConfig,
                                     Dict[Text, Any]]] = None,
        output_data_format: Optional[int] = example_gen_pb2.FORMAT_TF_EXAMPLE,
        example_artifacts: Optional[types.Channel] = None,
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
        instance_name: Optional[Text] = None):
    """Build a FileBasedExampleGen component.

    Args:
      input_base: External directory that holds the data files.
      input_config: An
        [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
        instance (or equivalent dict) describing the input. When falsy, the
        result of `utils.make_default_input_config()` is used, so the input
        files are treated as a single split.
      output_config: An example_gen_pb2.Output instance (or equivalent dict)
        describing the output. When falsy, it is derived from `input_config`
        via `utils.make_default_output_config`; the default splits are
        'train' and 'eval' with size 2:1.
      custom_config: Optional example_gen_pb2.CustomConfig instance carrying
        executor-specific configuration.
      range_config: Optional range_config_pb2.RangeConfig instance limiting
        the span values to consider. If unset, the driver searches for the
        latest span with no restrictions.
      output_data_format: Payload format of the generated data in the output
        artifact; one of the example_gen_pb2.PayloadFormat enum values.
      example_artifacts: Channel of 'ExamplesPath' for the output train and
        eval examples. A new `standard_artifacts.Examples` channel is created
        when this is falsy.
      custom_executor_spec: Optional executor spec that overrides the default
        executor spec declared on the component.
      instance_name: Optional unique instance name. Only needed when several
        ExampleGen components are declared in the same pipeline.
    """
    # Fall back to the default input/output configuration when not supplied.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    # Reuse the caller's output channel, or create a fresh Examples channel.
    example_artifacts = example_artifacts or types.Channel(
        type=standard_artifacts.Examples)

    spec = FileBasedExampleGenSpec(
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        range_config=range_config,
        output_data_format=output_data_format,
        examples=example_artifacts,
    )
    super(FileBasedExampleGen, self).__init__(
        spec=spec,
        custom_executor_spec=custom_executor_spec,
        instance_name=instance_name,
    )
def __init__(
        self,
        input: types.Channel = None,  # pylint: disable=redefined-builtin
        input_config: Optional[Union[example_gen_pb2.Input,
                                     Dict[Text, Any]]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      Dict[Text, Any]]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      Dict[Text, Any]]] = None,
        example_artifacts: Optional[types.Channel] = None,
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
        input_base: Optional[types.Channel] = None,
        instance_name: Optional[Text] = None):
    """Build a FileBasedExampleGen component.

    Either `input` or `input_base` must be supplied.

    Args:
      input: A Channel of type `standard_artifacts.ExternalArtifact` holding
        one artifact whose uri is an external directory containing the data
        files. _required_
      input_config: An
        [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
        instance (or equivalent dict) describing the input. When falsy, the
        result of `utils.make_default_input_config()` is used and the files
        under input_base are treated as a single dataset.
      output_config: An example_gen_pb2.Output instance (or equivalent dict)
        describing the output. When falsy, it is derived from `input_config`
        via `utils.make_default_output_config`; the default splits are
        'train' and 'eval' with size 2:1.
      custom_config: Optional example_gen_pb2.CustomConfig instance carrying
        executor-specific configuration.
      example_artifacts: Channel of 'ExamplesPath' for the output train and
        eval examples. When falsy, a channel with one
        `standard_artifacts.Examples` artifact per output split is created.
      custom_executor_spec: Optional executor spec that overrides the default
        executor spec declared on the component.
      input_base: Backwards compatibility alias for `input`; consulted only
        when `input` is falsy.
      instance_name: Optional unique instance name. Only needed when several
        ExampleGen components are declared in the same pipeline.
    """
    # `input` takes precedence; the legacy alias is only a fallback.
    if not input:
        input = input_base

    # Fall back to the default input/output configuration when not supplied.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    # Without a caller-provided channel, emit one Examples artifact per split.
    if not example_artifacts:
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        example_artifacts = channel_utils.as_channel(
            [standard_artifacts.Examples(split=str(name))
             for name in split_names])

    spec = FileBasedExampleGenSpec(
        input_base=input,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        examples=example_artifacts,
    )
    super(FileBasedExampleGen, self).__init__(
        spec=spec,
        custom_executor_spec=custom_executor_spec,
        instance_name=instance_name,
    )
def __init__(
        self,
        input_base: types.Channel = None,
        input_config: Optional[example_gen_pb2.Input] = None,
        output_config: Optional[example_gen_pb2.Output] = None,
        custom_config: Optional[example_gen_pb2.CustomConfig] = None,
        component_name: Optional[Text] = 'ExampleGen',
        example_artifacts: Optional[types.Channel] = None,
        executor_class: Optional[Type[base_executor.BaseExecutor]] = None,
        input: Optional[types.Channel] = None,  # pylint: disable=redefined-builtin
        name: Optional[Text] = None):
    """Build a FileBasedExampleGen component.

    Args:
      input_base: A Channel of 'ExternalPath' type holding one artifact whose
        uri is an external directory with data files inside (required).
      input_config: Optional example_gen_pb2.Input instance describing the
        input. When falsy, the result of `utils.make_default_input_config()`
        is used and the files under input_base (which must be set) are
        treated as a single split.
      output_config: Optional example_gen_pb2.Output instance describing the
        output. When falsy, it is derived from `input_config` via
        `utils.make_default_output_config`; the default splits are 'train'
        and 'eval' with size 2:1.
      custom_config: Optional example_gen_pb2.CustomConfig instance carrying
        executor-specific configuration.
      component_name: Name of the component, which should be unique per
        component class. Defaults to 'ExampleGen' and may be overwritten by
        sub-classes. NOTE(review): this argument is not referenced anywhere
        in this constructor body - confirm whether that is intentional.
      example_artifacts: Optional channel of 'ExamplesPath' for the output
        train and eval examples. When falsy, a channel with one
        `standard_artifacts.Examples` artifact per output split is created.
      executor_class: Optional custom executor class that overrides the
        default executor declared on the component.
      input: Forwards compatibility alias for `input_base`; consulted only
        when `input_base` is falsy.
      name: Unique name for every component class instance.
    """
    # `input_base` takes precedence; the newer alias is only a fallback.
    if not input_base:
        input_base = input

    # Fall back to the default input/output configuration when not supplied.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    # Without a caller-provided channel, emit one Examples artifact per split.
    if not example_artifacts:
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        example_artifacts = channel_utils.as_channel(
            [standard_artifacts.Examples(split=one_split)
             for one_split in split_names])

    spec = FileBasedExampleGenSpec(
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        examples=example_artifacts,
    )
    super(FileBasedExampleGen, self).__init__(
        spec=spec,
        custom_executor_class=executor_class,
        name=name,
    )
def __init__(
        self,
        # TODO(b/159467778): deprecate this, use input_base instead.
        input: Optional[types.Channel] = None,  # pylint: disable=redefined-builtin
        input_base: Optional[Text] = None,
        input_config: Optional[Union[example_gen_pb2.Input,
                                     Dict[Text, Any]]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      Dict[Text, Any]]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      Dict[Text, Any]]] = None,
        output_data_format: Optional[int] = example_gen_pb2.FORMAT_TF_EXAMPLE,
        example_artifacts: Optional[types.Channel] = None,
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
        instance_name: Optional[Text] = None):
    """Build a FileBasedExampleGen component.

    Args:
      input: A Channel of type `standard_artifacts.ExternalArtifact` holding
        one artifact whose uri is an external directory containing the data
        files. (Deprecated by input_base.) When truthy, a warning is logged
        and the single uri taken from this channel replaces `input_base`.
      input_base: External directory that holds the data files.
      input_config: An
        [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
        instance (or equivalent dict) describing the input. When falsy, the
        result of `utils.make_default_input_config()` is used, so the input
        files are treated as a single split.
      output_config: An example_gen_pb2.Output instance (or equivalent dict)
        describing the output. When falsy, it is derived from `input_config`
        via `utils.make_default_output_config`; the default splits are
        'train' and 'eval' with size 2:1.
      custom_config: Optional example_gen_pb2.CustomConfig instance carrying
        executor-specific configuration.
      output_data_format: Payload format of the generated data in the output
        artifact; one of the example_gen_pb2.PayloadFormat enum values.
      example_artifacts: Channel of 'ExamplesPath' for the output train and
        eval examples. A new `standard_artifacts.Examples` channel is created
        when this is falsy.
      custom_executor_spec: Optional executor spec that overrides the default
        executor spec declared on the component.
      instance_name: Optional unique instance name. Only needed when several
        ExampleGen components are declared in the same pipeline.
    """
    if input:
        deprecation_notice = (
            'The "input" argument to the ExampleGen component has been '
            'deprecated by "input_base". Please update your usage as support for '
            'this argument will be removed soon.')
        logging.warning(deprecation_notice)
        # The legacy channel wins over any explicitly passed `input_base`.
        legacy_artifacts = list(input.get())
        input_base = artifact_utils.get_single_uri(legacy_artifacts)

    # Fall back to the default input/output configuration when not supplied.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    # Reuse the caller's output channel, or create a fresh Examples channel.
    example_artifacts = example_artifacts or types.Channel(
        type=standard_artifacts.Examples)

    spec = FileBasedExampleGenSpec(
        input_base=input_base,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        output_data_format=output_data_format,
        examples=example_artifacts,
    )
    super(FileBasedExampleGen, self).__init__(
        spec=spec,
        custom_executor_spec=custom_executor_spec,
        instance_name=instance_name,
    )
def __init__(
        self,
        input: types.Channel = None,  # pylint: disable=redefined-builtin
        input_config: Optional[Union[example_gen_pb2.Input,
                                     Dict[Text, Any]]] = None,
        output_config: Optional[Union[example_gen_pb2.Output,
                                      Dict[Text, Any]]] = None,
        custom_config: Optional[Union[example_gen_pb2.CustomConfig,
                                      Dict[Text, Any]]] = None,
        example_artifacts: Optional[types.Channel] = None,
        custom_executor_spec: Optional[executor_spec.ExecutorSpec] = None,
        input_base: Optional[types.Channel] = None,
        instance_name: Optional[Text] = None,
        enable_cache: Optional[bool] = None):
    """Build a FileBasedExampleGen component.

    Either `input_base` or `input` must be supplied.

    Args:
      input: A Channel of type `standard_artifacts.ExternalArtifact` holding
        one artifact whose uri is an external directory containing the data
        files. _required_
      input_config: An
        [`example_gen_pb2.Input`](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
        instance (or equivalent dict) describing the input. When falsy, the
        result of `utils.make_default_input_config()` is used and the files
        under input_base are treated as a single dataset.
      output_config: An example_gen_pb2.Output instance (or equivalent dict)
        describing the output. When falsy, it is derived from `input_config`
        via `utils.make_default_output_config`; the default splits are
        'train' and 'eval' with size 2:1.
      custom_config: Optional example_gen_pb2.CustomConfig instance carrying
        executor-specific configuration.
      example_artifacts: Channel of 'ExamplesPath' for the output train and
        eval examples. When falsy, a channel holding a single
        `standard_artifacts.Examples` artifact is created, with its
        `split_names` set to the encoded output split names.
      custom_executor_spec: Optional executor spec that overrides the default
        executor spec declared on the component.
      input_base: Backwards compatibility alias for `input`. Deprecated: when
        truthy, a warning is logged and its value replaces `input`.
      instance_name: Optional unique instance name. Only needed when several
        ExampleGen components are declared in the same pipeline.
      enable_cache: Optional boolean indicating whether cache is enabled for
        the FileBasedExampleGen component. If not specified, defaults to the
        value given for the pipeline's enable_cache parameter.
    """
    if input_base:
        rename_notice = (
            'The "input_base" argument to the ExampleGen component has '
            'been renamed to "input" and is deprecated. Please update your '
            'usage as support for this argument will be removed soon.')
        absl.logging.warning(rename_notice)
        # The deprecated alias overrides any explicitly passed `input`.
        input = input_base

    # Fall back to the default input/output configuration when not supplied.
    if not input_config:
        input_config = utils.make_default_input_config()
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    # Without a caller-provided channel, emit one Examples artifact that
    # records every output split name.
    if not example_artifacts:
        examples_artifact = standard_artifacts.Examples()
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        examples_artifact.split_names = artifact_utils.encode_split_names(
            split_names)
        example_artifacts = channel_utils.as_channel([examples_artifact])

    spec = FileBasedExampleGenSpec(
        input=input,
        input_config=input_config,
        output_config=output_config,
        custom_config=custom_config,
        examples=example_artifacts,
    )
    super(FileBasedExampleGen, self).__init__(
        spec=spec,
        custom_executor_spec=custom_executor_spec,
        instance_name=instance_name,
        enable_cache=enable_cache,
    )