# Example #1
# 0
def create_container_component(
    name: Text,
    image: Text,
    command: List[executor_specs.CommandlineArgumentType],
    inputs: Optional[Dict[Text, Any]] = None,
    outputs: Optional[Dict[Text, Any]] = None,
    parameters: Optional[Dict[Text, Any]] = None,
) -> Callable[..., base_component.BaseComponent]:
    """Creates a container-based component.

  Args:
    name: The name of the component
    image: Container image name.
    command: Container entrypoint command-line. Not executed within a shell. The
      command-line can use placeholder objects that will be replaced at the
      compilation time. The placeholder objects can be imported from
      tfx.dsl.component.experimental.placeholders. Note that Jinja templates are
      not supported.
    inputs: The list of component inputs
    outputs: The list of component outputs
    parameters: The list of component parameters

  Returns:
    Component that can be instantiated and used inside pipeline.

  Raises:
    ValueError: if the component name is empty.

  Example:

    component = create_container_component(
        name='TrainModel',
        inputs={
            'training_data': Dataset,
        },
        outputs={
            'model': Model,
        },
        parameters={
            'num_training_steps': int,
        },
        image='gcr.io/my-project/my-trainer',
        command=[
            'python3', 'my_trainer',
            '--training_data_uri', InputUriPlaceholder('training_data'),
            '--model_uri', OutputUriPlaceholder('model'),
            '--num_training_steps', InputValuePlaceholder('num_training_steps'),
        ]
    )
  """
    if not name:
        raise ValueError('Component name cannot be empty.')

    inputs = inputs or {}
    outputs = outputs or {}
    parameters = parameters or {}

    # TODO(b/155804245) Sanitize the names so that they're valid python names
    input_channel_parameters = {
        input_name: component_spec.ChannelParameter(type=channel_type)
        for input_name, channel_type in inputs.items()
    }
    output_channel_parameters = {
        output_name: component_spec.ChannelParameter(type=channel_type)
        for output_name, channel_type in outputs.items()
    }
    # Pre-create one artifact per declared output so the generated component
    # class has a default output channel for each of them.
    output_channels = {
        output_name: channel_utils.as_channel([channel_type()])
        for output_name, channel_type in outputs.items()
    }
    execution_parameters = {
        param_name: component_spec.ExecutionParameter(type=parameter_type)
        for param_name, parameter_type in parameters.items()
    }

    # Only outputs get default init args; inputs and parameters must be
    # provided by the user at instantiation time.
    default_init_args = {**output_channels}

    return component_utils.create_tfx_component_class(
        name=name,
        tfx_executor_spec=executor_specs.TemplatedExecutorContainerSpec(
            image=image,
            command=command,
        ),
        input_channel_parameters=input_channel_parameters,
        output_channel_parameters=output_channel_parameters,
        execution_parameters=execution_parameters,
        default_init_args=default_init_args)
# Example #2
# 0
def create_ai_platform_training(
    name: Text,
    project_id: Text,
    region: Optional[Text] = None,
    job_id: Optional[Text] = None,
    image_uri: Optional[Text] = None,
    args: Optional[List[placeholders.CommandlineArgumentType]] = None,
    # TODO(jxzheng): support Python training spec
    scale_tier: Optional[Text] = None,
    training_input: Optional[Dict[Text, Any]] = None,
    labels: Optional[Dict[Text, Text]] = None,
    inputs: Optional[Dict[Text, Any]] = None,
    outputs: Optional[Dict[Text, Any]] = None,
    parameters: Optional[Dict[Text, Any]] = None,
) -> base_component.BaseComponent:
    """Creates a pipeline step that launches an AIP training job.

  The generated TFX component will have a component spec specified dynamically,
  through inputs/outputs/parameters in the following format:
  - inputs: A mapping from input name to the upstream channel connected. The
      artifact type of the channel will be automatically inferred.
  - outputs: A mapping from output name to the associated artifact type.
  - parameters: A mapping from execution property names to its associated value.
      Only primitive typed values are supported. Note that RuntimeParameter is
      not supported yet.

  For example:

  create_ai_platform_training(
    ...
    inputs={
        # Assuming there is an upstream node example_gen, with an output
        # 'examples' of the type Examples.
        'examples': example_gen.outputs['examples'],
    },
    outputs={
        'model': standard_artifacts.Model,
    },
    parameters={
        'n_steps': 100,
        'optimizer': 'sgd',
    }
    ...
  )

  will generate a component instance with a component spec equivalent to:

  class MyComponentSpec(ComponentSpec):
    INPUTS = {
        'examples': ChannelParameter(type=standard_artifacts.Examples)
    }
    OUTPUTS = {
        'model': ChannelParameter(type=standard_artifacts.Model)
    }
    PARAMETERS = {
        'n_steps': ExecutionParameter(type=int),
        'optimizer': ExecutionParameter(type=str)
    }

  with its input 'examples' is connected to the example_gen output, and
  execution properties specified as 100 and 'sgd' respectively.

  Example usage of the component:
    # A single node training job.
    my_train = create_ai_platform_training(
        name='my_training_step',
        project_id='my-project',
        region='us-central1',
        image_uri='gcr.io/my-project/caip-training-test:latest',
        args=[
            '--examples',
            placeholders.InputUriPlaceholder('examples'),
            '--n-steps',
            placeholders.InputValuePlaceholder('n_step'),
            '--output-location',
            placeholders.OutputUriPlaceholder('model')
        ],
        scale_tier='BASIC_GPU',
        inputs={'examples': example_gen.outputs['examples']},
        outputs={
            'model': standard_artifacts.Model
        },
        parameters={'n_step': 100}
    )

    # More complex setting can be expressed by providing training_input
    # directly.
    my_distributed_train = create_ai_platform_training(
        name='my_training_step',
        project_id='my-project',
        training_input={
            'scaleTier':
                'CUSTOM',
            'region':
                'us-central1',
            'masterType': 'n1-standard-8',
            'masterConfig': {
                'imageUri': 'gcr.io/my-project/my-dist-training:latest'
            },
            'workerType': 'n1-standard-8',
            'workerCount': 8,
            'workerConfig': {
                'imageUri': 'gcr.io/my-project/my-dist-training:latest'
            },
            'args': [
                '--examples',
                placeholders.InputUriPlaceholder('examples'),
                '--n-steps',
                placeholders.InputValuePlaceholder('n_step'),
                '--output-location',
                placeholders.OutputUriPlaceholder('model')
            ]
        },
        inputs={'examples': example_gen.outputs['examples']},
        outputs={'model': Model},
        parameters={'n_step': 100}
    )

  Args:
    name: name of the component. This is needed to construct the component spec
      and component class dynamically as well.
    project_id: the GCP project under which the AIP training job will be
      running.
    region: GCE region where the AIP training job will be running.
    job_id: the unique ID of the job. Default to 'tfx_%Y%m%d%H%M%S'
    image_uri: the GCR location of the container image, which will be used to
      execute the training program. If the same field is specified in
      training_input, the latter overrides image_uri.
    args: command line arguments that will be passed into the training program.
      Users can use placeholder semantics as in
      tfx.dsl.component.experimental.container_component to wire the args with
      component inputs/outputs/parameters.
    scale_tier: Cloud ML resource requested by the job. See
      https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs#ScaleTier
    training_input: full training job spec. This field overrides other
      specifications if applicable. This field follows the
      [TrainingInput](https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs#traininginput)
        schema.
    labels: user-specified label attached to the job.
    inputs: the dict of component inputs.
    outputs: the dict of component outputs.
    parameters: the dict of component parameters, aka, execution properties.

  Returns:
    A component instance that represents the AIP job in the DSL.

  Raises:
    ValueError: when image_uri is missing and masterConfig is not specified in
      training_input, or when region is missing and training_input
      does not provide region either.
    TypeError: when non-primitive parameters are specified.
  """
    # Shallow-copy so that the normalization below never mutates the caller's
    # dict (only top-level keys are written).
    training_input = dict(training_input) if training_input else {}

    # Note: the TrainingInput key is camelCase 'scaleTier'; per the contract
    # above, a value already present in training_input wins over scale_tier.
    if scale_tier and not training_input.get('scaleTier'):
        training_input['scaleTier'] = scale_tier

    if not training_input.get('masterConfig'):
        # If no replica config is specified, create a default one.
        if not image_uri:
            raise ValueError('image_uri is required when masterConfig is not '
                             'explicitly specified in training_input.')
        training_input['masterConfig'] = {'imageUri': image_uri}
        # Note: A custom entrypoint can be set to training_input['masterConfig']
        # through key 'container_command'.

    # Only override 'args' when explicitly given, so an 'args' list supplied
    # inside training_input (see the distributed example above) is preserved.
    if args is not None:
        training_input['args'] = args

    if not training_input.get('region'):
        if not region:
            raise ValueError('region is required when it is not set in '
                             'training_input.')
        training_input['region'] = region

    # Squash training_input, project, job_id, and labels into an exec property
    # namely 'aip_training_config'.
    aip_training_config = {
        ai_platform_training_executor.PROJECT_CONFIG_KEY: project_id,
        ai_platform_training_executor.TRAINING_INPUT_CONFIG_KEY:
        training_input,
        ai_platform_training_executor.JOB_ID_CONFIG_KEY: job_id,
        ai_platform_training_executor.LABELS_CONFIG_KEY: labels,
    }

    aip_training_config_str = json_utils.dumps(aip_training_config)

    # Construct the component spec.
    inputs = inputs or {}
    outputs = outputs or {}
    parameters = parameters or {}

    execution_parameters = {
        ai_platform_training_executor.CONFIG_KEY:
        component_spec.ExecutionParameter(type=(str, Text))
    }

    # Infer the type of input channels based on the channels passed in.
    # TODO(b/155804245) Sanitize the names so that they're valid python names
    input_channel_parameters = {
        input_name: component_spec.ChannelParameter(type=single_channel.type)
        for input_name, single_channel in inputs.items()
    }
    output_channel_parameters = {
        output_name: component_spec.ChannelParameter(type=channel_type)
        for output_name, channel_type in outputs.items()
    }
    # Pre-create one artifact per declared output so the generated component
    # class has a default output channel for each of them.
    output_channels = {
        output_name: channel_utils.as_channel([channel_type()])
        for output_name, channel_type in outputs.items()
    }

    # TODO(jxzheng): Support RuntimeParameter as parameters.
    for param_name, single_parameter in parameters.items():
        # Infer the type of parameters based on the parameters passed in.
        # TODO(b/155804245) Sanitize the names so that they're valid python names
        if not isinstance(single_parameter, (int, float, Text, bytes)):
            raise TypeError(
                'Parameter can only be int/float/str/bytes, got {}'.format(
                    type(single_parameter)))
        execution_parameters[param_name] = (component_spec.ExecutionParameter(
            type=type(single_parameter)))

    default_init_args = {
        **inputs,
        **output_channels,
        **parameters, ai_platform_training_executor.CONFIG_KEY:
        aip_training_config_str
    }

    tfx_component_class = component_utils.create_tfx_component_class(
        name=name,
        tfx_executor_spec=executor_spec.ExecutorClassSpec(
            ai_platform_training_executor.AiPlatformTrainingExecutor),
        input_channel_parameters=input_channel_parameters,
        output_channel_parameters=output_channel_parameters,
        execution_parameters=execution_parameters,
        default_init_args=default_init_args)

    return tfx_component_class()