Example #1
0
def _write_output_metadata_file(fn_res: Union[int, str, float, NamedTuple],
                                output_artifacts: Dict[str, artifact.Artifact],
                                output_metadata_path: str):
    """Serializes the executor output and writes it to the designated path.

    Args:
        fn_res: Result of the user function — either a single primitive value
            or a NamedTuple holding multiple output parameters.
        output_artifacts: Mapping from output name to its Artifact object.
        output_metadata_path: Local file path to write the metadata JSON to.

    Returns:
        The ExecutorOutput message that was written.
    """
    # A primitive result maps to the conventional single key 'output'; a
    # namedtuple result maps each field name to its field value.
    if isinstance(fn_res, (int, str, float)):
        output_parameters = {'output': fn_res}
    else:
        output_parameters = {
            field: value for field, value in zip(fn_res._fields, fn_res)
        }

    executor_output = entrypoint_utils.get_executor_output(
        output_artifacts=output_artifacts, output_params=output_parameters)

    with open(output_metadata_path, 'w') as f:
        f.write(json_format.MessageToJson(executor_output))

    return executor_output
    def testGetExecutorOutput(self):
        """Compares get_executor_output's result against the golden testdata."""
        artifact_obj = ontology_artifacts.Model()
        artifact_obj.name = 'test-artifact'
        artifact_obj.uri = 'gs://root/execution/output'
        artifact_obj.metadata['test_property'] = 'test value'

        params = {
            'int_output': 42,
            'string_output': 'hello world!',
            'float_output': 12.12,
        }
        actual = entrypoint_utils.get_executor_output(
            output_artifacts={'output': artifact_obj},
            output_params=params)

        # Round-trip the golden JSON through the proto so int values are
        # normalized the same way on both sides before comparing.
        expected = json_format.Parse(
            text=_get_text_from_testdata('executor_output.json'),
            message=pipeline_spec_pb2.ExecutorOutput())

        self.assertDictEqual(json_format.MessageToDict(expected),
                             json_format.MessageToDict(actual))
Example #3
0
# Ordered suffix recognition table used by _group_args_by_suffix. Order
# matters: the first matching suffix wins, exactly mirroring the original
# elif chain (some suffixes could be substrings of others).
def _group_args_by_suffix(kwargs):
  """Buckets command line kwargs into per-kind dicts by argument suffix.

  Args:
    kwargs: The raw command line arguments passed to the entrypoint.

  Returns:
    A tuple of eight dicts, each keyed by the bare input/output name
    (argument name with its suffix stripped):
    (input_params_metadata, input_params_field_name, input_params_value,
     input_artifacts_metadata, input_artifacts_uri,
     input_artifacts_output_name, output_artifacts_uri, output_params_path).
  """
  input_params_metadata = {}
  input_params_field_name = {}
  input_params_value = {}
  input_artifacts_metadata = {}
  input_artifacts_uri = {}
  input_artifacts_output_name = {}
  output_artifacts_uri = {}
  output_params_path = {}

  suffix_to_bucket = (
      (PARAM_METADATA_SUFFIX, input_params_metadata),
      (FIELD_NAME_SUFFIX, input_params_field_name),
      (ARGO_PARAM_SUFFIX, input_params_value),
      (ARTIFACT_METADATA_SUFFIX, input_artifacts_metadata),
      (INPUT_URI_SUFFIX, input_artifacts_uri),
      (OUTPUT_NAME_SUFFIX, input_artifacts_output_name),
      (OUTPUT_PARAM_PATH_SUFFIX, output_params_path),
      (OUTPUT_ARTIFACT_PATH_SUFFIX, output_artifacts_uri),
  )
  for k, v in kwargs.items():
    for suffix, bucket in suffix_to_bucket:
      if k.endswith(suffix):
        bucket[k[:-len(suffix)]] = v
        break
    else:
      # Not a recognized suffix; the two well-known args are expected,
      # anything else is ignored with a warning.
      if k not in (METADATA_FILE_ARG, FN_NAME_ARG):
        logging.warning(
            'Got unexpected command line argument: %s=%s Ignoring', k, v)

  return (input_params_metadata, input_params_field_name, input_params_value,
          input_artifacts_metadata, input_artifacts_uri,
          input_artifacts_output_name, output_artifacts_uri,
          output_params_path)


def main_2(**kwargs):
  """Container entrypoint used by KFP Python function based component.

  This function has a dynamic signature, which will be interpreted according to
  the I/O and data-passing contract of KFP Python function components. The
  parameter will be received from command line interface.

  For each declared parameter input of the user function, three command line
  arguments will be recognized:
  1. {name of the parameter}_input_param_metadata_file: The metadata JSON file
     path output by the producer.
  2. {name of the parameter}_input_field_name: The output name of the parameter,
     by which the parameter can be found in the producer metadata JSON file.
  3. {name of the parameter}_input_argo_param: The actual runtime value of the
     input parameter.
  When the producer is a new-styled KFP Python component, 1 and 2 will be
  populated, and when it's a conventional KFP Python component, 3 will be in
  use.

  For each declared artifact input of the user function, three command line args
  will be recognized:
  1. {name of the artifact}_input_path: The actual path, or uri, of the input
     artifact.
  2. {name of the artifact}_input_artifact_metadata_file: The metadata JSON file
     path output by the producer.
  3. {name of the artifact}_input_output_name: The output name of the artifact,
     by which the artifact can be found in the producer metadata JSON file.
  If the producer is a new-styled KFP Python component, 2+3 will be used to give
  user code access to MLMD (custom) properties associated with this artifact;
  if the producer is a conventional KFP Python component, 1 will be used to
  construct an Artifact with only the URI populated.

  For each declared artifact or parameter output of the user function, a command
  line arg, namely, `{name of the artifact|parameter}_(artifact|parameter)_output_path`,
  will be passed to specify the location where the output content is written to.

  In addition, `executor_metadata_json_file` specifies the location where the
  output metadata JSON file will be written.

  Raises:
    RuntimeError: If `executor_metadata_json_file` is missing, or if the user
      function returned a primitive but the number of declared output
      parameter paths is not exactly one.
  """
  if METADATA_FILE_ARG not in kwargs:
    raise RuntimeError('Must specify executor_metadata_json_file')

  # Group arguments according to suffixes.
  (input_params_metadata, input_params_field_name, input_params_value,
   input_artifacts_metadata, input_artifacts_uri, input_artifacts_output_name,
   output_artifacts_uri, output_params_path) = _group_args_by_suffix(kwargs)

  # Instantiate POD objects: one InputParam/InputArtifact per name seen in
  # any of the related buckets; missing pieces stay None.
  input_params = {
      name: InputParam(
          value=input_params_value.get(name),
          metadata_file=input_params_metadata.get(name),
          field_name=input_params_field_name.get(name))
      for name in (input_params_value.keys()
                   | input_params_field_name.keys()
                   | input_params_metadata.keys())
  }
  input_artifacts = {
      name: InputArtifact(
          uri=input_artifacts_uri.get(name),
          metadata_file=input_artifacts_metadata.get(name),
          output_name=input_artifacts_output_name.get(name))
      for name in (input_artifacts_uri.keys()
                   | input_artifacts_metadata.keys()
                   | input_artifacts_output_name.keys())
  }

  # Import and invoke the user-provided function.
  # Currently the actual user code is built into container as /ml/main.py
  # which is specified in
  # kfp.containers._component_builder.build_python_component.
  fn_name = kwargs[FN_NAME_ARG]
  fn = entrypoint_utils.import_func_from_source(FN_SOURCE, fn_name)

  # Get the output artifacts (inspected from the function signature) and
  # combine them with the provided URIs.
  output_artifacts = entrypoint_utils.get_output_artifacts(
      fn, output_artifacts_uri)

  invoking_kwargs = dict(output_artifacts)
  invoking_kwargs.update({k: v.value for k, v in input_params.items()})
  invoking_kwargs.update(
      {k: v.get_artifact() for k, v in input_artifacts.items()})

  # Execute the user function. fn_res is expected to contain output parameters
  # only: either a namedtuple or a single primitive value.
  fn_res = fn(**invoking_kwargs)

  # Normalize the result into an output-name -> value mapping, and write each
  # value to its declared output path. A primitive result uses the
  # conventional metadata key 'output' and the single declared path.
  if isinstance(fn_res, (int, float, str)):
    if len(output_params_path) != 1:
      raise RuntimeError('For primitive output a single output param path is '
                         'expected. Got %s' % output_params_path)
    output_parameters = {'output': fn_res}
    only_path = next(iter(output_params_path.values()))
    _gcs_helper.GCSHelper.write_to_gcs_path(
        path=only_path,
        content=str(fn_res))
  else:
    # When multiple outputs, match each namedtuple field to its output path.
    output_parameters = dict(zip(fn_res._fields, fn_res))
    for output_name, output_value in output_parameters.items():
      _gcs_helper.GCSHelper.write_to_gcs_path(
          path=output_params_path[output_name],
          content=str(output_value))

  # Write output metadata JSON file.
  executor_output = entrypoint_utils.get_executor_output(
      output_artifacts=output_artifacts,
      output_params=output_parameters)

  _gcs_helper.GCSHelper.write_to_gcs_path(
      path=kwargs[METADATA_FILE_ARG],
      content=json_format.MessageToJson(executor_output))