Example #1
File: driver.py Project: jay90099/tfx
def main(args):
    executor_input = pipeline_spec_pb2.ExecutorInput()
    json_format.Parse(args.json_serialized_invocation_args,
                      executor_input,
                      ignore_unknown_fields=True)

    _run_driver(executor_input)
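For context, this driver expects --json_serialized_invocation_args to carry a JSON-serialized ExecutorInput message. Below is a minimal sketch of that round trip; the kfp.pipeline_spec import path and the output_file value are assumptions for illustration, not part of the project above.

# Minimal sketch: build an ExecutorInput, serialize it the way a launcher
# would pass --json_serialized_invocation_args, and parse it back exactly as
# main() does above. The pipeline_spec_pb2 import path is an assumption.
from google.protobuf import json_format
from kfp.pipeline_spec import pipeline_spec_pb2

executor_input = pipeline_spec_pb2.ExecutorInput()
executor_input.outputs.output_file = '/tmp/executor_output_metadata.json'

# JSON string as it would appear on the command line.
serialized = json_format.MessageToJson(executor_input)

# Round trip: parse it back, tolerating unknown fields as the driver does.
parsed = pipeline_spec_pb2.ExecutorInput()
json_format.Parse(serialized, parsed, ignore_unknown_fields=True)
assert parsed.outputs.output_file == executor_input.outputs.output_file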
Example #2
  def testParseExecutionPropertiesMapsInputBaseUri(self):
    properties_pb = pipeline_pb2.ExecutorInput()
    properties_pb.inputs.parameters[
        'input_base_uri'].string_value = 'gs://input/base'
    self.assertDictEqual(
        {'input_base': 'gs://input/base'},
        kubeflow_v2_entrypoint_utils.parse_execution_properties(
            properties_pb.inputs.parameters))
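The test implies two behaviors: each pipeline_pb2.Value in the parameter map is unwrapped into a plain Python value, and the legacy 'input_base_uri' key is mapped back to 'input_base'. A hedged sketch of that logic follows; it is an illustration, not the TFX implementation.

def _parse_execution_properties_sketch(parameters):
  """Illustrative only: unwrap a map of pipeline_pb2.Value into a dict."""
  result = {}
  for name, value in parameters.items():
    # Keep backward compatibility with the legacy '_uri'-suffixed key.
    if name == 'input_base_uri':
      name = 'input_base'
    # pipeline_pb2.Value is a oneof over int_value/double_value/string_value.
    set_field = value.WhichOneof('value')
    result[name] = getattr(value, set_field) if set_field else None
  return result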
Example #3
  def setUp(self):
    super(KubeflowV2EntrypointUtilsTest, self).setUp()
    _ARTIFACT_1.uri = 'gs://root/string/'
    # Hash value of
    # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/11111'
    _ARTIFACT_1.id = 9171918664759481579
    _ARTIFACT_1.set_string_custom_property(
        key='my_property_1', value='Test string.')
    _ARTIFACT_2.uri = 'gs://root/model/'
    # Hash value of
    # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/22222'
    _ARTIFACT_2.id = 6826273797600318744
    _ARTIFACT_2.set_float_custom_property(key='my_property_2', value=42.0)
    _ARTIFACT_3.uri = 'gs://root/examples/'
    _ARTIFACT_3.span = 9000
    # Hash value of
    # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/33333'
    _ARTIFACT_3.id = 27709763105391302
    self._expected_dict = {
        _KEY_1: [_ARTIFACT_1],
        _KEY_2: [_ARTIFACT_2],
        _KEY_3: [_ARTIFACT_3],
    }
    source_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    # Use two protos to store the testdata.
    artifacts_pb = pipeline_pb2.ExecutorInput()
    io_utils.parse_json_file(
        os.path.join(source_data_dir, 'artifacts.json'), artifacts_pb)
    self._artifacts = artifacts_pb.inputs.artifacts

    # Test legacy properties/custom properties deserialization.
    artifacts_legacy_pb = pipeline_pb2.ExecutorInput()
    io_utils.parse_json_file(
        os.path.join(source_data_dir, 'artifacts_legacy.json'),
        artifacts_legacy_pb)
    self._artifacts_legacy = artifacts_legacy_pb.inputs.artifacts

    properties_pb = pipeline_pb2.ExecutorInput()
    io_utils.parse_json_file(
        os.path.join(source_data_dir, 'exec_properties.json'), properties_pb)
    self._properties = properties_pb.inputs.parameters
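For reference, the testdata files read above follow the proto-JSON layout of ExecutorInput. The snippet below sketches a plausible artifacts JSON in the legacy properties/customProperties style mentioned in the comment; the 'input_1' key and the property values are made up for illustration, and pipeline_pb2 is the same proto module the test imports.

from google.protobuf import json_format

_EXAMPLE_ARTIFACTS_JSON = """
{
  "inputs": {
    "artifacts": {
      "input_1": {
        "artifacts": [
          {
            "name": "projects/123456789/locations/us-central1/metadataStores/default/artifacts/11111",
            "uri": "gs://root/string/",
            "customProperties": {
              "my_property_1": {"stringValue": "Test string."}
            }
          }
        ]
      }
    }
  }
}
"""

example_pb = pipeline_pb2.ExecutorInput()
json_format.Parse(_EXAMPLE_ARTIFACTS_JSON, example_pb, ignore_unknown_fields=True)
assert example_pb.inputs.artifacts['input_1'].artifacts[0].uri == 'gs://root/string/'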
Example #4
def main(args):
    executor_input = pipeline_spec_pb2.ExecutorInput()
    json_format.Parse(args.json_serialized_invocation_args,
                      executor_input,
                      ignore_unknown_fields=True)

    name_from_id = {}
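    # As a side effect of parsing below, name_from_id collects a mapping from
    # the numeric artifact id to its resource name; it is then forwarded to
    # _run_driver.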

    exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
        executor_input.inputs.parameters)
    outputs_dict = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
        executor_input.outputs.artifacts, name_from_id)

    _run_driver(exec_properties, outputs_dict,
                executor_input.outputs.output_file, name_from_id)
Example #5
def main(
    executor_input_str: str,
    function_name: str,
    output_metadata_path: Optional[str] = None):
  """Container entrypoint used by KFP Python function based component

  executor_input_str: A serialized ExecutorInput proto message.
  function_name: The name of the user-defined function.
  output_metadata_path: A local path where the output metadata JSON file should
    be written to.
  """
  executor_input = pipeline_spec_pb2.ExecutorInput()
  json_format.Parse(text=executor_input_str, message=executor_input)
  output_metadata_path = output_metadata_path or executor_input.outputs.output_file
  parameter_dict = {}  # kwargs to be passed to UDF.
  for name, input_param in executor_input.inputs.parameters.items():
    parameter_dict[name] = entrypoint_utils.get_python_value(input_param)

  for name, input_artifacts in executor_input.inputs.artifacts.items():
    parameter_dict[name] = artifact.Artifact.get_from_runtime_artifact(
        input_artifacts.artifacts[0])

  # Also, determine a way to inspect the function signature to decide the type
  # of output artifacts.
  fn = entrypoint_utils.import_func_from_source(FN_SOURCE, function_name)

  # In the ExecutorInput message passed into the entrypoint, the output artifact
  # URIs are already specified. The output artifact is constructed according to
  # the specified URIs + type information retrieved from function signature.
  output_uris = {}
  for name, output_artifacts in executor_input.outputs.artifacts.items():
    output_uris[name] = output_artifacts.artifacts[0].uri

  output_artifacts = entrypoint_utils.get_output_artifacts(
      fn, output_uris)
  for name, art in output_artifacts.items():
    parameter_dict[name] = art

  # Execute the user function. fn_res is expected to contain output parameters
  # only. It's either a namedtuple or a single primitive value.
  fn_res = fn(**parameter_dict)

  _write_output_metadata_file(
      fn_res=fn_res,
      output_artifacts=output_artifacts,
      output_metadata_path=output_metadata_path)
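To make the dispatch above concrete, here is a hedged sketch of the kind of user-defined function this entrypoint could call. The function and parameter names are made up: parameters arrive from executor_input.inputs.parameters, artifact arguments from the artifact maps, and output parameters come back as a NamedTuple that _write_output_metadata_file would record.

from typing import NamedTuple

class _Outputs(NamedTuple):
  # Output parameters returned by the user function.
  accuracy: float

# Hypothetical user-defined function, for illustration only. 'num_steps' would
# be filled from executor_input.inputs.parameters, while 'examples' and 'model'
# would be the artifact objects placed into parameter_dict above.
def train_fn(num_steps: int, examples=None, model=None) -> _Outputs:
  del num_steps, examples, model  # Placeholder body for the sketch.
  return _Outputs(accuracy=0.9)

# The entrypoint would then effectively run: fn_res = train_fn(**parameter_dict)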
Example #6
    def setUp(self):
        super().setUp()

        self._executor_invocation = pipeline_pb2.ExecutorInput()
        self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
        self._executor_invocation.inputs.parameters[
            'input_base'].string_value = _TEST_INPUT_DIR
        self._executor_invocation.inputs.parameters[
            'output_config'].string_value = '{}'
        self._executor_invocation.inputs.parameters[
            'input_config'].string_value = json_format.MessageToJson(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(name='s1',
                                                pattern='span{SPAN}/split1/*'),
                    example_gen_pb2.Input.Split(name='s2',
                                                pattern='span{SPAN}/split2/*')
                ]))
        self._executor_invocation.outputs.artifacts[
            'examples'].artifacts.append(
                pipeline_pb2.RuntimeArtifact(
                    type=pipeline_pb2.ArtifactTypeSchema(
                        instance_schema=compiler_utils.get_artifact_schema(
                            standard_artifacts.Examples))))

        self._executor_invocation_from_file = fileio.open(
            os.path.join(os.path.dirname(__file__), 'testdata',
                         'executor_invocation.json'), 'r').read()

        logging.debug('Executor invocation under test: %s',
                      self._executor_invocation_from_file)
        self._expected_result_from_file = fileio.open(
            os.path.join(os.path.dirname(__file__), 'testdata',
                         'expected_output_metadata.json'), 'r').read()
        logging.debug('Expecting output metadata JSON: %s',
                      self._expected_result_from_file)

        # Change working directory after all the testdata files have been read.
        self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

        fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))
Example #7
def _run_executor(args: argparse.Namespace, beam_args: List[str]) -> None:
  """Selects a particular executor and run it based on name.

  Args:
    args:
      --executor_class_path: The import path of the executor class.
      --json_serialized_invocation_args: Full JSON-serialized parameters for
        this execution.
    beam_args: Optional parameter that maps to the optional_pipeline_args
      parameter in the pipeline, which provides additional configuration
      options for apache-beam and tensorflow.logging. For more about the beam
      arguments, please refer to:
      https://cloud.google.com/dataflow/docs/guides/specifying-exec-params
  """
  logging.set_verbosity(logging.INFO)

  # Rehydrate inputs/outputs/exec_properties from the serialized metadata.
  executor_input = pipeline_spec_pb2.ExecutorInput()
  json_format.Parse(
      args.json_serialized_invocation_args,
      executor_input,
      ignore_unknown_fields=True)

  inputs_dict = executor_input.inputs.artifacts
  outputs_dict = executor_input.outputs.artifacts
  inputs_parameter = executor_input.inputs.parameters

  if fileio.exists(executor_input.outputs.output_file):
    # It has a driver that outputs the updated exec_properties in this file.
    with fileio.open(executor_input.outputs.output_file,
                     'rb') as output_meta_json:
      output_metadata = pipeline_spec_pb2.ExecutorOutput()
      json_format.Parse(
          output_meta_json.read(), output_metadata, ignore_unknown_fields=True)
      # Append/overwrite exec_properties.
      for k, v in output_metadata.parameters.items():
        inputs_parameter[k].CopyFrom(v)

  name_from_id = {}

  inputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
      inputs_dict, name_from_id)
  outputs = kubeflow_v2_entrypoint_utils.parse_raw_artifact_dict(
      outputs_dict, name_from_id)
  exec_properties = kubeflow_v2_entrypoint_utils.parse_execution_properties(
      inputs_parameter)
  logging.info('Executor %s do: inputs: %s, outputs: %s, exec_properties: %s',
               args.executor_class_path, inputs, outputs, exec_properties)
  executor_cls = import_utils.import_class_by_path(args.executor_class_path)
  if issubclass(executor_cls, base_beam_executor.BaseBeamExecutor):
    executor_context = base_beam_executor.BaseBeamExecutor.Context(
        beam_pipeline_args=beam_args, unique_id='', tmp_dir='/tmp')
  else:
    executor_context = base_executor.BaseExecutor.Context(
        extra_flags=beam_args, unique_id='', tmp_dir='/tmp')
  executor = executor_cls(executor_context)
  logging.info('Starting executor')
  executor.Do(inputs, outputs, exec_properties)

  # TODO(b/182316162): Unify publisher handling so that post-execution artifact
  # logic is more cleanly handled.
  outputs_utils.tag_output_artifacts_with_version(outputs)  # pylint: disable=protected-access

  # TODO(b/169583143): Remove this workaround when TFX migrates to use str-typed
  # id/name to identify artifacts.
  # Convert ModelBlessing artifact to use managed MLMD resource name.
  if (issubclass(executor_cls, evaluator_executor.Executor) and
      standard_component_specs.BLESSING_KEY in outputs):
    # Parse the parent prefix for managed MLMD resource name.
    kubeflow_v2_entrypoint_utils.refactor_model_blessing(
        artifact_utils.get_single_instance(
            outputs[standard_component_specs.BLESSING_KEY]), name_from_id)

  # Log the output metadata to a file so that it can be picked up by MP.
  metadata_uri = executor_input.outputs.output_file
  executor_output = pipeline_spec_pb2.ExecutorOutput()
  for k, v in kubeflow_v2_entrypoint_utils.translate_executor_output(
      outputs, name_from_id).items():
    executor_output.artifacts[k].CopyFrom(v)

  fileio.makedirs(os.path.dirname(metadata_uri))
  with fileio.open(metadata_uri, 'wb') as f:
    f.write(json_format.MessageToJson(executor_output))
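As a closing sketch, the metadata file written above can be read back the same way this entrypoint ingests a driver's output earlier on. The names metadata_uri, fileio, json_format, pipeline_spec_pb2, and logging below refer to the same objects used in the example; the snippet is illustrative, not part of the project.

with fileio.open(metadata_uri, 'rb') as readback_file:
  readback = pipeline_spec_pb2.ExecutorOutput()
  json_format.Parse(readback_file.read(), readback, ignore_unknown_fields=True)
for key, artifact_list in readback.artifacts.items():
  logging.info('Output key %s carries %d artifact(s).', key,
               len(artifact_list.artifacts))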