Example #1
File: executor.py Project: jay90099/tfx
    def GetModelPath(self, input_dict: Dict[str, List[types.Artifact]]) -> str:
        """Get input model path to push.

    Pusher can push various types of artifacts if they contain the model. This
    method decides which artifact type is given to the Pusher and extracts the
    real model path. Subclasses of the Pusher Executor should use this method
    to acquire the source model path.

    Args:
      input_dict: A dictionary of artifacts that is given as the first argument
          to the Executor.Do() method.
    Returns:
      A resolved input model path.
    Raises:
      RuntimeError: If no model path is found from input_dict.
    """
        # Check input_dict['model'] first.
        models = input_dict.get(standard_component_specs.MODEL_KEY)
        if models:
            model = artifact_utils.get_single_instance(models)
            return path_utils.serving_model_path(
                model.uri, path_utils.is_old_model_artifact(model))

        # Falls back to input_dict['infra_blessing']
        blessed_models = input_dict.get(
            standard_component_specs.INFRA_BLESSING_KEY)
        if not blessed_models:
            # Should not reach here; Pusher.__init__ prohibits creating a component
            # without either a model or an infra_blessing input.
            raise RuntimeError('Pusher has no model input.')
        model = artifact_utils.get_single_instance(blessed_models)
        if not model.get_int_custom_property(_INFRA_BLESSING_MODEL_FLAG_KEY):
            raise RuntimeError('InfraBlessing does not contain a model. Check '
                               'that request_spec.make_warmup is set to True.')
        return path_utils.stamped_model_path(model.uri)
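GetModelPath() is intended to be called from Pusher Executor subclasses. Below is a minimal sketch of such a call site; the subclass name and the _DeployModel helper are hypothetical, not part of TFX:

    class MyPusherExecutor(Executor):

        def Do(self, input_dict, output_dict, exec_properties):
            # Resolves the model path from the 'model' input or, failing that,
            # from a warmup-stamped 'infra_blessing' artifact.
            model_path = self.GetModelPath(input_dict)
            self._DeployModel(model_path)  # hypothetical deploy step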
Example #2
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]):
        """Overrides the tfx_pusher_executor.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: Mostly a passthrough input dict for
        tfx.components.Pusher.executor.  custom_config.ai_platform_serving_args
        is consumed by this class.  For the full set of parameters supported by
        Google Cloud AI Platform, refer to
        https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models#creating_a_model_version.

    Returns:
      None
    Raises:
      ValueError: if ai_platform_serving_args is not in
        exec_properties.custom_config.
      RuntimeError: if the Google Cloud AI Platform deployment failed.
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        if not self.CheckBlessing(input_dict, output_dict):
            return

        model_export = artifact_utils.get_single_instance(
            input_dict['model_export'])
        model_export_uri = model_export.uri
        model_blessing_uri = artifact_utils.get_single_uri(
            input_dict['model_blessing'])
        model_push = artifact_utils.get_single_instance(
            output_dict['model_push'])
        # TODO(jyzhao): should this be in driver or executor.
        if not tf.gfile.Exists(os.path.join(model_blessing_uri, 'BLESSED')):
            model_push.set_int_custom_property('pushed', 0)
            tf.logging.info('Model on %s was not blessed', model_blessing_uri)
            return

        exec_properties_copy = exec_properties.copy()
        custom_config = exec_properties_copy.pop('custom_config', {})
        ai_platform_serving_args = custom_config.get('ai_platform_serving_args')

        # Deploy the model.
        model_path = path_utils.serving_model_path(model_export_uri)
        # Note: we do not have a logical model version right now. This
        # model_version is a timestamp mapped to trainer's exporter.
        model_version = os.path.basename(model_path)
        if ai_platform_serving_args is not None:
            runner.deploy_model_for_cmle_serving(model_path, model_version,
                                                 ai_platform_serving_args)

        # Make sure artifacts are populated in a standard way by calling
        # tfx.pusher.executor.Executor.Do().
        exec_properties_copy['push_destination'] = exec_properties.get(
            'push_destination') or self._make_local_temp_destination()
        super(Executor, self).Do(input_dict, output_dict, exec_properties_copy)
Example #3
  def Do(self, input_dict: Mapping[str, Sequence[artifact.Artifact]],
         output_dict: Mapping[str, Sequence[artifact.Artifact]],
         exec_properties: Mapping[str, Any]) -> None:
    """Overrides BaseExecutor.Do()."""
    args_capture = _ArgsCapture.instance
    args_capture.input_dict = input_dict
    args_capture.output_dict = output_dict
    args_capture.exec_properties = exec_properties
    artifact_utils.get_single_instance(
        output_dict["output"]).set_string_custom_property(
            _TEST_OUTPUT_PROPERTY_KEY, _TEST_OUTPUT_PROPERTY_VALUE)
    blessing = artifact_utils.get_single_instance(output_dict["blessing"])
    # Set to the hash value of
    # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/1'
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_BASELINE_MODEL_ID_KEY,
        5743745765020341227)
    # Set to the hash value of
    # 'projects/123456789/locations/us-central1/metadataStores/default/artifacts/2'
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY,
        7228748496289751000)
    # Set the blessing result.
    blessing.set_int_custom_property(
        constants.ARTIFACT_PROPERTY_BLESSED_KEY, constants.BLESSED_VALUE)
Example #4
File: executor.py Project: zw39125432/tfx
    def CheckBlessing(self, input_dict: Dict[Text, List[types.Artifact]],
                      output_dict: Dict[Text, List[types.Artifact]]) -> bool:
        """Check that model is blessed by upstream ModelValidator, or update output.

    Args:
      input_dict: Input dict from input key to a list of artifacts:
        - model_blessing: model blessing path from model_validator. Pusher looks
          for a file named 'BLESSED' to consider the model blessed and safe to
          push.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one.

    Returns:
      True if the model is blessed by validator.
    """
        model_blessing = artifact_utils.get_single_instance(
            input_dict['model_blessing'])
        model_push = artifact_utils.get_single_instance(
            output_dict['model_push'])
        # TODO(jyzhao): should this be in driver or executor.
        if not model_utils.is_model_blessed(model_blessing):
            model_push.set_int_custom_property('pushed', 0)
            absl.logging.info('Model on %s was not blessed',
                              model_blessing.uri)
            return False
        return True
Example #5
  def CheckBlessing(self, input_dict: Dict[Text, List[types.Artifact]]) -> bool:
    """Check that model is blessed by upstream validators.

    Args:
      input_dict: Input dict from input key to a list of artifacts:
        - model_blessing: A `ModelBlessing` artifact from model validator or
          evaluator.
          Pusher looks for a custom property `blessed` in the artifact to check
          it is safe to push.
        - infra_blessing: An `InfraBlessing` artifact from infra validator.
          Pusher looks for a custom property `blessed` in the artifact to
          determine whether the model is mechanically servable from the model
          server to which Pusher is going to push.

    Returns:
      True if the model is blessed by validator.
    """
    model_blessing = artifact_utils.get_single_instance(
        input_dict[MODEL_BLESSING_KEY])
    # TODO(jyzhao): should this be in driver or executor.
    if not model_utils.is_model_blessed(model_blessing):
      logging.info('Model on %s was not blessed by model validation',
                   model_blessing.uri)
      return False
    if INFRA_BLESSING_KEY in input_dict:
      infra_blessing = artifact_utils.get_single_instance(
          input_dict[INFRA_BLESSING_KEY])
      if not model_utils.is_infra_validated(infra_blessing):
        logging.info('Model on %s was not blessed by infra validator',
                     model_blessing.uri)
        return False
    return True
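Both checks above boil down to reading an integer custom property on the blessing artifact. As a rough sketch, model_utils.is_model_blessed plausibly reduces to the following (inferred from the `blessed` custom property named in the docstring, not the verbatim TFX source):

    def is_model_blessed(model_blessing: types.Artifact) -> bool:
        # Evaluator/ModelValidator mark their ModelBlessing output with the
        # integer custom property 'blessed': 1 means blessed, 0 means not.
        return model_blessing.get_int_custom_property('blessed') == 1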
Example #6
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Contract for running InfraValidator Executor.

    Args:
      input_dict:
        - `model`: Single `Model` artifact that we're validating.
        - `examples`: `Examples` artifacts to be used for test requests.
      output_dict:
        - `blessing`: Single `InfraBlessing` artifact containing the validated
          result. It is an empty file whose name is either INFRA_BLESSED or
          INFRA_NOT_BLESSED.
      exec_properties:
        - `serving_spec`: Serialized `ServingSpec` configuration.
        - `validation_spec`: Serialized `ValidationSpec` configuration.
        - `request_spec`: Serialized `RequestSpec` configuration.
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        model = artifact_utils.get_single_instance(input_dict[_MODEL_KEY])
        blessing = artifact_utils.get_single_instance(
            output_dict[_BLESSING_KEY])

        if input_dict.get(_EXAMPLES_KEY):
            examples = artifact_utils.get_single_instance(
                input_dict[_EXAMPLES_KEY])
        else:
            examples = None

        serving_spec = infra_validator_pb2.ServingSpec()
        json_format.Parse(exec_properties[_SERVING_SPEC_KEY], serving_spec)
        if not serving_spec.model_name:
            serving_spec.model_name = _DEFAULT_MODEL_NAME

        validation_spec = infra_validator_pb2.ValidationSpec()
        if exec_properties.get(_VALIDATION_SPEC_KEY):
            json_format.Parse(exec_properties[_VALIDATION_SPEC_KEY],
                              validation_spec)
        if not validation_spec.num_tries:
            validation_spec.num_tries = _DEFAULT_NUM_TRIES
        if not validation_spec.max_loading_time_seconds:
            validation_spec.max_loading_time_seconds = _DEFAULT_MAX_LOADING_TIME_SEC

        if exec_properties.get(_REQUEST_SPEC_KEY):
            request_spec = infra_validator_pb2.RequestSpec()
            json_format.Parse(exec_properties[_REQUEST_SPEC_KEY], request_spec)
        else:
            request_spec = None

        with self._InstallGracefulShutdownHandler():
            self._Do(
                model=model,
                examples=examples,
                blessing=blessing,
                serving_spec=serving_spec,
                validation_spec=validation_spec,
                request_spec=request_spec,
            )
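Because the executor deserializes its specs with json_format.Parse, callers pass them as JSON-serialized protos. A minimal sketch of building such exec_properties, with hypothetical field values:

    from google.protobuf import json_format
    from tfx.proto import infra_validator_pb2

    serving_spec = infra_validator_pb2.ServingSpec(model_name='my_model')
    exec_properties = {
        'serving_spec': json_format.MessageToJson(serving_spec),
        # 'validation_spec' and 'request_spec' are optional; the executor
        # falls back to defaults when they are absent.
    }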
Example #7
File: executor.py Project: ysjeon7/tfx
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]):
        """Overrides the tfx_pusher_executor.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: Mostly a passthrough input dict for
        tfx.components.Pusher.executor.  custom_config.ai_platform_serving_args
        is consumed by this class.  For the full set of parameters supported by
        Google Cloud AI Platform, refer to
        https://cloud.google.com/ml-engine/docs/tensorflow/deploying-models#creating_a_model_version.

    Returns:
      None
    Raises:
      ValueError:
        If ai_platform_serving_args is not in exec_properties.custom_config.
        If the serving model path does not start with gs://.
      RuntimeError: if the Google Cloud AI Platform deployment failed.
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        model_push = artifact_utils.get_single_instance(
            output_dict[tfx_pusher_executor.PUSHED_MODEL_KEY])
        if not self.CheckBlessing(input_dict):
            model_push.set_int_custom_property('pushed', 0)
            return

        model_export = artifact_utils.get_single_instance(
            input_dict[tfx_pusher_executor.MODEL_KEY])
        model_export_uri = model_export.uri

        exec_properties_copy = exec_properties.copy()
        custom_config = exec_properties_copy.pop('custom_config', {})
        ai_platform_serving_args = custom_config.get(SERVING_ARGS_KEY)
        if not ai_platform_serving_args:
            raise ValueError(
                '\'ai_platform_serving_args\' is missing in \'custom_config\'')
        # Deploy the model.
        model_path = path_utils.serving_model_path(model_export_uri)
        # Note: we do not have a logical model version right now. This
        # model_version is a timestamp mapped to trainer's exporter.
        model_version = os.path.basename(model_path)
        executor_class_path = '%s.%s' % (self.__class__.__module__,
                                         self.__class__.__name__)
        runner.deploy_model_for_aip_prediction(
            model_path,
            model_version,
            ai_platform_serving_args,
            executor_class_path,
        )

        model_push.set_int_custom_property('pushed', 1)
        model_push.set_string_custom_property('pushed_model', model_path)
Example #8
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result, optional.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'inference_result' not in output_dict:
            raise ValueError('\'inference_result\' is missing in output dict.')
        output = artifact_utils.get_single_instance(
            output_dict['inference_result'])
        if 'model' not in input_dict:
            raise ValueError('Input models are not valid; a model '
                             'needs to be specified.')
        if 'model_blessing' in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                output.set_int_custom_property('inferred', 0)
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(input_dict['model'])
        model_path = path_utils.serving_model_path(model.uri)
        logging.info('Use exported model from %s.', model_path)

        data_spec = bulk_inferrer_pb2.DataSpec()
        json_format.Parse(exec_properties['data_spec'], data_spec)
        if self._run_model_inference(
                data_spec, input_dict['examples'], output.uri,
                self._get_inference_spec(model_path, exec_properties)):
            output.set_int_custom_property('inferred', 1)
        else:
            output.set_int_custom_property('inferred', 0)
Example #9
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Push model to target directory if blessed.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: A dict of execution properties, including:
        - push_destination: JSON string of pusher_pb2.PushDestination instance,
          providing instruction of destination to push model.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        model_push = artifact_utils.get_single_instance(
            output_dict[PUSHED_MODEL_KEY])

        model_push_uri = model_push.uri
        model_export = artifact_utils.get_single_instance(
            input_dict[MODEL_KEY])
        model_export_uri = model_export.uri
        logging.info('Model pushing.')
        # Copy the model to pushing uri.
        model_path = path_utils.serving_model_path(model_export_uri)
        model_version = str(int(time.time()))
        # model_version = path_utils.get_serving_model_version(model_export_uri)
        logging.info('Model version is %s', model_version)
        io_utils.copy_dir(model_path,
                          os.path.join(model_push_uri, model_version))
        logging.info('Model written to %s.', model_push_uri)

        push_destination = pusher_pb2.PushDestination()
        json_format.Parse(exec_properties['push_destination'],
                          push_destination)
        serving_path = os.path.join(push_destination.filesystem.base_directory,
                                    model_version)
        if tf.io.gfile.exists(serving_path):
            logging.info(
                'Destination directory %s already exists, skipping current push.',
                serving_path)
        else:
            # tf.serving won't load a partial model; it will retry until the copy completes.
            io_utils.copy_dir(model_path, serving_path)
            logging.info('Model written to serving path %s.', serving_path)

        model_push.set_int_custom_property('pushed', 1)
        model_push.set_string_custom_property('pushed_model', model_export_uri)
        model_push.set_int_custom_property('pushed_model_id', model_export.id)
        logging.info('Model pushed to %s.', serving_path)
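The push_destination execution property is likewise a JSON-serialized proto. A minimal sketch of constructing it, with a hypothetical base directory:

    from google.protobuf import json_format
    from tfx.proto import pusher_pb2

    push_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory='/serving/models/my_model'))  # hypothetical path
    exec_properties = {
        'push_destination': json_format.MessageToJson(push_destination),
    }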
Example #10
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Copy the input_data to the output_data.

    For this example that is all that the Executor does.  For a different
    custom component, this is where the real functionality of the component
    would be included.

    This component both reads and writes Examples, but a different component
    might read and write artifacts of other types.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - input_data: A list of type `standard_artifacts.Examples` which will
          often contain two splits, 'train' and 'eval'.
      output_dict: Output dict from key to a list of artifacts, including:
        - output_data: A list of type `standard_artifacts.Examples` which will
          usually contain the same splits as input_data.
      exec_properties: A dict of execution properties, including:
        - name: Optional unique name. Necessary iff multiple Hello components
          are declared in the same pipeline.

    Returns:
      None

    Raises:
      OSError and its subclasses
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        input_artifact = artifact_utils.get_single_instance(
            input_dict['input_data'])
        output_artifact = artifact_utils.get_single_instance(
            output_dict['output_data'])
        output_artifact.split_names = input_artifact.split_names

        split_to_instance = {}

        for split in json.loads(input_artifact.split_names):
            uri = artifact_utils.get_split_uri([input_artifact], split)
            split_to_instance[split] = uri

        for split, instance in split_to_instance.items():
            input_dir = instance
            output_dir = artifact_utils.get_split_uri([output_artifact], split)
            for filename in tf.io.gfile.listdir(input_dir):
                input_uri = os.path.join(input_dir, filename)
                output_uri = os.path.join(output_dir, filename)
                io_utils.copy_file(src=input_uri,
                                   dst=output_uri,
                                   overwrite=True)
Example #11
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Push model to target directory if blessed.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.  A push
          action delivers the model exports produced by Trainer to the
          destination defined in component config.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: A dict of execution properties, including:
        - push_destination: JSON string of pusher_pb2.PushDestination instance,
          providing instruction of destination to push artifact.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        artifact_export = artifact_utils.get_single_instance(
            input_dict[ARTIFACT_KEY])
        artifact_path = artifact_export.uri

        artifact_push = artifact_utils.get_single_instance(
            output_dict[PUSHED_ARTIFACT_KEY])

        push_destination = pusher_pb2.PushDestination()
        json_format.Parse(exec_properties['push_destination'],
                          push_destination)

        destination_kind = push_destination.WhichOneof('destination')
        if destination_kind == 'filesystem':
            fs_config = push_destination.filesystem
            serving_path = fs_config.base_directory

            copy_dir(artifact_path, serving_path)
            absl.logging.info('artifact written to serving path %s.',
                              serving_path)
        else:
            raise NotImplementedError(
                'Invalid push destination {}'.format(destination_kind))

        # Copy the artifact to pushing uri for archiving.
        copy_dir(artifact_path, artifact_push.uri)
        absl.logging.info('artifact pushed to %s.', artifact_push.uri)
Example #12
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """
        Args:
            input_dict:
            output_dict:
            exec_properties:
        """
        source = exec_properties[StepKeys.SOURCE]
        args = exec_properties[StepKeys.ARGS]

        c = source_utils.load_source_path_class(source)
        data_step: BaseDataStep = c(**args)

        # Get output split path
        examples_artifact = artifact_utils.get_single_instance(
            output_dict[DATA_SPLIT_NAME])
        split_names = [DATA_SPLIT_NAME]
        examples_artifact.split_names = artifact_utils.encode_split_names(
            split_names)
        output_split_path = artifact_utils.get_split_uri([examples_artifact],
                                                         DATA_SPLIT_NAME)

        with self._make_beam_pipeline() as p:
            (p
             | data_step.read_from_source()
             # | data_step.convert_to_dict()
             | WriteToTFRecord(data_step.schema, output_split_path))
Example #13
File: driver.py Project: meixinzhang/tfx
  def _prepare_output_artifacts(
      self,
      input_artifacts: Dict[Text, List[types.Artifact]],
      output_dict: Dict[Text, types.Channel],
      exec_properties: Dict[Text, Any],
      execution_id: int,
      pipeline_info: data_types.PipelineInfo,
      component_info: data_types.ComponentInfo,
  ) -> Dict[Text, List[types.Artifact]]:
    """Overrides BaseDriver._prepare_output_artifacts()."""
    del input_artifacts

    result = channel_utils.unwrap_channel_dict(output_dict)
    if len(result) != 1:
      raise RuntimeError('Multiple output artifacts are not supported.')

    base_output_dir = os.path.join(pipeline_info.pipeline_root,
                                   component_info.component_id)

    example_artifact = artifact_utils.get_single_instance(
        result[utils.EXAMPLES_KEY])
    example_artifact.uri = base_driver._generate_output_uri(  # pylint: disable=protected-access
        base_output_dir, utils.EXAMPLES_KEY, execution_id)
    example_artifact.set_string_custom_property(
        utils.FINGERPRINT_PROPERTY_NAME,
        exec_properties[utils.FINGERPRINT_PROPERTY_NAME])
    example_artifact.set_string_custom_property(
        utils.SPAN_PROPERTY_NAME, exec_properties[utils.SPAN_PROPERTY_NAME])

    base_driver._prepare_output_paths(example_artifact)  # pylint: disable=protected-access

    return result
Example #14
File: driver_test.py Project: yifanmai/tfx
  def testPrepareOutputArtifacts(self):
    examples = standard_artifacts.Examples()
    output_dict = {utils.EXAMPLES_KEY: channel_utils.as_channel([examples])}
    exec_properties = {
        utils.SPAN_PROPERTY_NAME: 2,
        utils.VERSION_PROPERTY_NAME: 1,
        utils.FINGERPRINT_PROPERTY_NAME: 'fp'
    }

    pipeline_info = data_types.PipelineInfo(
        pipeline_name='name', pipeline_root=self._test_dir, run_id='rid')
    component_info = data_types.ComponentInfo(
        component_type='type', component_id='cid', pipeline_info=pipeline_info)

    input_artifacts = {}
    output_artifacts = self._example_gen_driver._prepare_output_artifacts(
        input_artifacts, output_dict, exec_properties, 1, pipeline_info,
        component_info)
    examples = artifact_utils.get_single_instance(
        output_artifacts[utils.EXAMPLES_KEY])
    base_output_dir = os.path.join(self._test_dir, component_info.component_id)
    expected_uri = base_driver._generate_output_uri(  # pylint: disable=protected-access
        base_output_dir, 'examples', 1)

    self.assertEqual(examples.uri, expected_uri)
    self.assertEqual(
        examples.get_string_custom_property(utils.FINGERPRINT_PROPERTY_NAME),
        'fp')
    self.assertEqual(
        examples.get_string_custom_property(utils.SPAN_PROPERTY_NAME), '2')
    self.assertEqual(
        examples.get_string_custom_property(utils.VERSION_PROPERTY_NAME), '1')
Example #15
  def _build_importer_spec(self) -> ImporterSpec:
    """Builds ImporterSpec."""
    assert isinstance(self._node, importer.Importer)
    output_channel = self._node.outputs[importer.IMPORT_RESULT_KEY]
    result = ImporterSpec()

    # Importer's output channel contains one artifact instance with
    # additional properties.
    artifact_instance = list(output_channel.get())[0]
    struct_proto = compiler_utils.pack_artifact_properties(artifact_instance)
    if struct_proto:
      result.metadata.CopyFrom(struct_proto)

    result.reimport = bool(self._exec_properties[importer.REIMPORT_OPTION_KEY])
    result.artifact_uri.CopyFrom(
        compiler_utils.value_converter(
            self._exec_properties[importer.SOURCE_URI_KEY]))
    single_artifact = artifact_utils.get_single_instance(
        list(output_channel.get()))
    result.type_schema.CopyFrom(
        pipeline_pb2.ArtifactTypeSchema(
            instance_schema=compiler_utils.get_artifact_schema(
                single_artifact)))

    return result
Example #16
    def testPrepareOutputArtifacts(self):
        examples = standard_artifacts.Examples()
        output_dict = {
            utils.EXAMPLES_KEY: channel_utils.as_channel([examples])
        }
        exec_properties = {
            utils.SPAN_PROPERTY_NAME: '02',
            utils.FINGERPRINT_PROPERTY_NAME: 'fp'
        }

        pipeline_info = data_types.PipelineInfo(pipeline_name='name',
                                                pipeline_root=self._test_dir,
                                                run_id='rid')
        component_info = data_types.ComponentInfo(component_type='type',
                                                  component_id='cid',
                                                  pipeline_info=pipeline_info)

        input_artifacts = {}
        output_artifacts = self._example_gen_driver._prepare_output_artifacts(
            input_artifacts, output_dict, exec_properties, 1, pipeline_info,
            component_info)
        examples = artifact_utils.get_single_instance(
            output_artifacts[utils.EXAMPLES_KEY])
        self.assertEqual(
            examples.uri,
            os.path.join(self._test_dir, 'cid', 'examples', '1', ''))
        self.assertEqual(
            examples.get_string_custom_property(
                utils.FINGERPRINT_PROPERTY_NAME), 'fp')
        self.assertEqual(
            examples.get_string_custom_property(utils.SPAN_PROPERTY_NAME),
            '02')
Example #17
File: executor.py Project: ssoudan/tfx_x
  def Do(self,
         input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Stores `custom_config` as an artifact of type `artifacts.PipelineConfiguration`.

    Args:
      input_dict: Empty
      output_dict: Output dict from key to a list of artifacts, including:
        - pipeline_configuration: A list of type `artifacts.PipelineConfiguration`
      exec_properties: A dict of execution properties, including:
        - custom_config: the configuration to save.
    Returns:
      None

    Raises:
      OSError and its subclasses
      ValueError
    """
    self._log_startup(input_dict, output_dict, exec_properties)

    pipeline_configuration = artifact_utils.get_single_instance(
        output_dict[PIPELINE_CONFIGURATION_KEY])
    custom_config = exec_properties.get(CUSTOM_CONFIG_KEY, "{}")

    output_dir = artifact_utils.get_single_uri([pipeline_configuration])
    output_file = os.path.join(output_dir, 'custom_config.json')

    io_utils.write_string_file(output_file, custom_config)
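The executor treats custom_config as an opaque JSON string and writes it to the output artifact verbatim. A minimal sketch of supplying it, with hypothetical keys:

    import json

    exec_properties = {
        'custom_config': json.dumps({'learning_rate': 0.01}),  # hypothetical config
    }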
Example #18
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """TensorFlow SchemaGen executor entrypoint.

    This infers the schema using tensorflow_data_validation on the precomputed
    stats of 'train' split.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - 'stats': A list of 'ExampleStatistics' type which must contain
          split 'train'. Stats on other splits are ignored.
        - 'statistics': Synonym for 'stats'.
      output_dict: Output dict from key to a list of artifacts, including:
        - output: A list of 'Schema' artifact of size one.
      exec_properties: A dict of execution properties, includes:
        - infer_feature_shape: Whether or not to infer the shape of the feature.
        - exclude_splits: Names of splits that will not be taken into
          consideration when auto-generating a schema.

    Returns:
      None
    """
    # TODO(zhitaoli): Move constants between this file and component.py to a
    # constants.py.
    infer_feature_shape = exec_properties.get(INFER_FEATURE_SHAPE_KEY)

    # Load and deserialize exclude splits from execution properties.
    exclude_splits = json_utils.loads(
        exec_properties.get(EXCLUDE_SPLITS_KEY, 'null')) or []
    if not isinstance(exclude_splits, list):
      raise ValueError('exclude_splits in execution properties needs to be a '
                       'list. Got %s instead.' % type(exclude_splits))

    # Only one schema is generated for all splits.
    schema = None
    stats_artifact = artifact_utils.get_single_instance(
        input_dict[STATISTICS_KEY])
    for split in artifact_utils.decode_split_names(stats_artifact.split_names):
      if split in exclude_splits:
        continue

      logging.info('Processing schema from statistics for split %s.', split)
      stats_uri = io_utils.get_only_uri_in_dir(
          os.path.join(stats_artifact.uri, split))
      if not schema:
        schema = tfdv.infer_schema(
            tfdv.load_statistics(stats_uri), infer_feature_shape)
      else:
        schema = tfdv.update_schema(schema, tfdv.load_statistics(stats_uri),
                                    infer_feature_shape)

    output_uri = os.path.join(
        artifact_utils.get_single_uri(output_dict[SCHEMA_KEY]),
        _DEFAULT_FILE_NAME)
    io_utils.write_pbtxt_file(output_uri, schema)
    logging.info('Schema written to %s.', output_uri)
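A sketch of the exec_properties shape this executor expects, using the key names from the docstring above; the split to exclude is a hypothetical choice:

    from tfx.utils import json_utils

    exec_properties = {
        'infer_feature_shape': True,
        'exclude_splits': json_utils.dumps(['eval']),
    }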
Example #19
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        self._log_startup(input_dict, output_dict, exec_properties)

        data_view_artifact = artifact_utils.get_single_instance(
            input_dict.get(_DATA_VIEW_KEY))
        input_examples_artifact = artifact_utils.get_single_instance(
            input_dict.get(_INPUT_EXAMPLES_KEY))
        output_examples_artifact = artifact_utils.get_single_instance(
            output_dict.get(_OUTPUT_EXAMPLES_KEY, []))

        # The output artifact shares the URI and all other properties with the
        # input, with the following additional custom properties added.
        output_examples_artifact.copy_from(input_examples_artifact)
        output_examples_artifact.set_int_custom_property(
            constants.DATA_VIEW_ID_PROPERTY_KEY, data_view_artifact.id)
        output_examples_artifact.set_string_custom_property(
            constants.DATA_VIEW_URI_PROPERTY_KEY, data_view_artifact.uri)
Example #20
    def CheckBlessing(self, input_dict: Dict[Text,
                                             List[types.Artifact]]) -> bool:
        """Check that model is blessed by upstream validators.

    Args:
      input_dict: Input dict from input key to a list of artifacts:
        - model_blessing: A `ModelBlessing` artifact from model validator or
          evaluator.
          Pusher looks for a custom property `blessed` in the artifact to check
          it is safe to push.
        - infra_blessing: An `InfraBlessing` artifact from infra validator.
          Pusher looks for a custom property `blessed` in the artifact to
          determine whether the model is mechanically servable from the model
          server to which Pusher is going to push.

    Returns:
      True if the model is blessed by validator.
    """
        # TODO(jyzhao): should this be in driver or executor.
        maybe_model_blessing = input_dict.get(
            standard_component_specs.MODEL_BLESSING_KEY)
        if maybe_model_blessing:
            model_blessing = artifact_utils.get_single_instance(
                maybe_model_blessing)
            if not model_utils.is_model_blessed(model_blessing):
                logging.info('Model on %s was not blessed by model validation',
                             model_blessing.uri)
                return False
        maybe_infra_blessing = input_dict.get(
            standard_component_specs.INFRA_BLESSING_KEY)
        if maybe_infra_blessing:
            infra_blessing = artifact_utils.get_single_instance(
                maybe_infra_blessing)
            if not model_utils.is_infra_validated(infra_blessing):
                logging.info('Model on %s was not blessed by infra validator',
                             infra_blessing.uri)
                return False
        if not maybe_model_blessing and not maybe_infra_blessing:
            logging.warning(
                'Pusher is going to push the model without validation. '
                'Consider using Evaluator or InfraValidator in your '
                'pipeline.')
        return True
Example #21
  def __init__(self,
               statistics: types.Channel = None,
               schema: types.Channel = None,
               exclude_splits: Optional[List[Text]] = None,
               output: Optional[types.Channel] = None,
               stats: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Construct an ExampleValidator component.

    Args:
      statistics: A Channel of type `standard_artifacts.ExampleStatistics`. This
        should contain at least 'eval' split. Other splits are currently
        ignored.
      schema: A Channel of type `standard_artifacts.Schema`. _required_
      exclude_splits: Names of splits that the example validator should not
        validate. Default behavior (when exclude_splits is set to None)
        is excluding no splits.
      output: Output channel of type `standard_artifacts.ExampleAnomalies`.
      stats: Backwards compatibility alias for the 'statistics' argument.
      instance_name: Optional name assigned to this specific instance of
        ExampleValidator. Required only if multiple ExampleValidator components
        are declared in the same pipeline.

    Either `stats` or `statistics` must be present in the arguments.
    """
    if stats:
      logging.warning(
          'The "stats" argument to the StatisticsGen component has '
          'been renamed to "statistics" and is deprecated. Please update your '
          'usage as support for this argument will be removed soon.')
      statistics = stats
    if exclude_splits is None:
      exclude_splits = []
      logging.info('Excluding no splits because exclude_splits is not set.')
    anomalies = output
    if not anomalies:
      anomalies_artifact = standard_artifacts.ExampleAnomalies()
      statistics_split_names = artifact_utils.decode_split_names(
          artifact_utils.get_single_instance(list(
              statistics.get())).split_names)
      split_names = [
          split for split in statistics_split_names
          if split not in exclude_splits
      ]
      anomalies_artifact.split_names = artifact_utils.encode_split_names(
          split_names)
      anomalies = types.Channel(
          type=standard_artifacts.ExampleAnomalies,
          artifacts=[anomalies_artifact])
    spec = ExampleValidatorSpec(
        statistics=statistics,
        schema=schema,
        exclude_splits=json_utils.dumps(exclude_splits),
        anomalies=anomalies)
    super(ExampleValidator, self).__init__(
        spec=spec, instance_name=instance_name)
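A minimal pipeline-level usage sketch, assuming hypothetical upstream statistics_gen and schema_gen components:

    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs['statistics'],
        schema=schema_gen.outputs['schema'],
        exclude_splits=['eval'])  # hypothetical: skip validating 'eval'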
Example #22
  def testGetFromSingleList(self):
    """Test various retrieval utilities on a single list of Artifact."""
    artifacts = [standard_artifacts.Examples()]
    artifacts[0].uri = '/tmp/evaluri'
    artifacts[0].split_names = '["eval"]'
    self.assertEqual(artifacts[0],
                     artifact_utils.get_single_instance(artifacts))
    self.assertEqual('/tmp/evaluri', artifact_utils.get_single_uri(artifacts))
    self.assertEqual('/tmp/evaluri/eval',
                     artifact_utils.get_split_uri(artifacts, 'eval'))
    with self.assertRaises(ValueError):
      artifact_utils.get_split_uri(artifacts, 'train')
Example #23
    def __init__(self,
                 examples: types.Channel = None,
                 schema: Optional[types.Channel] = None,
                 stats_options: Optional[tfdv.StatsOptions] = None,
                 output: Optional[types.Channel] = None,
                 input_data: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None,
                 enable_cache: Optional[bool] = None):
        """Construct a StatisticsGen component.

    Args:
      examples: A Channel of `ExamplesPath` type, likely generated by the
        [ExampleGen component](https://www.tensorflow.org/tfx/guide/examplegen).
        This needs to contain two splits labeled `train` and `eval`. _required_
      schema: A `Schema` channel to use for automatically configuring the value
        of stats options passed to TFDV.
      stats_options: The StatsOptions instance to configure optional TFDV
        behavior. When stats_options.schema is set, it will be used instead of
        the `schema` channel input. Due to the requirement that stats_options be
        serialized, the slicer functions and custom stats generators are dropped
        and are therefore not usable.
      output: `ExampleStatisticsPath` channel for statistics of each split
        provided in the input examples.
      input_data: Backwards compatibility alias for the `examples` argument.
      instance_name: Optional name assigned to this specific instance of
        StatisticsGen.  Required only if multiple StatisticsGen components are
        declared in the same pipeline.
      enable_cache: Optional boolean to indicate if cache is enabled for the
        StatisticsGen component. If not specified, defaults to the value
        specified for pipeline's enable_cache parameter.
    """
        if input_data:
            absl.logging.warning(
                'The "input_data" argument to the StatisticsGen component has '
                'been renamed to "examples" and is deprecated. Please update your '
                'usage as support for this argument will be removed soon.')
            examples = input_data
        if not output:
            statistics_artifact = standard_artifacts.ExampleStatistics()
            statistics_artifact.split_names = artifact_utils.get_single_instance(
                list(examples.get())).split_names
            output = types.Channel(type=standard_artifacts.ExampleStatistics,
                                   artifacts=[statistics_artifact])
        # TODO(b/150802589): Move jsonable interface to tfx_bsl and use json_utils.
        stats_options_json = stats_options.to_json() if stats_options else None
        spec = StatisticsGenSpec(examples=examples,
                                 schema=schema,
                                 stats_options_json=stats_options_json,
                                 statistics=output)
        super(StatisticsGen, self).__init__(spec=spec,
                                            instance_name=instance_name,
                                            enable_cache=enable_cache)
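In a pipeline, StatisticsGen is typically fed by an upstream ExampleGen. A minimal usage sketch, assuming a hypothetical example_gen component:

    statistics_gen = StatisticsGen(
        examples=example_gen.outputs['examples'])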
Example #24
  def test_get_from_split_list(self):
    """Test various retrieval utilities on a list of split Artifact."""
    split_list = []
    for split in ['train', 'eval']:
      instance = artifact.Artifact('MyTypeName', split=split)
      instance.uri = '/tmp/' + split
      split_list.append(instance)

    with self.assertRaises(ValueError):
      artifact_utils.get_single_instance(split_list)

    with self.assertRaises(ValueError):
      artifact_utils.get_single_uri(split_list)

    self.assertEqual(split_list[0],
                     artifact_utils._get_split_instance(split_list, 'train'))
    self.assertEqual('/tmp/train',
                     artifact_utils.get_split_uri(split_list, 'train'))
    self.assertEqual(split_list[1],
                     artifact_utils._get_split_instance(split_list, 'eval'))
    self.assertEqual('/tmp/eval',
                     artifact_utils.get_split_uri(split_list, 'eval'))
Example #25
def translate_executor_output(
        output_dict: Mapping[str, List[artifact.Artifact]],
        name_from_id: Mapping[int,
                              str]) -> Dict[str, pipeline_pb2.ArtifactList]:
    """Translates output_dict to a Kubeflow ArtifactList mapping."""
    result = {}
    for k, v in output_dict.items():
        result[k] = pipeline_pb2.ArtifactList(artifacts=[
            to_runtime_artifact(artifact_utils.get_single_instance(v),
                                name_from_id)
        ])

    return result
Example #26
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Loads the tokenizer step, optionally trains it, and adds tokens to examples."""

        source = exec_properties[StepKeys.SOURCE]
        args = exec_properties[StepKeys.ARGS]

        c = source_utils.load_source_path_class(source)
        tokenizer_step: BaseTokenizer = c(**args)

        tokenizer_location = artifact_utils.get_single_uri(
            output_dict["tokenizer"])

        split_uris, split_names, all_files = [], [], []
        for examples_artifact in input_dict["examples"]:
            for split in artifact_utils.decode_split_names(
                    examples_artifact.split_names):
                split_names.append(split)
                uri = os.path.join(examples_artifact.uri, split)
                split_uris.append((split, uri))
                all_files += path_utils.list_dir(uri)

        # Get output split path
        output_examples = artifact_utils.get_single_instance(
            output_dict["output_examples"])
        output_examples.split_names = artifact_utils.encode_split_names(
            split_names)

        if not tokenizer_step.skip_training:
            tokenizer_step.train(files=all_files)

            tokenizer_step.save(output_dir=tokenizer_location)

        with self._make_beam_pipeline() as p:
            for split, uri in split_uris:
                input_uri = io_utils.all_files_pattern(uri)

                _ = (p
                     | 'ReadData.' + split >> beam.io.ReadFromTFRecord(
                            file_pattern=input_uri)
                     | "ParseTFExFromString." + split >> beam.Map(
                            tf.train.Example.FromString)
                     | "AddTokens." + split >> beam.Map(
                            append_tf_example,
                            tokenizer_step=tokenizer_step)
                     | 'Serialize.' + split >> beam.Map(
                            lambda x: x.SerializeToString())
                     | 'WriteSplit.' + split >> WriteSplit(
                            get_split_uri(
                                output_dict["output_examples"],
                                split)))
Example #27
File: executor.py Project: google/nitroml
    def Do(self, input_dict: Dict[Text, List[Artifact]],
           output_dict: Dict[Text, List[Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Take evaluator output and publish results to MLMD.

    It updates custom properties of the BenchmarkResult artifact to contain
    the benchmark results.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - evaluation: Model evaluation results.
      output_dict: Output dict from key to a list of artifacts, including:
        - benchmark_result: `BenchmarkResult` artifact.
      exec_properties: A dict of execution properties, including:
        - benchmark_name: A unique name of a benchmark.
        - run: Index of this benchmark run.
        - num_runs: Total number of runs per benchmark.
        - additional_context: Serialized additional context properties.

    Raises:
      ValueError: If the evaluation uri does not exist.
    """

        uri = artifact_utils.get_single_uri(input_dict['evaluation'])
        if not tf.io.gfile.exists(uri):
            raise ValueError('The uri="{}" does not exist.'.format(uri))

        benchmark_result = artifact_utils.get_single_instance(
            output_dict['benchmark_result'])
        benchmark_result.set_string_custom_property(
            br.BenchmarkResult.BENCHMARK_NAME_KEY,
            exec_properties['benchmark_name'])
        benchmark_result.set_int_custom_property(
            br.BenchmarkResult.BENCHMARK_RUN_KEY, exec_properties['run'])
        benchmark_result.set_int_custom_property(
            br.BenchmarkResult.RUNS_PER_BENCHMARK_KEY,
            exec_properties['num_runs'])

        # Publish evaluation metrics
        evals = self._load_evaluation(uri)
        for name, val in evals.items():
            # TODO(b/151723291): Use correct type instead of string.
            benchmark_result.set_string_custom_property(name, str(val))

        context_properties = serialize.decode(
            exec_properties['additional_context'])
        # TODO(b/175802446): Add additional properties storing
        # `additional_context` and `metric` keys so user can distinguish between
        # custom properties.

        for name, val in context_properties.items():
            # TODO(b/151723291): Use correct type instead of string.
            benchmark_result.set_string_custom_property(name, str(val))
Example #28
  def test_get_from_single_list(self):
    """Test various retrieval utilities on a single list of Artifact."""
    single_list = [artifact.Artifact('MyTypeName', split='eval')]
    single_list[0].uri = '/tmp/evaluri'
    self.assertEqual(single_list[0],
                     artifact_utils.get_single_instance(single_list))
    self.assertEqual('/tmp/evaluri', artifact_utils.get_single_uri(single_list))
    self.assertEqual(single_list[0],
                     artifact_utils._get_split_instance(single_list, 'eval'))
    self.assertEqual('/tmp/evaluri',
                     artifact_utils.get_split_uri(single_list, 'eval'))
    with self.assertRaises(ValueError):
      artifact_utils._get_split_instance(single_list, 'train')
    with self.assertRaises(ValueError):
      artifact_utils.get_split_uri(single_list, 'train')
Example #29
  def testGetFromSplits(self):
    """Test various retrieval utilities on a list of split Artifact."""
    artifacts = [standard_artifacts.Examples()]
    artifacts[0].uri = '/tmp'
    artifacts[0].split_names = artifact_utils.encode_split_names(
        ['train', 'eval'])

    self.assertEqual(artifacts[0].split_names, '["train", "eval"]')

    self.assertIs(artifact_utils.get_single_instance(artifacts), artifacts[0])
    self.assertEqual('/tmp', artifact_utils.get_single_uri(artifacts))
    self.assertEqual('/tmp/train',
                     artifact_utils.get_split_uri(artifacts, 'train'))
    self.assertEqual('/tmp/eval',
                     artifact_utils.get_split_uri(artifacts, 'eval'))
Example #30
  def __init__(self,
               input_examples: types.Channel,
               data_view: types.Channel,
               output_examples: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    if not output_examples:
      output_artifact = standard_artifacts.Examples()
      output_artifact.copy_from(
          artifact_utils.get_single_instance(list(input_examples.get())))
      output_examples = channel_utils.as_channel([output_artifact])

    spec = _DataViewBinderComponentSpec(
        input_examples=input_examples,
        data_view=data_view,
        output_examples=output_examples)
    super().__init__(spec=spec, instance_name=instance_name)