Example #1
    def _PrepareModelPath(
            self, model_uri: Text,
            serving_spec: infra_validator_pb2.ServingSpec) -> Text:
        model_path = path_utils.serving_model_path(model_uri)
        serving_binary = serving_spec.WhichOneof('serving_binary')
        if serving_binary == 'tensorflow_serving':
            # TensorFlow Serving requires the model to be stored in its own
            # directory-structure flavor. If the current model_path does not
            # conform to that flavor, we need to copy it to a temporary path.
            try:
                # Check whether current model_path conforms to the tensorflow serving
                # model path flavor. (Parsed without exception)
                tf_serving_flavor.parse_model_path(
                    model_path, expected_model_name=serving_spec.model_name)
            except ValueError:
                # Copy the model to comply with the tensorflow serving model path
                # flavor.
                temp_model_path = tf_serving_flavor.make_model_path(
                    model_base_path=self._get_tmp_dir(),
                    model_name=serving_spec.model_name,
                    version=int(time.time()))
                io_utils.copy_dir(src=model_path, dst=temp_model_path)
                return temp_model_path

        return model_path
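
For context, TensorFlow Serving expects each model under <model_base_path>/<model_name>/<version>/, which is the "flavor" the snippet above enforces before calling io_utils.copy_dir. A minimal sketch of the path the copy targets, assuming tf_serving_flavor.make_model_path simply joins those three components (all directory names below are hypothetical):

import os
import time

# Hypothetical values; the real ones come from self._get_tmp_dir() and the
# ServingSpec proto in the snippet above.
tmp_dir = '/tmp/infra_validator'
model_name = 'my_model'
version = int(time.time())

# Assumed equivalent of tf_serving_flavor.make_model_path(...):
temp_model_path = os.path.join(tmp_dir, model_name, str(version))
# e.g. '/tmp/infra_validator/my_model/1700000000'; once io_utils.copy_dir
# places the SavedModel files inside it, TF Serving can load that version.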
Example #2
  def testDoWithCustomSplits(self):
    # Update input dict.
    io_utils.copy_dir(
        os.path.join(self._testdata_dir, 'iris/data/train'),
        os.path.join(self._output_data_dir, 'data/training'))
    io_utils.copy_dir(
        os.path.join(self._testdata_dir, 'iris/data/eval'),
        os.path.join(self._output_data_dir, 'data/evaluating'))
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(self._output_data_dir, 'data')
    examples.split_names = artifact_utils.encode_split_names(
        ['training', 'evaluating'])
    self._input_dict['examples'] = [examples]

    # Update exec properties skeleton with custom splits.
    self._exec_properties['train_args'] = json_format.MessageToJson(
        trainer_pb2.TrainArgs(splits=['training'], num_steps=1000),
        preserving_proto_field_name=True)
    self._exec_properties['eval_args'] = json_format.MessageToJson(
        trainer_pb2.EvalArgs(splits=['evaluating'], num_steps=500),
        preserving_proto_field_name=True)
    self._exec_properties['module_file'] = os.path.join(self._testdata_dir,
                                                        'module_file',
                                                        'tuner_module.py')

    tuner = executor.Executor(self._context)
    tuner.Do(
        input_dict=self._input_dict,
        output_dict=self._output_dict,
        exec_properties=self._exec_properties)

    self._verify_output()
Example #3
    def setUp(self):
        super(CliAirflowEndToEndTest, self).setUp()

        # Change the encoding for Click since Python 3 is configured to use ASCII as
        # encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Setup airflow_home in a temp directory
        self._airflow_home = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', tempfile.mkdtemp()),
            self._testMethodName)
        self._old_airflow_home = os.environ.get('AIRFLOW_HOME')
        os.environ['AIRFLOW_HOME'] = self._airflow_home
        self._old_home = os.environ.get('HOME')
        os.environ['HOME'] = self._airflow_home
        tf.logging.info('Using %s as AIRFLOW_HOME and HOME in this e2e test',
                        self._airflow_home)

        # Do not load examples to make this a bit faster.
        os.environ['AIRFLOW__CORE__LOAD_EXAMPLES'] = 'False'
        # Following environment variables make scheduler process dags faster.
        os.environ['AIRFLOW__SCHEDULER__JOB_HEARTBEAT_SEC'] = '1'
        os.environ['AIRFLOW__SCHEDULER__SCHEDULER_HEARTBEAT_SEC'] = '1'
        os.environ['AIRFLOW__SCHEDULER__RUN_DURATION'] = '-1'
        os.environ['AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL'] = '1'
        os.environ['AIRFLOW__SCHEDULER__PRINT_STATS_INTERVAL'] = '30'
        # Using more than one thread results in a warning for sqlite backend.
        # See https://github.com/tensorflow/tfx/issues/141
        os.environ['AIRFLOW__SCHEDULER__MAX_THREADS'] = '1'

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        # Copy data.
        chicago_taxi_pipeline_dir = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.dirname(
                        os.path.abspath(__file__))))), 'examples',
            'chicago_taxi_pipeline', '')
        data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
        content = tf.gfile.ListDirectory(data_dir)
        assert content, 'content in {} is empty'.format(data_dir)
        target_data_dir = os.path.join(self._airflow_home, 'taxi', 'data',
                                       'simple')
        io_utils.copy_dir(data_dir, target_data_dir)
        assert tf.gfile.IsDirectory(target_data_dir)
        content = tf.gfile.ListDirectory(target_data_dir)
        assert content, 'content in {} is {}'.format(target_data_dir, content)
        io_utils.copy_file(
            os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
            os.path.join(self._airflow_home, 'taxi', 'taxi_utils.py'))

        # Initialize database.
        _ = subprocess.check_output(['airflow', 'initdb'])

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()
Example #4
    def _PrepareModelPath(
            self, model: types.Artifact,
            serving_spec: infra_validator_pb2.ServingSpec) -> str:
        model_path = path_utils.serving_model_path(
            model.uri, path_utils.is_old_model_artifact(model))
        serving_binary = serving_spec.WhichOneof('serving_binary')
        if serving_binary == _TENSORFLOW_SERVING:
            # TensorFlow Serving requires the model to be stored in its own
            # directory-structure flavor. If the current model_path does not
            # conform to that flavor, we need to copy it to a temporary path.
            try:
                # Check whether current model_path conforms to the tensorflow serving
                # model path flavor. (Parsed without exception)
                tf_serving_flavor.parse_model_path(
                    model_path, expected_model_name=serving_spec.model_name)
            except ValueError:
                # Copy the model to comply with the tensorflow serving model path
                # flavor.
                temp_model_path = tf_serving_flavor.make_model_path(
                    model_base_path=self._get_tmp_dir(),
                    model_name=serving_spec.model_name,
                    version=int(time.time()))
                io_utils.copy_dir(src=model_path, dst=temp_model_path)
                self._AddCleanup(io_utils.delete_dir,
                                 self._context.get_tmp_path())
                return temp_model_path

        return model_path
Example #5
    def testDoWithCustomSplits(self):
        # Update input dict.
        io_utils.copy_dir(
            os.path.join(self._testdata_dir, 'penguin/data/train'),
            os.path.join(self._output_data_dir, 'data/training'))
        io_utils.copy_dir(
            os.path.join(self._testdata_dir, 'penguin/data/eval'),
            os.path.join(self._output_data_dir, 'data/evaluating'))
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(self._output_data_dir, 'data')
        examples.split_names = artifact_utils.encode_split_names(
            ['training', 'evaluating'])
        self._input_dict[standard_component_specs.EXAMPLES_KEY] = [examples]

        # Update exec properties skeleton with custom splits.
        self._exec_properties[standard_component_specs.
                              TRAIN_ARGS_KEY] = proto_utils.proto_to_json(
                                  trainer_pb2.TrainArgs(splits=['training'],
                                                        num_steps=1000))
        self._exec_properties[standard_component_specs.
                              EVAL_ARGS_KEY] = proto_utils.proto_to_json(
                                  trainer_pb2.EvalArgs(splits=['evaluating'],
                                                       num_steps=500))
        self._exec_properties[
            standard_component_specs.MODULE_FILE_KEY] = os.path.join(
                self._testdata_dir, 'module_file', 'tuner_module.py')

        tuner = executor.Executor(self._context)
        tuner.Do(input_dict=self._input_dict,
                 output_dict=self._output_dict,
                 exec_properties=self._exec_properties)

        self._verify_output()
Example #6
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Copies over recorded data to pipeline output uri.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
      output_dict: Output dict from output key to a list of Artifacts.
      exec_properties: A dict of execution properties.

    Returns:
      None

    Raises:
      FileNotFoundError: If the recorded test data dir doesn't exist any more.
    """
        for output_key, artifact_list in output_dict.items():
            for idx, artifact in enumerate(artifact_list):
                dest = artifact.uri
                src = os.path.join(self._test_data_dir, self._component_id,
                                   output_key, str(idx))
                if not os.path.exists(src):
                    raise FileNotFoundError("{} does not exist".format(src))
                io_utils.copy_dir(src, dest)
                logging.info("Finished copying from %s to %s", src, dest)
Example #7
    def testDoWithCustomSplits(self):
        # Update input dict.
        io_utils.copy_dir(
            os.path.join(self._source_data_dir,
                         'transform/transformed_examples/data/train'),
            os.path.join(self._output_data_dir, 'data/training'))
        io_utils.copy_dir(
            os.path.join(self._source_data_dir,
                         'transform/transformed_examples/data/eval'),
            os.path.join(self._output_data_dir, 'data/evaluating'))
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(self._output_data_dir, 'data')
        examples.split_names = artifact_utils.encode_split_names(
            ['training', 'evaluating'])
        self._input_dict[constants.EXAMPLES_KEY] = [examples]

        # Update exec properties skeleton with custom splits.
        self._exec_properties['train_args'] = json_format.MessageToJson(
            trainer_pb2.TrainArgs(splits=['training'], num_steps=1000),
            preserving_proto_field_name=True)
        self._exec_properties['eval_args'] = json_format.MessageToJson(
            trainer_pb2.EvalArgs(splits=['evaluating'], num_steps=500),
            preserving_proto_field_name=True)

        self._exec_properties['module_file'] = self._module_file
        self._do(self._trainer_executor)
        self._verify_model_exports()
        self._verify_model_run_exports()
Example #8
    def testDoWithCustomSplits(self):
        # Update input dict.
        io_utils.copy_dir(
            os.path.join(self._source_data_dir,
                         'transform/transformed_examples/data/train'),
            os.path.join(self._output_data_dir, 'data/training'))
        io_utils.copy_dir(
            os.path.join(self._source_data_dir,
                         'transform/transformed_examples/data/eval'),
            os.path.join(self._output_data_dir, 'data/evaluating'))
        examples = standard_artifacts.Examples()
        examples.uri = os.path.join(self._output_data_dir, 'data')
        examples.split_names = artifact_utils.encode_split_names(
            ['training', 'evaluating'])
        self._input_dict[standard_component_specs.EXAMPLES_KEY] = [examples]

        # Update exec properties skeleton with custom splits.
        self._exec_properties[standard_component_specs.
                              TRAIN_ARGS_KEY] = proto_utils.proto_to_json(
                                  trainer_pb2.TrainArgs(splits=['training'],
                                                        num_steps=1000))
        self._exec_properties[standard_component_specs.
                              EVAL_ARGS_KEY] = proto_utils.proto_to_json(
                                  trainer_pb2.EvalArgs(splits=['evaluating'],
                                                       num_steps=500))

        self._exec_properties[
            standard_component_specs.MODULE_FILE_KEY] = self._module_file
        self._do(self._trainer_executor)
        self._verify_model_exports()
        self._verify_model_run_exports()
Example #9
 def _CreateWarmupModel(self, blessing: types.Artifact, model_path: str,
                        warmup_requests: List[iv_types.Request]):
     output_model_path = path_utils.stamped_model_path(blessing.uri)
     io_utils.copy_dir(src=model_path, dst=output_model_path)
     io_utils.write_tfrecord_file(
         path_utils.warmup_file_path(output_model_path),
         *[_convert_to_prediction_log(r) for r in warmup_requests])
     blessing.set_int_custom_property(_MODEL_FLAG_KEY, 1)
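
The warmup file written above has a fixed location that TensorFlow Serving scans when it loads a model. A hedged sketch of what path_utils.warmup_file_path presumably resolves to, following TF Serving's documented assets.extra convention (the helper name below is hypothetical):

import os

def warmup_file_path_sketch(model_path: str) -> str:
  # TF Serving replays the PredictionLog records found at
  # <model_path>/assets.extra/tf_serving_warmup_requests at load time.
  return os.path.join(model_path, 'assets.extra', 'tf_serving_warmup_requests')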
Example #10
def _create_tflite_compatible_saved_model(src: Text, dst: Text):
  io_utils.copy_dir(src, dst)
  assets_path = os.path.join(dst, tf.saved_model.ASSETS_DIRECTORY)
  if fileio.exists(assets_path):
    fileio.rmtree(assets_path)
  assets_extra_path = os.path.join(dst, EXTRA_ASSETS_DIRECTORY)
  if fileio.exists(assets_extra_path):
    fileio.rmtree(assets_extra_path)
Example #11
def record_pipeline(output_dir: Text,
                    metadata_db_uri: Optional[Text] = None,
                    host: Optional[Text] = None,
                    port: Optional[int] = None,
                    pipeline_name: Optional[Text] = None,
                    run_id: Optional[Text] = None) -> None:
    """Record pipeline run with run_id to output_dir.

  For a Beam pipeline, metadata_db_uri is required. For a KFP pipeline,
  host and port should be specified. If run_id is not specified, then
  pipeline_name must be specified in order to fetch the latest execution
  for the specified pipeline.

  Args:
    output_dir: Directory path where the pipeline outputs should be recorded.
    metadata_db_uri: Uri to metadata db.
    host: Hostname of the metadata gRPC server.
    port: Port number of the metadata gRPC server.
    pipeline_name: Pipeline name, which is required if run_id isn't specified.
    run_id: Pipeline execution run_id.

  Raises:
    ValueError: In cases of invalid arguments:
      - metadata_db_uri is None or host and/or port is None.
      - run_id is None and pipeline_name is None.
    FileNotFoundError: if the source artifact uri does not already exist.
  """
    if host is not None and port is not None:
        metadata_config = metadata_store_pb2.MetadataStoreClientConfig(
            host=host, port=port)
    elif metadata_db_uri is not None:
        metadata_config = metadata.sqlite_metadata_connection_config(
            metadata_db_uri)
    else:
        raise ValueError('For KFP, host and port are required. '
                         'For beam pipeline, metadata_db_uri is required.')

    with metadata.Metadata(metadata_config) as metadata_connection:
        if run_id is None:
            if pipeline_name is None:
                raise ValueError('If the run_id is not specified,'
                                 ' pipeline_name should be specified')
            # Fetch executions of the most recently updated execution context.
            executions = _get_latest_executions(metadata_connection,
                                                pipeline_name)
        else:
            execution_dict = _get_execution_dict(metadata_connection)
            if run_id in execution_dict:
                executions = execution_dict[run_id]
            else:
                raise ValueError(
                    'run_id {} is not recorded in the MLMD metadata'.format(
                        run_id))

        for src_uri, dest_uri in _get_paths(metadata_connection, executions,
                                            output_dir):
            io_utils.copy_dir(src_uri, dest_uri)
        logging.info('Pipeline Recorded at %s', output_dir)
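
Two hedged usage sketches for record_pipeline, matching the two configurations its docstring describes (all paths, hostnames, and run IDs below are made up for illustration):

# Beam/local pipeline: record a specific run from an SQLite MLMD database.
record_pipeline(
    output_dir='/tmp/recorded_testdata',
    metadata_db_uri='/tmp/tfx/metadata/my_pipeline/metadata.db',
    run_id='some-run-id')

# KFP pipeline: record the latest run of a named pipeline via the gRPC
# metadata server.
record_pipeline(
    output_dir='/tmp/recorded_testdata',
    host='metadata-grpc-service.kubeflow',
    port=8080,
    pipeline_name='my_pipeline')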
Example #12
 def testCopyDir(self):
     old_path = os.path.join(self._base_dir, 'old', 'path')
     new_path = os.path.join(self._base_dir, 'new', 'path')
     io_utils.write_string_file(old_path, 'testing')
     io_utils.copy_dir(os.path.dirname(old_path), os.path.dirname(new_path))
     self.assertTrue(file_io.file_exists(new_path))
     f = file_io.FileIO(new_path, mode='r')
     self.assertEqual('testing', f.read())
     self.assertEqual(7, f.tell())
Example #14
  def setUp(self):
    super(CliAirflowEndToEndTest, self).setUp()

    # List of packages installed.
    self._pip_list = str(subprocess.check_output(['pip', 'freeze', '--local']))

    # Check if Apache Airflow is installed before running E2E tests.
    if labels.AIRFLOW_PACKAGE_NAME not in self._pip_list:
      sys.exit('Apache Airflow not installed.')

    # Change the encoding for Click since Python 3 is configured to use ASCII as
    # encoding for the environment.
    if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
      os.environ['LANG'] = 'en_US.utf-8'

    # Setup airflow_home in a temp directory
    self._airflow_home = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', tempfile.mkdtemp()),
        self._testMethodName, 'airflow')
    self._old_airflow_home = os.environ.get('AIRFLOW_HOME')
    os.environ['AIRFLOW_HOME'] = self._airflow_home
    self._old_home = os.environ.get('HOME')
    os.environ['HOME'] = self._airflow_home
    absl.logging.info('Using %s as AIRFLOW_HOME and HOME in this e2e test',
                      self._airflow_home)

    # Testdata path.
    self._testdata_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')

    # Do not load examples to make this a bit faster.
    os.environ['AIRFLOW__CORE__LOAD_EXAMPLES'] = 'False'

    # Copy data.
    chicago_taxi_pipeline_dir = os.path.join(
        os.path.dirname(
            os.path.dirname(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
        'examples', 'chicago_taxi_pipeline', '')
    data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
    content = tf.io.gfile.listdir(data_dir)
    assert content, 'content in {} is empty'.format(data_dir)
    target_data_dir = os.path.join(self._airflow_home, 'taxi', 'data', 'simple')
    io_utils.copy_dir(data_dir, target_data_dir)
    assert tf.io.gfile.isdir(target_data_dir)
    content = tf.io.gfile.listdir(target_data_dir)
    assert content, 'content in {} is {}'.format(target_data_dir, content)
    io_utils.copy_file(
        os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
        os.path.join(self._airflow_home, 'taxi', 'taxi_utils.py'))

    self._airflow_initdb()

    # Initialize CLI runner.
    self.runner = click_testing.CliRunner()
Example #15
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Push model to target directory if blessed.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: A dict of execution properties, including:
        - push_destination: JSON string of pusher_pb2.PushDestination instance,
          providing instruction of destination to push model.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        model_push = artifact_utils.get_single_instance(
            output_dict[PUSHED_MODEL_KEY])

        model_push_uri = model_push.uri
        model_export = artifact_utils.get_single_instance(
            input_dict[MODEL_KEY])
        model_export_uri = model_export.uri
        logging.info('Model pushing.')
        # Copy the model to pushing uri.
        model_path = path_utils.serving_model_path(model_export_uri)
        model_version = str(int(time.time()))
        # model_version = path_utils.get_serving_model_version(model_export_uri)
        logging.info('Model version is %s', model_version)
        io_utils.copy_dir(model_path,
                          os.path.join(model_push_uri, model_version))
        logging.info('Model written to %s.', model_push_uri)

        push_destination = pusher_pb2.PushDestination()
        json_format.Parse(exec_properties['push_destination'],
                          push_destination)
        serving_path = os.path.join(push_destination.filesystem.base_directory,
                                    model_version)
        if tf.io.gfile.exists(serving_path):
            logging.info(
                'Destination directory %s already exists, skipping current push.',
                serving_path)
        else:
            # tf.serving won't load partial model, it will retry until fully copied.
            io_utils.copy_dir(model_path, serving_path)
            logging.info('Model written to serving path %s.', serving_path)

        model_push.set_int_custom_property('pushed', 1)
        model_push.set_string_custom_property('pushed_model', model_export_uri)
        model_push.set_int_custom_property('pushed_model_id', model_export.id)
        logging.info('Model pushed to %s.', serving_path)
Example #16
  def setUp(self):
    super().setUp()

    # List of packages installed.
    self._pip_list = pip_utils.get_package_names()

    # Check if Apache Airflow is installed before running E2E tests.
    if labels.AIRFLOW_PACKAGE_NAME not in self._pip_list:
      sys.exit('Apache Airflow not installed.')

    # Change the encoding for Click since Python 3 is configured to use ASCII as
    # encoding for the environment.
    if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
      os.environ['LANG'] = 'en_US.utf-8'

    # Setup airflow_home in a temp directory
    self._airflow_home = os.path.join(self.tmp_dir, 'airflow')
    self.enter_context(
        test_case_utils.override_env_var('AIRFLOW_HOME', self._airflow_home))
    self.enter_context(
        test_case_utils.override_env_var('HOME', self._airflow_home))

    absl.logging.info('Using %s as AIRFLOW_HOME and HOME in this e2e test',
                      self._airflow_home)

    # Testdata path.
    self._testdata_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')

    self._pipeline_name = 'chicago_taxi_simple'
    self._pipeline_path = os.path.join(self._testdata_dir,
                                       'test_pipeline_airflow_1.py')

    # Copy data.
    chicago_taxi_pipeline_dir = os.path.join(
        os.path.dirname(
            os.path.dirname(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
        'examples', 'chicago_taxi_pipeline')
    data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
    content = fileio.listdir(data_dir)
    assert content, 'content in {} is empty'.format(data_dir)
    target_data_dir = os.path.join(self._airflow_home, 'taxi', 'data', 'simple')
    io_utils.copy_dir(data_dir, target_data_dir)
    assert fileio.isdir(target_data_dir)
    content = fileio.listdir(target_data_dir)
    assert content, 'content in {} is {}'.format(target_data_dir, content)
    io_utils.copy_file(
        os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
        os.path.join(self._airflow_home, 'taxi', 'taxi_utils.py'))

    # Initialize CLI runner.
    self.runner = click_testing.CliRunner()
Example #17
    def setUp(self):
        super(CliAirflowEndToEndTest, self).setUp()

        # Change the encoding for Click since Python 3 is configured to use ASCII as
        # encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Setup airflow_home in a temp directory
        self._airflow_home = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', tempfile.mkdtemp()),
            self._testMethodName)
        self._old_airflow_home = os.environ.get('AIRFLOW_HOME')
        os.environ['AIRFLOW_HOME'] = self._airflow_home
        self._old_home = os.environ.get('HOME')
        os.environ['HOME'] = self._airflow_home
        tf.logging.info('Using %s as AIRFLOW_HOME and HOME in this e2e test',
                        self._airflow_home)

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        # Copy data.
        chicago_taxi_pipeline_dir = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.dirname(
                        os.path.abspath(__file__))))), 'examples',
            'chicago_taxi_pipeline', '')
        data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
        content = tf.gfile.ListDirectory(data_dir)
        assert content, 'content in {} is empty'.format(data_dir)
        target_data_dir = os.path.join(self._airflow_home, 'taxi', 'data',
                                       'simple')
        io_utils.copy_dir(data_dir, target_data_dir)
        assert tf.gfile.IsDirectory(target_data_dir)
        content = tf.gfile.ListDirectory(target_data_dir)
        assert content, 'content in {} is {}'.format(target_data_dir, content)
        io_utils.copy_file(
            os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
            os.path.join(self._airflow_home, 'taxi', 'taxi_utils.py'))

        # Initialize database.
        _ = subprocess.check_output(['airflow', 'initdb'])

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()

        # Start scheduler.
        self._scheduler = subprocess.Popen(['airflow', 'scheduler'])
Example #18
    def setUpClass(cls):
        super(ExecutorTest, cls).setUpClass()
        source_example_dir = os.path.join(cls._SOURCE_DATA_DIR,
                                          'csv_example_gen')

        io_utils.copy_dir(source_example_dir, cls._ARTIFACT1_URI)
        io_utils.copy_dir(source_example_dir, cls._ARTIFACT2_URI)

        # Duplicate the number of train and eval records such that
        # second artifact has twice as many as first.
        artifact2_pattern = os.path.join(cls._ARTIFACT2_URI, '*', '*')
        artifact2_files = tf.io.gfile.glob(artifact2_pattern)
        for filepath in artifact2_files:
            directory, filename = os.path.split(filepath)
            io_utils.copy_file(filepath,
                               os.path.join(directory, 'dup_' + filename))
Example #19
    def testCopyDirWithTrailingSlashes(self):
        old_path1 = os.path.join(self._base_dir, 'old1', '')
        old_path_file1 = os.path.join(old_path1, 'child', 'file')
        new_path1 = os.path.join(self._base_dir, 'new1')
        new_path_file1 = os.path.join(new_path1, 'child', 'file')

        io_utils.write_string_file(old_path_file1, 'testing')
        io_utils.copy_dir(old_path1, new_path1)
        self.assertTrue(file_io.file_exists(new_path_file1))

        old_path2 = os.path.join(self._base_dir, 'old2')
        old_path_file2 = os.path.join(old_path2, 'child', 'file')
        new_path2 = os.path.join(self._base_dir, 'new2', '')
        new_path_file2 = os.path.join(new_path2, 'child', 'file')

        io_utils.write_string_file(old_path_file2, 'testing')
        io_utils.copy_dir(old_path2, new_path2)
        self.assertTrue(file_io.file_exists(new_path_file2))
Example #20
    def testCopyDir(self):
        old_path = os.path.join(self._base_dir, 'old')
        old_path_file1 = os.path.join(old_path, 'file1')
        old_path_file2 = os.path.join(old_path, 'dir', 'dir2', 'file2')
        new_path = os.path.join(self._base_dir, 'new')
        new_path_file1 = os.path.join(new_path, 'file1')
        new_path_file2 = os.path.join(new_path, 'dir', 'dir2', 'file2')

        io_utils.write_string_file(old_path_file1, 'testing')
        io_utils.write_string_file(old_path_file2, 'testing2')
        io_utils.copy_dir(old_path, new_path)

        self.assertTrue(file_io.file_exists(new_path_file1))
        f = file_io.FileIO(new_path_file1, mode='r')
        self.assertEqual('testing', f.readline())

        self.assertTrue(file_io.file_exists(new_path_file2))
        f = file_io.FileIO(new_path_file2, mode='r')
        self.assertEqual('testing2', f.readline())
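
The tests above pin down copy_dir's observable behavior: a recursive copy that preserves the relative layout under the destination. A minimal re-implementation sketch with tf.io.gfile, for illustration only (the real io_utils.copy_dir may handle trailing slashes, overwrites, and remote filesystems differently):

import os
import tensorflow as tf

def copy_dir_sketch(src: str, dst: str) -> None:
  """Recursively copies the directory tree at src to dst."""
  src, dst = src.rstrip('/'), dst.rstrip('/')
  for dir_name, _, file_names in tf.io.gfile.walk(src):
    # Recreate each source directory relative to dst.
    rel = os.path.relpath(dir_name, src)
    target_dir = dst if rel == '.' else os.path.join(dst, rel)
    tf.io.gfile.makedirs(target_dir)
    for file_name in file_names:
      tf.io.gfile.copy(
          os.path.join(dir_name, file_name),
          os.path.join(target_dir, file_name),
          overwrite=True)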
Example #21
    def setUp(self):
        super(CliBeamEndToEndTest, self).setUp()

        # Change the encoding for Click since Python 3 is configured to use ASCII as
        # encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Setup beam_home in a temp directory
        self._home = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', tempfile.mkdtemp()),
            self._testMethodName)
        self._old_home = os.environ.get('HOME')
        os.environ['HOME'] = self._home
        self._old_beam_home = os.environ.get('BEAM_HOME')
        os.environ['BEAM_HOME'] = os.path.join(self._home, 'beam', '')
        self._beam_home = os.environ['BEAM_HOME']

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        # Copy data.
        chicago_taxi_pipeline_dir = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.dirname(
                        os.path.abspath(__file__))))), 'examples',
            'chicago_taxi_pipeline', '')
        data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
        content = tf.io.gfile.listdir(data_dir)
        assert content, 'content in {} is empty'.format(data_dir)
        target_data_dir = os.path.join(self._home, 'taxi', 'data', 'simple')
        io_utils.copy_dir(data_dir, target_data_dir)
        assert tf.io.gfile.isdir(target_data_dir)
        content = tf.io.gfile.listdir(target_data_dir)
        assert content, 'content in {} is {}'.format(target_data_dir, content)
        io_utils.copy_file(
            os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
            os.path.join(self._home, 'taxi', 'taxi_utils.py'))

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()
Example #22
def run_fn(fn_args: executor.TrainerFnArgs):
    """Train the model based on given args.

  Args:
    fn_args: Holds args used to train the model as name/value pairs.
  """
    schema = io_utils.parse_pbtxt_file(fn_args.schema_file,
                                       schema_pb2.Schema())

    training_spec = _trainer_fn(fn_args, schema)

    # Train the model
    absl.logging.info('Training model.')
    tf.estimator.train_and_evaluate(training_spec['estimator'],
                                    training_spec['train_spec'],
                                    training_spec['eval_spec'])
    absl.logging.info('Training complete.  Model written to %s',
                      fn_args.serving_model_dir)

    # Export an eval savedmodel for TFMA
    # NOTE: When trained in distributed training cluster, eval_savedmodel must be
    # exported only by the chief worker (check TF_CONFIG).
    absl.logging.info('Exporting eval_savedmodel for TFMA.')
    eval_export_dir = path_utils.eval_model_dir(fn_args.model_run_dir)
    tfma.export.export_eval_savedmodel(
        estimator=training_spec['estimator'],
        export_dir_base=eval_export_dir,
        eval_input_receiver_fn=training_spec['eval_input_receiver_fn'])

    absl.logging.info('Exported eval_savedmodel to %s.',
                      fn_args.eval_model_dir)

    # TODO(b/160795287): Deprecate estimator based executor.
    # Copy serving and eval model from model_run to model artifact directory.
    serving_source = path_utils.serving_model_path(fn_args.model_run_dir)
    io_utils.copy_dir(serving_source, fn_args.serving_model_dir)
    absl.logging.info('Serving model copied to: %s.',
                      fn_args.serving_model_dir)

    eval_source = path_utils.eval_model_path(fn_args.model_run_dir)
    io_utils.copy_dir(eval_source, fn_args.eval_model_dir)
    absl.logging.info('Eval model copied to: %s.', fn_args.eval_model_dir)
Example #23
 def testCopyDir(self):
   self.createFiles({
       'old': {
           'file1.txt': 'testing',
           'dir1': {
               'dir2': {
                   'file2.txt': 'testing2'
               }
           }
       }
   })
   io_utils.copy_dir(self.relpath('old'), self.relpath('new'))
   self.assertDirectoryEqual(self.relpath('new'), {
       'file1.txt': 'testing',
       'dir1': {
           'dir2': {
               'file2.txt': 'testing2'
           }
       }
   })
Example #24
    def setUp(self):
        super().setUp()

        # Change the encoding for Click since Python 3 is configured to use ASCII as
        # encoding for the environment.
        if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
            os.environ['LANG'] = 'en_US.utf-8'

        # Setup beam_home in a temp directory
        self._home = self.tmp_dir
        self._beam_home = os.path.join(self._home, 'beam')
        self.enter_context(
            test_case_utils.override_env_var('BEAM_HOME', self._beam_home))
        self.enter_context(test_case_utils.override_env_var(
            'HOME', self._home))

        # Testdata path.
        self._testdata_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        # Copy data.
        chicago_taxi_pipeline_dir = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.dirname(
                        os.path.abspath(__file__))))), 'examples',
            'chicago_taxi_pipeline', '')
        data_dir = os.path.join(chicago_taxi_pipeline_dir, 'data', 'simple')
        content = fileio.listdir(data_dir)
        assert content, 'content in {} is empty'.format(data_dir)
        target_data_dir = os.path.join(self._home, 'taxi', 'data', 'simple')
        io_utils.copy_dir(data_dir, target_data_dir)
        assert fileio.isdir(target_data_dir)
        content = fileio.listdir(target_data_dir)
        assert content, 'content in {} is {}'.format(target_data_dir, content)
        io_utils.copy_file(
            os.path.join(chicago_taxi_pipeline_dir, 'taxi_utils.py'),
            os.path.join(self._home, 'taxi', 'taxi_utils.py'))

        # Initialize CLI runner.
        self.runner = click_testing.CliRunner()
Example #25
    def setUp(self):
        super(PenguinPipelineLocalEndToEndTest, self).setUp()

        self._test_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        self._pipeline_name = 'penguin_test'
        self._data_root = os.path.join(os.path.dirname(__file__), 'data')

        # Create a data root for rolling window test
        # - data
        #   - day1
        #     - penguins_processed.csv
        #   - day2
        #     - penguins_processed.csv
        #   - day3
        #     - penguins_processed.csv
        self._data_root_span = os.path.join(self._test_dir, 'data')
        io_utils.copy_dir(self._data_root,
                          os.path.join(self._data_root_span, 'day1'))
        io_utils.copy_dir(self._data_root,
                          os.path.join(self._data_root_span, 'day2'))
        io_utils.copy_dir(self._data_root,
                          os.path.join(self._data_root_span, 'day3'))

        self._module_file = os.path.join(os.path.dirname(__file__),
                                         'penguin_utils.py')
        self._serving_model_dir = os.path.join(self._test_dir, 'serving_model')
        self._pipeline_root = os.path.join(self._test_dir, 'tfx', 'pipelines',
                                           self._pipeline_name)
        self._metadata_path = os.path.join(self._test_dir, 'tfx', 'metadata',
                                           self._pipeline_name, 'metadata.db')
Example #26
    def setUp(self):
        super().setUp()
        self._test_dir = self.tmp_dir
        self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

        self._test_output_dir = 'gs://{}/test_output'.format(self._BUCKET_NAME)

        test_id = test_utils.random_id()

        self._testdata_root = 'gs://{}/test_data/{}'.format(
            self._BUCKET_NAME, test_id)
        io_utils.copy_dir(self._TEST_DATA_ROOT, self._testdata_root)

        self._data_root = os.path.join(self._testdata_root, 'external', 'csv')

        self._transform_module = os.path.join(self._MODULE_ROOT,
                                              'transform_module.py')
        self._trainer_module = os.path.join(self._MODULE_ROOT,
                                            'trainer_module.py')
        self._serving_model_dir = os.path.join(self._testdata_root, 'output')

        self.addCleanup(self._delete_test_dir, test_id)
Example #27
  def setUp(self):
    super().setUp()

    penguin_examples_dir = os.path.join(self._REPO_BASE, 'tfx', 'examples',
                                        'penguin')
    # The location of the penguin test data and schema. The input files are
    # copied to a test-local location for each invocation, and cleaned up at the
    # end of test.
    penguin_test_data_root = os.path.join(penguin_examples_dir, 'data')
    penguin_test_schema_file = os.path.join(penguin_examples_dir, 'schema',
                                            'user_provided', 'schema.pbtxt')

    # The location of the user module for penguin. Will be packaged and copied
    # to under the pipeline root before pipeline execution.
    self._penguin_dependency_file = os.path.join(
        penguin_examples_dir, 'penguin_utils_cloud_tuner.py')

    self._penguin_data_root = os.path.join(self._testdata_root, 'data')
    io_utils.copy_dir(penguin_test_data_root, self._penguin_data_root)
    self._penguin_schema_file = os.path.join(self._testdata_root,
                                             'schema.pbtxt')
    io_utils.copy_file(penguin_test_schema_file, self._penguin_schema_file)
Example #28
  def testCopyDirWithTrailingSlashes(self):
    self.createFiles({
        'old': {
            'dir': {
                'file.txt': 'testing'
            }
        }
    })

    with self.subTest('Copy old/ to new1'):
      io_utils.copy_dir(self.relpath('old', ''), self.relpath('new1'))
      self.assertDirectoryEqual(self.relpath('new1'), {
          'dir': {
              'file.txt': 'testing'
          }
      })

    with self.subTest('Copy old to new2/'):
      io_utils.copy_dir(self.relpath('old'), self.relpath('new2', ''))
      self.assertDirectoryEqual(self.relpath('new2'), {
          'dir': {
              'file.txt': 'testing'
          }
      })
Example #29
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Push model to target directory if blessed.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model: exported model from trainer.
        - model_blessing: model blessing path from model_validator.  A push
          action delivers the model exports produced by Trainer to the
          destination defined in component config.
      output_dict: Output dict from key to a list of artifacts, including:
        - pushed_model: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: A dict of execution properties, including:
        - push_destination: JSON string of pusher_pb2.PushDestination instance,
          providing instruction of destination to push model.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        model_push = artifact_utils.get_single_instance(
            output_dict[standard_component_specs.PUSHED_MODEL_KEY])
        if not self.CheckBlessing(input_dict):
            self._MarkNotPushed(model_push)
            return
        model_export = artifact_utils.get_single_instance(
            input_dict[standard_component_specs.MODEL_KEY])
        model_path = path_utils.serving_model_path(
            model_export.uri, path_utils.is_old_model_artifact(model_export))

        # Push the model to the destination, which can be watched by a model
        # server.
        #
        # If the model was already successfully copied outside before, stop
        # copying. This is because the model validator might bless the same
        # model twice (check the mv driver) with different blessing outputs; we
        # still want Pusher to handle the mv output again to keep metadata
        # tracking, but there is no need to copy to the outside path again.
        # TODO(jyzhao): support rpc push and verification.
        push_destination = pusher_pb2.PushDestination()
        proto_utils.json_to_proto(
            exec_properties[standard_component_specs.PUSH_DESTINATION_KEY],
            push_destination)

        destination_kind = push_destination.WhichOneof('destination')
        if destination_kind == 'filesystem':
            fs_config = push_destination.filesystem
            if fs_config.versioning == _Versioning.AUTO:
                fs_config.versioning = _Versioning.UNIX_TIMESTAMP
            if fs_config.versioning == _Versioning.UNIX_TIMESTAMP:
                model_version = str(int(time.time()))
            else:
                raise NotImplementedError('Invalid Versioning {}'.format(
                    fs_config.versioning))
            logging.info('Model version: %s', model_version)
            serving_path = os.path.join(fs_config.base_directory,
                                        model_version)

            if fileio.exists(serving_path):
                logging.info(
                    'Destination directory %s already exists, skipping current push.',
                    serving_path)
            else:
                # tf.serving won't load partial model, it will retry until fully copied.
                io_utils.copy_dir(model_path, serving_path)
                logging.info('Model written to serving path %s.', serving_path)
        else:
            raise NotImplementedError(
                'Invalid push destination {}'.format(destination_kind))

        # Copy the model to pushing uri for archiving.
        io_utils.copy_dir(model_path, model_push.uri)
        self._MarkPushed(model_push,
                         pushed_destination=serving_path,
                         pushed_version=model_version)
        logging.info('Model pushed to %s.', model_push.uri)
Example #30
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]):
    """Overrides the tfx_pusher_executor.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: Mostly a passthrough input dict for
        tfx.components.Pusher.executor.  custom_config.bigquery_serving_args is
        consumed by this class.  For the full set of parameters supported by
        Big Query ML, refer to https://cloud.google.com/bigquery-ml/

    Returns:
      None
    Raises:
      ValueError:
        If bigquery_serving_args is not in exec_properties.custom_config.
        If pipeline_root is not 'gs://...'
      RuntimeError: if the Big Query job failed.
    """
    self._log_startup(input_dict, output_dict, exec_properties)
    model_push = artifact_utils.get_single_instance(
        output_dict[tfx_pusher_executor.PUSHED_MODEL_KEY])
    if not self.CheckBlessing(input_dict):
      self._MarkNotPushed(model_push)
      return

    model_export = artifact_utils.get_single_instance(
        input_dict[tfx_pusher_executor.MODEL_KEY])
    model_export_uri = model_export.uri

    custom_config = json_utils.loads(
        exec_properties.get(_CUSTOM_CONFIG_KEY, 'null'))
    if custom_config is not None and not isinstance(custom_config, Dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict.')

    bigquery_serving_args = custom_config.get(SERVING_ARGS_KEY)
    # If the configuration is missing, error out.
    if bigquery_serving_args is None:
      raise ValueError('Big Query ML configuration was not provided')

    bq_model_uri = '.'.join([
        bigquery_serving_args[_PROJECT_ID_KEY],
        bigquery_serving_args[_BQ_DATASET_ID_KEY],
        bigquery_serving_args[_MODEL_NAME_KEY],
    ])

    # Deploy the model.
    io_utils.copy_dir(
        src=path_utils.serving_model_path(model_export_uri), dst=model_push.uri)
    model_path = model_push.uri
    if not model_path.startswith(_GCS_PREFIX):
      raise ValueError('pipeline_root must be gs:// for BigQuery ML Pusher.')

    logging.info('Deploying the model to BigQuery ML for serving: %s from %s',
                 bigquery_serving_args, model_path)

    query = _BQML_CREATE_OR_REPLACE_MODEL_QUERY_TEMPLATE.format(
        model_uri=bq_model_uri, model_path=model_path)

    # TODO(zhitaoli): Refactor the executor_class_path creation into a common
    # utility function.
    executor_class_path = '%s.%s' % (self.__class__.__module__,
                                     self.__class__.__name__)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      default_query_job_config = bigquery.job.QueryJobConfig(
          labels=telemetry_utils.get_labels_dict())
    client = bigquery.Client(default_query_job_config=default_query_job_config)

    try:
      query_job = client.query(query)
      query_job.result()  # Waits for the query to finish
    except Exception as e:
      raise RuntimeError('BigQuery ML Push failed: {}'.format(e))

    logging.info('Successfully deployed model %s serving from %s', bq_model_uri,
                 model_path)

    # Setting the push_destination to bigquery uri
    self._MarkPushed(model_push, pushed_destination=bq_model_uri)
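
The CREATE OR REPLACE MODEL template itself is defined elsewhere in the module and not shown here; a hedged sketch of what it plausibly expands to, based on BigQuery ML's documented syntax for importing a TensorFlow SavedModel (the actual constant may differ):

# Hypothetical stand-in for _BQML_CREATE_OR_REPLACE_MODEL_QUERY_TEMPLATE.
_BQML_QUERY_TEMPLATE_SKETCH = """
CREATE OR REPLACE MODEL `{model_uri}`
OPTIONS (MODEL_TYPE='TENSORFLOW',
         MODEL_PATH='{model_path}/*')
"""

# Example expansion (values made up):
# query = _BQML_QUERY_TEMPLATE_SKETCH.format(
#     model_uri='my-project.my_dataset.my_model',
#     model_path='gs://my-bucket/pipeline-root/Pusher/pushed_model/123')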
Example #31
    def Do(self, input_dict, output_dict, exec_properties):
        """Push model to target if blessed.

    Args:
      input_dict: Input dict from input key to a list of artifacts, including:
        - model_export: exported model from trainer.
        - model_blessing: model blessing path from model_validator.
      output_dict: Output dict from key to a list of artifacts, including:
        - model_push: A list of 'ModelPushPath' artifact of size one. It will
          include the model in this push execution if the model was pushed.
      exec_properties: A dict of execution properties, including:
        - push_destination: JSON string of pusher_pb2.PushDestination instance,
          providing instruction of destination to push model.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)
        model_export = types.get_single_instance(input_dict['model_export'])
        model_export_uri = model_export.uri
        model_blessing_uri = types.get_single_uri(input_dict['model_blessing'])
        model_push = types.get_single_instance(output_dict['model_push'])
        model_push_uri = model_push.uri
        # TODO(jyzhao): should this be in driver or executor.
        if not tf.gfile.Exists(os.path.join(model_blessing_uri, 'BLESSED')):
            model_push.set_int_custom_property('pushed', 0)
            tf.logging.info('Model on %s was not blessed', model_blessing_uri)
            return
        tf.logging.info('Model pushing.')
        # Copy the model we are pushing into model_push_uri.
        model_path = path_utils.serving_model_path(model_export_uri)
        # Note: we do not have a logical model version right now. This
        # model_version is a timestamp mapped to trainer's exporter.
        model_version = os.path.basename(model_path)
        tf.logging.info('Model version is %s', model_version)
        io_utils.copy_dir(model_path,
                          os.path.join(model_push_uri, model_version))
        tf.logging.info('Model written to %s.', model_push_uri)

        # Copy the model to a fixed outside path, which can be watched by a
        # model server.
        #
        # If the model was already successfully copied outside before, stop
        # copying. This is because the model validator might bless the same
        # model twice (check the mv driver) with different blessing outputs; we
        # still want Pusher to handle the mv output again to keep metadata
        # tracking, but there is no need to copy to the outside path again.
        # TODO(jyzhao): support rpc push and verification.
        push_destination = pusher_pb2.PushDestination()
        json_format.Parse(exec_properties['push_destination'],
                          push_destination)
        serving_path = os.path.join(push_destination.filesystem.base_directory,
                                    model_version)
        if tf.gfile.Exists(serving_path):
            tf.logging.info(
                'Destination directory %s already exists, skipping current push.',
                serving_path)
        else:
            # tf.serving won't load partial model, it will retry until fully copied.
            io_utils.copy_dir(model_path, serving_path)
            tf.logging.info('Model written to serving path %s.', serving_path)

        model_push.set_int_custom_property('pushed', 1)
        model_push.set_string_custom_property('pushed_model', model_export_uri)
        model_push.set_int_custom_property('pushed_model_id', model_export.id)
        tf.logging.info('Model pushed to %s.', serving_path)

        if exec_properties.get('custom_config'):
            cmle_serving_args = exec_properties.get(
                'custom_config', {}).get('cmle_serving_args')
            if cmle_serving_args is not None:
                return cmle_runner.deploy_model_for_serving(
                    serving_path, model_version, cmle_serving_args,
                    exec_properties['log_root'])