def clear_output_dirs(output_dict: Dict[str, List[types.Artifact]]) -> None:
    """Empty every non-empty directory referenced by an output artifact URI.

    An artifact URI that is a directory containing at least one entry is
    removed recursively and then recreated as an empty directory. URIs that
    are not directories, and directories that are already empty, are left
    untouched.

    Args:
      output_dict: Mapping from output key to the list of artifacts stored
        under that key.
    """
    for artifact_list in output_dict.values():
        for artifact in artifact_list:
            uri = artifact.uri
            if not fileio.isdir(uri):
                continue
            if not fileio.listdir(uri):
                # Already empty: nothing to clear.
                continue
            fileio.rmtree(uri)
            fileio.mkdir(uri)
def copy_dir(
    src: str,
    dst: str,
    allow_regex_patterns: Iterable[str] = (),
    deny_regex_patterns: Iterable[str] = (),
) -> None:
    """Recursively copy the directory tree rooted at `src` to `dst`.

    Any existing `dst` is removed first, so the result only contains what was
    copied by this call.

    Args:
      src: Source directory to copy from. <src>/a/b.txt will be copied to
        <dst>/a/b.txt.
      dst: Destination directory to copy to. <src>/a/b.txt will be copied to
        <dst>/a/b.txt.
      allow_regex_patterns: Optional allowlist of regular expressions. Each
        pattern is searched against the full source path. When the list is
        non-empty, files and subdirectories matching none of the patterns
        are not copied.
      deny_regex_patterns: Optional denylist of regular expressions. Each
        pattern is searched against the full source path. Files and
        subdirectories matching any of the patterns are not copied.
    """
    src = src.rstrip('/')
    dst = dst.rstrip('/')
    allowlist = [re.compile(pattern) for pattern in allow_regex_patterns]
    denylist = [re.compile(pattern) for pattern in deny_regex_patterns]

    def should_copy(path):
        """Return True when `path` passes both the allowlist and denylist."""
        if allowlist and not any(rx.search(path) for rx in allowlist):
            return False
        return not any(rx.search(path) for rx in denylist)

    # Start from a clean destination.
    if fileio.exists(dst):
        fileio.rmtree(dst)
    fileio.makedirs(dst)

    for dir_name, sub_dirs, leaf_files in fileio.walk(src):
        # Only the leading occurrence of `src` is the root prefix to swap.
        target_dir = dir_name.replace(src, dst, 1)
        target_dir_exists = fileio.isdir(target_dir)

        for leaf_file in leaf_files:
            source_file = os.path.join(dir_name, leaf_file)
            if not should_copy(source_file):
                continue
            if not target_dir_exists:
                # The parent directory may not have been created yet if its
                # own name is not in the allowlist but a file inside it is.
                fileio.makedirs(target_dir)
                target_dir_exists = True
            fileio.copy(source_file, os.path.join(target_dir, leaf_file))

        for sub_dir in sub_dirs:
            if should_copy(os.path.join(dir_name, sub_dir)):
                fileio.makedirs(os.path.join(target_dir, sub_dir))
def _create_tflite_compatible_saved_model(src: Text, dst: Text):
    """Copy the SavedModel at `src` to `dst` without its asset directories.

    After the copy, the `tf.saved_model.ASSETS_DIRECTORY` and
    `EXTRA_ASSETS_DIRECTORY` sub-directories of `dst` are deleted if present.

    Args:
      src: Directory of the SavedModel to copy.
      dst: Directory that receives the stripped copy.
    """
    io_utils.copy_dir(src, dst)

    stripped_dirs = (tf.saved_model.ASSETS_DIRECTORY, EXTRA_ASSETS_DIRECTORY)
    for stripped_dir in stripped_dirs:
        stripped_path = os.path.join(dst, stripped_dir)
        if fileio.exists(stripped_path):
            fileio.rmtree(stripped_path)
def remove_output_dirs(output_dict: Dict[Text, List[types.Artifact]]) -> None:
    """Delete whatever each output artifact's URI points at.

    Directory URIs are removed recursively; any other URI is removed as a
    single file.

    Args:
      output_dict: Mapping from output key to the list of artifacts stored
        under that key.
    """
    for artifact_list in output_dict.values():
        for artifact in artifact_list:
            uri = artifact.uri
            delete = fileio.rmtree if fileio.isdir(uri) else fileio.remove
            delete(uri)
def run_fn(fn_args: tfx.components.FnArgs):
    """Train the model based on given args.

    The trained Keras model is saved as a SavedModel in a temporary
    sub-directory of `fn_args.serving_model_dir`, rewritten into a TFJS model
    in the serving directory, and the temporary SavedModel is then removed.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = _input_fn(
        fn_args.train_files,
        fn_args.data_accessor,
        tf_transform_output,
        batch_size=_TRAIN_BATCH_SIZE)
    eval_dataset = _input_fn(
        fn_args.eval_files,
        fn_args.data_accessor,
        tf_transform_output,
        batch_size=_EVAL_BATCH_SIZE)

    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        model = _build_keras_model()

    model.fit(
        train_dataset,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        verbose=2)

    inference_fn = _get_inference_fn(model, tf_transform_output)
    cur_page_spec = tf.TensorSpec(
        shape=[None], dtype=tf.int64, name=_CUR_PAGE_FEATURE_KEY)
    session_index_spec = tf.TensorSpec(
        shape=[None], dtype=tf.int64, name=_SESSION_INDEX_FEATURE_KEY)
    signatures = {
        'serving_default':
            inference_fn.get_concrete_function(cur_page_spec,
                                               session_index_spec),
    }

    # Create the saved_model in a temporary directory.
    serving_model_dir = fn_args.serving_model_dir
    temp_saving_model_dir = os.path.join(serving_model_dir, 'temp')
    model.save(temp_saving_model_dir, save_format='tf', signatures=signatures)

    # Convert the saved_model to a tfjs model and store it in the final
    # directory.
    tfrw = rewriter_factory.create_rewriter(
        rewriter_factory.TFJS_REWRITER, name='tfjs_rewriter')
    converters.rewrite_saved_model(
        temp_saving_model_dir,
        serving_model_dir,
        tfrw,
        rewriter.ModelType.TFJS_MODEL)

    # Copy the vocabulary computed by transform to the final directory.
    # The vocabulary is not included in the original savedmodel because vocab
    # lookups are currently not supported in TFJS and are expected to be done
    # independently by client code.
    vocab_source = tf_transform_output.vocabulary_file_by_name(_VOCAB_FILENAME)
    vocab_target = os.path.join(serving_model_dir, _VOCAB_FILENAME)
    fileio.copy(vocab_source, vocab_target)

    fileio.rmtree(temp_saving_model_dir)
def _clean_up_stateless_execution_info(
        self, execution_info: data_types.ExecutionInfo):
    """Remove the execution's temporary directory, tolerating its absence.

    Args:
      execution_info: Execution info whose `tmp_dir` is deleted. A missing
        directory is logged as a warning instead of raising.
    """
    logging.info('Cleaning up stateless execution info.')
    # Clean up tmp dir
    tmp_dir = execution_info.tmp_dir
    try:
        fileio.rmtree(tmp_dir)
    except fileio.NotFoundError:
        # TODO(b/182964682): investigate the root cause of why this is
        # happening.
        logging.warning(
            'execution_info.tmp_dir %s is not found, it is likely that it has '
            'been deleted by the executor. This is the full execution_info: %s',
            tmp_dir, execution_info.to_proto())
def _clean_up_stateful_execution_info(
        self, execution_info: data_types.ExecutionInfo):
    """Post execution clean up of the stateful working directory.

    Deletes the parent directory of `execution_info.stateful_working_dir`,
    recursively.

    Args:
      execution_info: Execution info carrying the stateful working dir path.
    """
    # Clean up stateful working dir
    # Note that:
    # stateful_working_dir = (os.path.join(
    #    self._node_dir,
    #    self._pipeline_run_id,  <-- we want to clean from this level down.
    #    _STATEFUL_WORKING_DIR)
    one_level_up = os.path.join(execution_info.stateful_working_dir, os.pardir)
    run_level_dir = os.path.abspath(one_level_up)
    fileio.rmtree(run_level_dir)
def _remove_task_dirs(task: task_lib.ExecNodeTask) -> None:
    """Removes directories created for the task.

    Each of the task's stateful working dir, tmp dir and executor output file
    is removed when its path is set. A tmp dir or executor output file that
    is already gone is logged as a warning rather than raised.

    Args:
      task: The task whose directories and executor output should be removed.
    """
    if task.stateful_working_dir:
        outputs_utils.remove_stateful_working_dir(task.stateful_working_dir)

    if task.tmp_dir:
        try:
            fileio.rmtree(task.tmp_dir)
        except fileio.NotFoundError:
            # The path must be passed as a logging argument, otherwise the
            # `%s` placeholder is emitted literally.
            logging.warning(
                'tmp_dir %s not found while attempting to delete, ignoring.',
                task.tmp_dir)

    if task.executor_output_uri:
        try:
            fileio.remove(task.executor_output_uri)
        except fileio.NotFoundError:
            logging.warning(
                'Skipping deletion of executor_output_uri (file not found): %s',
                task.executor_output_uri)
def remove_stateful_working_dir(stateful_working_dir: str) -> None:
    """Remove the parent directory of `stateful_working_dir`.

    A parent directory that does not exist is logged as a warning instead of
    raising.

    Args:
      stateful_working_dir: Path whose parent directory is deleted
        recursively.
    """
    # Clean up stateful working dir
    # Note that:
    # stateful_working_dir = os.path.join(
    #    self._node_dir,
    #    _SYSTEM,
    #    _STATEFUL_WORKING_DIR, <-- we want to clean from this level down.
    #    dir_suffix)
    parent_dir = os.path.abspath(
        os.path.join(stateful_working_dir, os.pardir))
    try:
        fileio.rmtree(parent_dir)
    except fileio.NotFoundError:
        logging.warning(
            'stateful_working_dir %s is not found, not going to delete it.',
            parent_dir)
def copy_dir(src: Text, dst: Text) -> None:
    """Copies the whole directory recursively from source to destination.

    Any existing `dst` is removed before copying.

    Args:
      src: Source directory; trailing '/' characters are ignored.
      dst: Destination directory; trailing '/' characters are ignored.
    """
    src = src.rstrip('/')
    dst = dst.rstrip('/')

    if fileio.exists(dst):
        fileio.rmtree(dst)
    fileio.makedirs(dst)

    for dir_name, sub_dirs, leaf_files in fileio.walk(src):
        # Swap only the leading `src` prefix for `dst`.
        target_dir = dir_name.replace(src, dst, 1)
        for leaf_file in leaf_files:
            fileio.copy(
                os.path.join(dir_name, leaf_file),
                os.path.join(target_dir, leaf_file))
        for sub_dir in sub_dirs:
            fileio.makedirs(os.path.join(target_dir, sub_dir))
def export(self, estimator, export_path, checkpoint_path, eval_result,
           is_the_final_export):
    """Exports the given `Estimator` to a specific format.

    Performs the export as defined by the base exporter, runs the configured
    rewriter into a temporary directory under `export_path`, and then replaces
    the base export directory with the rewritten output.

    Args:
      estimator: the `Estimator` to export.
      export_path: A string containing a directory where to write the export.
      checkpoint_path: The checkpoint path to export.
      eval_result: The output of `Estimator.evaluate` on this checkpoint.
      is_the_final_export: This boolean is True when this is an export in the
        end of training. It is False for the intermediate exports during the
        training. When passing `Exporter` to `tf.estimator.train_and_evaluate`
        `is_the_final_export` is always False if `TrainSpec.max_steps` is
        `None`.

    Returns:
      The string path to the base exported directory or `None` if export is
      skipped.

    Raises:
      RuntimeError: Unable to create a temporary rewrite directory.
    """
    exported_dir = self._base_exporter.export(
        estimator, export_path, checkpoint_path, eval_result,
        is_the_final_export)
    if not exported_dir:
        return None

    # The staging directory name is derived from the current time in seconds.
    staging_dir = os.path.join(
        export_path, 'tmp-rewrite-' + str(int(time.time())))
    if fileio.exists(staging_dir):
        raise RuntimeError(
            'Unable to create a unique temporary rewrite path.')
    fileio.makedirs(staging_dir)

    _invoke_rewriter(
        exported_dir,
        staging_dir,
        self._rewriter_inst,
        rewriter.ModelType.SAVED_MODEL,
        rewriter.ModelType.ANY_MODEL)

    # Swap the rewritten model into the place of the base export.
    fileio.rmtree(exported_dir)
    fileio.rename(staging_dir, exported_dir)
    return exported_dir
def run_fn(fn_args: TrainerFnArgs):
    """Train the model based on given args.

    The trained Keras model is saved as a SavedModel in a temporary
    sub-directory of `fn_args.serving_model_dir`, rewritten into a TFLite
    model in the serving directory, and the temporary SavedModel is then
    removed.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    batch_size = 40
    train_dataset = base.input_fn(
        fn_args.train_files, fn_args.data_accessor, tf_transform_output,
        batch_size)
    eval_dataset = base.input_fn(
        fn_args.eval_files, fn_args.data_accessor, tf_transform_output,
        batch_size)

    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
        model = base.build_keras_model()

    # Write logs to path
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=fn_args.model_run_dir, update_freq='batch')

    model.fit(
        train_dataset,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback])

    serve_fn = _get_serve_tf_examples_fn(model, tf_transform_output)
    image_spec = tf.TensorSpec(
        shape=[None, 784], dtype=tf.float32, name='image_floats')
    signatures = {
        'serving_default': serve_fn.get_concrete_function(image_spec)
    }

    # Save to a temporary location first; the rewriter writes the final model.
    serving_model_dir = fn_args.serving_model_dir
    temp_saving_model_dir = os.path.join(serving_model_dir, 'temp')
    model.save(temp_saving_model_dir, save_format='tf', signatures=signatures)

    tfrw = rewriter_factory.create_rewriter(
        rewriter_factory.TFLITE_REWRITER, name='tflite_rewriter')
    converters.rewrite_saved_model(
        temp_saving_model_dir,
        serving_model_dir,
        tfrw,
        rewriter.ModelType.TFLITE_MODEL)

    fileio.rmtree(temp_saving_model_dir)
def run_executor(
    self, execution_info: data_types.ExecutionInfo
) -> execution_result_pb2.ExecutorOutput:
    """Build an ExecutorOutput from the output dict, then delete working dirs.

    The output artifacts are taken from a deep copy of
    `execution_info.output_dict`. Before returning, the parent of the stateful
    working dir and the tmp dir are both removed.

    Args:
      execution_info: Execution info providing the output artifacts and the
        directories to remove.

    Returns:
      An `ExecutorOutput` whose `output_artifacts` holds, per output key, the
      MLMD artifacts of that key.
    """
    output_dict = copy.deepcopy(execution_info.output_dict)
    result = execution_result_pb2.ExecutorOutput()
    for key, artifact_list in output_dict.items():
        artifacts = execution_result_pb2.ExecutorOutput.ArtifactList()
        artifacts.artifacts.extend(
            [artifact.mlmd_artifact for artifact in artifact_list])
        result.output_artifacts[key].CopyFrom(artifacts)

    # Although the following removing is typically not expected, but there is
    # no way to prevent them from happening. We should make sure that the
    # launcher can handle the double cleanup gracefully.
    stateful_parent_dir = os.path.abspath(
        os.path.join(execution_info.stateful_working_dir, os.pardir))
    fileio.rmtree(stateful_parent_dir)
    fileio.rmtree(execution_info.tmp_dir)
    return result
def remove_stateful_working_dir(stateful_working_dir: Text) -> None:
    """Remove the parent directory of `stateful_working_dir`.

    An exception whose type string contains 'NotFoundError' is logged as a
    warning; any other exception is re-raised.

    Args:
      stateful_working_dir: Path whose parent directory is deleted
        recursively.
    """
    # Clean up stateful working dir
    # Note that:
    # stateful_working_dir = os.path.join(
    #    self._node_dir,
    #    _SYSTEM,
    #    _STATEFUL_WORKING_DIR, <-- we want to clean from this level down.
    #    dir_suffix)
    parent_dir = os.path.abspath(
        os.path.join(stateful_working_dir, os.pardir))
    try:
        fileio.rmtree(parent_dir)
    except Exception as e:  # pylint: disable=broad-except
        # TODO(b/175244977): This is a workaround to avoid introducing
        # tensorflow dependency. Change this except block to use a generic
        # NotFoundError once it is Defined in fileio.
        if 'NotFoundError' not in str(type(e)):
            raise
        logging.warning(
            'stateful_working_dir %s is not found, not going to delete it.',
            parent_dir)
def _clean_up_stateless_execution_info(
        self, execution_info: data_types.ExecutionInfo):
    """Recursively delete the execution's temporary directory.

    Args:
      execution_info: Execution info whose `tmp_dir` is removed.
    """
    # Clean up tmp dir
    tmp_dir = execution_info.tmp_dir
    fileio.rmtree(tmp_dir)
def _clean_up(self, execution_info: data_types.ExecutionInfo):
    """Recursively delete the execution's stateful working directory.

    Args:
      execution_info: Execution info whose `stateful_working_dir` is removed.
    """
    working_dir = execution_info.stateful_working_dir
    fileio.rmtree(working_dir)
def run_fn(fn_args: FnArgs):
    """Train the model based on given args.

    Training runs in two phases (classifier only, then joint fine-tuning).
    The result is saved as a SavedModel in a temporary sub-directory of
    `fn_args.serving_model_dir`, rewritten to a TFLite model in the serving
    directory, annotated with TFLite metadata, and the temporary SavedModel is
    then removed.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.

    Raises:
      ValueError: if invalid inputs, i.e. `_CLASSIFIER_EPOCHS` exceeds the
        total number of epochs derived from `fn_args.train_steps`.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = _input_fn(
        fn_args.train_files,
        fn_args.data_accessor,
        tf_transform_output,
        is_train=True,
        batch_size=_TRAIN_BATCH_SIZE)
    eval_dataset = _input_fn(
        fn_args.eval_files,
        fn_args.data_accessor,
        tf_transform_output,
        is_train=False,
        batch_size=_EVAL_BATCH_SIZE)

    model, base_model = _build_keras_model()

    absl.logging.info('Tensorboard logging to {}'.format(
        fn_args.model_run_dir))
    # Write logs to path
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=fn_args.model_run_dir, update_freq='batch')

    # Our training regime has two phases: we first freeze the backbone and
    # train the newly added classifier only, then unfreeze part of the
    # backbone and fine-tune with classifier jointly.
    steps_per_epoch = int(_TRAIN_DATA_SIZE / _TRAIN_BATCH_SIZE)
    total_epochs = int(fn_args.train_steps / steps_per_epoch)
    if _CLASSIFIER_EPOCHS > total_epochs:
        raise ValueError('Classifier epochs is greater than the total epochs')

    absl.logging.info('Start training the top classifier')
    model.fit(
        train_dataset,
        epochs=_CLASSIFIER_EPOCHS,
        steps_per_epoch=steps_per_epoch,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback])

    absl.logging.info('Start fine-tuning the model')
    # Unfreeze the top MobileNet layers and do joint fine-tuning
    _freeze_model_by_percentage(base_model, 0.9)

    # We need to recompile the model because layer properties have changed.
    # `learning_rate` is the supported argument name; the legacy `lr` alias is
    # deprecated and rejected by newer Keras optimizers.
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=tf.keras.optimizers.RMSprop(
            learning_rate=_FINETUNE_LEARNING_RATE),
        metrics=['sparse_categorical_accuracy'])
    model.summary(print_fn=absl.logging.info)

    model.fit(
        train_dataset,
        initial_epoch=_CLASSIFIER_EPOCHS,
        epochs=total_epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback])

    # Prepare the TFLite model used for serving in MLKit
    signatures = {
        'serving_default':
            _get_serve_image_fn(model).get_concrete_function(
                tf.TensorSpec(
                    shape=[None, 224, 224, 3],
                    dtype=tf.float32,
                    name=_transformed_name(_IMAGE_KEY)))
    }

    temp_saving_model_dir = os.path.join(fn_args.serving_model_dir, 'temp')
    model.save(temp_saving_model_dir, save_format='tf', signatures=signatures)

    tfrw = rewriter_factory.create_rewriter(
        rewriter_factory.TFLITE_REWRITER, name='tflite_rewriter')
    converters.rewrite_saved_model(
        temp_saving_model_dir,
        fn_args.serving_model_dir,
        tfrw,
        rewriter.ModelType.TFLITE_MODEL)

    # Add necessary TFLite metadata to the model in order to use it within
    # MLKit
    # TODO(dzats@): Handle label map file path more properly, currently
    # hard-coded.
    tflite_model_path = os.path.join(fn_args.serving_model_dir,
                                     _TFLITE_MODEL_NAME)
    # TODO(dzats@): Extend the TFLite rewriter to be able to add TFLite
    # metadata to the model.
    _write_metadata(
        model_path=tflite_model_path,
        label_map_path=fn_args.custom_config['labels_path'],
        mean=[127.5],
        std=[127.5])

    fileio.rmtree(temp_saving_model_dir)
def _clean_up_stateless_execution_info(
        self, execution_info: data_types.ExecutionInfo):
    """Log the clean up and recursively delete the temporary directory.

    Args:
      execution_info: Execution info whose `tmp_dir` is removed.
    """
    logging.info('Cleaning up stateless execution info.')
    # Clean up tmp dir
    tmp_dir = execution_info.tmp_dir
    fileio.rmtree(tmp_dir)
def delete_dir(path: Text) -> None:
    """Deletes a directory if exists.

    Args:
      path: Directory to remove recursively. Nothing happens when `path` is
        not a directory.
    """
    if not fileio.isdir(path):
        return
    fileio.rmtree(path)
def _rewrite(self, original_model: rewriter.ModelDescription,
             rewritten_model: rewriter.ModelDescription):
    """Rewrites the provided model.

    The original SavedModel is first copied, without its asset directories,
    into a temporary directory under the rewritten model path. That copy is
    converted, the converted model is written to `self._filename` under the
    rewritten model path, and the temporary directory is removed. Asset
    directories are then copied over when the corresponding flags are set.

    Args:
      original_model: A `ModelDescription` specifying the original model to
        be rewritten.
      rewritten_model: A `ModelDescription` specifying the format and
        location of the rewritten model.

    Raises:
      ValueError: If the model could not be successfully rewritten.
    """
    supported_types = [
        rewriter.ModelType.TFLITE_MODEL, rewriter.ModelType.ANY_MODEL
    ]
    if rewritten_model.model_type not in supported_types:
        raise ValueError(
            'TFLiteConverter can only convert to the TFLite format.')

    rewritten_dir = _ensure_str(rewritten_model.path)

    # TODO(dzats): We create a temporary directory with a SavedModel that does
    # not contain an assets or assets.extra directory. Remove this when the
    # TFLite converter can convert models having these directories.
    tmp_model_dir = os.path.join(
        rewritten_dir, 'tmp-rewrite-' + str(int(time.time())))
    if fileio.exists(tmp_model_dir):
        raise ValueError('TFLiteConverter is unable to create a unique path '
                         'for the temp rewriting directory.')
    fileio.makedirs(tmp_model_dir)

    original_dir = _ensure_str(original_model.path)
    _create_tflite_compatible_saved_model(original_dir, tmp_model_dir)

    converter = self._create_tflite_converter(
        saved_model_path=tmp_model_dir,
        quantization_optimizations=self._quantization_optimizations,
        quantization_supported_types=self._quantization_supported_types,
        representative_dataset=self._representative_dataset,
        signature_key=self._signature_key,
        **self._kwargs)
    tflite_model = converter.convert()

    output_path = os.path.join(rewritten_dir, self._filename)
    with fileio.open(_ensure_str(output_path), 'wb') as f:
        f.write(_ensure_bytes(tflite_model))

    fileio.rmtree(tmp_model_dir)

    # First create the destination directories for every requested asset
    # directory that exists in the original model, then copy them all.
    requested_asset_dirs = (
        (self._copy_assets, tf.saved_model.ASSETS_DIRECTORY),
        (self._copy_assets_extra, EXTRA_ASSETS_DIRECTORY),
    )
    copy_pairs = []
    for requested, asset_dir in requested_asset_dirs:
        if not requested:
            continue
        src = os.path.join(original_dir, asset_dir)
        dst = os.path.join(rewritten_dir, asset_dir)
        if fileio.isdir(src):
            fileio.mkdir(dst)
            copy_pairs.append((src, dst))

    for src, dst in copy_pairs:
        io_utils.copy_dir(src, dst)