def test_relative_path(self):
  m = keras.Model()
  v = m.add_weight(name='v', shape=[])
  os.chdir(self.get_temp_dir())

  prefix = 'ackpt'
  self.evaluate(v.assign(42.))
  m.save_weights(prefix)
  self.assertTrue(file_io.file_exists('ackpt.index'))
  self.evaluate(v.assign(1.))
  m.load_weights(prefix)
  self.assertEqual(42., self.evaluate(v))

  prefix = 'subdir/ackpt'
  self.evaluate(v.assign(43.))
  m.save_weights(prefix)
  self.assertTrue(file_io.file_exists('subdir/ackpt.index'))
  self.evaluate(v.assign(2.))
  m.load_weights(prefix)
  self.assertEqual(43., self.evaluate(v))

  prefix = 'ackpt/'
  self.evaluate(v.assign(44.))
  m.save_weights(prefix)
  self.assertTrue(file_io.file_exists('ackpt/.index'))
  self.evaluate(v.assign(3.))
  m.load_weights(prefix)
  self.assertEqual(44., self.evaluate(v))
def testRename(self):
  file_path = os.path.join(self._base_dir, "temp_file")
  file_io.FileIO(file_path, mode="w").write("testing")
  rename_path = os.path.join(self._base_dir, "rename_file")
  file_io.rename(file_path, rename_path)
  self.assertTrue(file_io.file_exists(rename_path))
  self.assertFalse(file_io.file_exists(file_path))
def _run_training_transform(self):
  """Runs training starting with transformed tf.example files."""
  cloud = True
  if cloud:
    cmd = ['gcloud ml-engine jobs submit training test_mltoolbox_train_%s'
           % uuid.uuid4().hex,
           '--runtime-version=1.0',
           '--scale-tier=STANDARD_1',
           '--stream-logs']
  else:
    cmd = ['gcloud ml-engine local train']

  cmd = cmd + [
      '--module-name trainer.task',
      '--job-dir=' + self._train_output,
      '--package-path=' + os.path.join(CODE_PATH, 'trainer'),
      '--',
      '--train=' + os.path.join(self._transform_output, 'features_train*'),
      '--eval=' + os.path.join(self._transform_output, 'features_eval*'),
      '--analysis=' + self._analysis_output,
      '--model=linear_regression',
      '--train-batch-size=10',
      '--eval-batch-size=10',
      '--max-steps=' + str(self._max_steps)]

  self._logger.debug('Running subprocess: %s \n\n' % ' '.join(cmd))
  subprocess.check_call(' '.join(cmd), shell=True)

  # Check that the saved models were written.
  self.assertTrue(file_io.file_exists(
      os.path.join(self._train_output, 'model', 'saved_model.pb')))
  self.assertTrue(file_io.file_exists(
      os.path.join(self._train_output, 'evaluation_model', 'saved_model.pb')))
def _save_and_write_assets(self, assets_collection_to_add=None):
  """Saves assets to the meta graph and writes asset files to disk.

  Args:
    assets_collection_to_add: The collection where the asset paths are set up.
  """
  asset_source_filepath_list = _maybe_save_assets(assets_collection_to_add)

  # Return if there are no assets to write.
  # (Use truthiness, not `len(...) is 0`: `is` compares object identity and
  # must not be used for integer comparison.)
  if not asset_source_filepath_list:
    tf_logging.info("No assets to write.")
    return

  assets_destination_dir = os.path.join(
      compat.as_bytes(self._export_dir),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  # Copy each asset from source path to destination path.
  for asset_source_filepath in asset_source_filepath_list:
    asset_source_filename = os.path.basename(asset_source_filepath)

    asset_destination_filepath = os.path.join(
        compat.as_bytes(assets_destination_dir),
        compat.as_bytes(asset_source_filename))

    # Only copy the asset file to the destination if it does not already
    # exist. This is to ensure that an asset with the same name defined as
    # part of multiple graphs is only copied the first time.
    if not file_io.file_exists(asset_destination_filepath):
      file_io.copy(asset_source_filepath, asset_destination_filepath)

  tf_logging.info("Assets written to: %s", assets_destination_dir)
def _read_config_files(self, run_paths):
  configs = {}
  config_fpaths = {}
  for run_name, logdir in run_paths.items():
    config_fpath = os.path.join(logdir, PROJECTOR_FILENAME)
    if not file_io.file_exists(config_fpath):
      # Skip runs that have no config file.
      continue
    # Read the config file.
    file_content = file_io.read_file_to_string(config_fpath).decode('utf-8')
    config = ProjectorConfig()
    text_format.Merge(file_content, config)

    if not config.model_checkpoint_path:
      # See if you can find a checkpoint file in the logdir.
      ckpt_path = latest_checkpoint(logdir)
      if not ckpt_path:
        # Or in the parent of logdir. (os.path.join(logdir, os.pardir) is the
        # parent; joining '../' with logdir would not be.)
        ckpt_path = latest_checkpoint(os.path.join(logdir, os.pardir))
        if not ckpt_path:
          logging.warning('Cannot find model checkpoint in %s', logdir)
          continue
      config.model_checkpoint_path = ckpt_path

    # Sanity check for the checkpoint file.
    if not file_io.file_exists(config.model_checkpoint_path):
      logging.warning('Checkpoint file %s not found',
                      config.model_checkpoint_path)
      continue
    configs[run_name] = config
    config_fpaths[run_name] = config_fpath
  return configs, config_fpaths
def testRename(self):
  file_path = os.path.join(self._base_dir, "temp_file")
  file_io.write_string_to_file(file_path, "testing")
  rename_path = os.path.join(self._base_dir, "rename_file")
  file_io.rename(file_path, rename_path)
  self.assertTrue(file_io.file_exists(rename_path))
  self.assertFalse(file_io.file_exists(file_path))
def testCreateRecursiveDir(self):
  dir_path = os.path.join(self._base_dir, "temp_dir/temp_dir1/temp_dir2")
  file_io.recursive_create_dir(dir_path)
  file_path = os.path.join(dir_path, "temp_file")
  file_io.FileIO(file_path, mode="w").write("testing")
  self.assertTrue(file_io.file_exists(file_path))
  file_io.delete_recursively(os.path.join(self._base_dir, "temp_dir"))
  self.assertFalse(file_io.file_exists(file_path))
def testRenameOverwriteFalse(self):
  file_path = os.path.join(self._base_dir, "temp_file")
  file_io.FileIO(file_path, mode="w").write("testing")
  rename_path = os.path.join(self._base_dir, "rename_file")
  file_io.FileIO(rename_path, mode="w").write("rename")
  with self.assertRaises(errors.AlreadyExistsError):
    file_io.rename(file_path, rename_path, overwrite=False)
  self.assertTrue(file_io.file_exists(rename_path))
  self.assertTrue(file_io.file_exists(file_path))
def testRenameOverwrite(self):
  file_path = os.path.join(self.get_temp_dir(), "temp_file")
  file_io.write_string_to_file(file_path, "testing")
  rename_path = os.path.join(self.get_temp_dir(), "rename_file")
  file_io.write_string_to_file(rename_path, "rename")
  file_io.rename(file_path, rename_path, overwrite=True)
  self.assertTrue(file_io.file_exists(rename_path))
  self.assertFalse(file_io.file_exists(file_path))
  file_io.delete_file(rename_path)
def testRenameOverwriteFalse(self):
  file_path = os.path.join(self.get_temp_dir(), "temp_file")
  file_io.write_string_to_file(file_path, "testing")
  rename_path = os.path.join(self.get_temp_dir(), "rename_file")
  file_io.write_string_to_file(rename_path, "rename")
  with self.assertRaises(errors.AlreadyExistsError):
    file_io.rename(file_path, rename_path, overwrite=False)
  self.assertTrue(file_io.file_exists(rename_path))
  self.assertTrue(file_io.file_exists(file_path))
  file_io.delete_file(rename_path)
  file_io.delete_file(file_path)
def save_model(model, saved_model_path):
  """Save a `tf.keras.Model` into the TensorFlow SavedModel format.

  `save_model` generates these files/folders under the `saved_model_path`
  folder:
  1) an assets folder containing the JSON string of the model's
     configuration (topology).
  2) a checkpoint containing the model weights.

  Note that subclassed models cannot be saved via this function, unless you
  provide an implementation for get_config() and from_config().
  Also note that `tf.keras.optimizers.Optimizer` instances cannot currently
  be saved to checkpoints. Use optimizers from `tf.train`.

  Args:
    model: A `tf.keras.Model` to be saved.
    saved_model_path: a string specifying the path to the SavedModel
      directory.

  Raises:
    NotImplementedError: If the passed in model is a subclassed model.
  """
  if not model._is_graph_network:
    raise NotImplementedError

  # Save the model configuration as a JSON string under the assets folder.
  model_json = model.to_json()
  assets_destination_dir = os.path.join(
      compat.as_bytes(saved_model_path),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  model_json_filepath = os.path.join(
      compat.as_bytes(assets_destination_dir),
      compat.as_bytes(constants.SAVED_MODEL_FILENAME_JSON))
  file_io.write_string_to_file(model_json_filepath, model_json)

  # Save the model weights in checkpoint format.
  checkpoint_destination_dir = os.path.join(
      compat.as_bytes(saved_model_path),
      compat.as_bytes(constants.VARIABLES_DIRECTORY))

  if not file_io.file_exists(checkpoint_destination_dir):
    file_io.recursive_create_dir(checkpoint_destination_dir)

  checkpoint_prefix = os.path.join(
      compat.as_text(checkpoint_destination_dir),
      compat.as_text(constants.VARIABLES_FILENAME))
  model.save_weights(checkpoint_prefix, save_format='tf', overwrite=True)
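# A minimal usage sketch for the `save_model` above, not a definitive example:
# it assumes a TF 1.x-era build where this function is importable (its module
# path varied across releases, e.g. tensorflow.contrib.saved_model), and the
# export path is a placeholder.
import tensorflow as tf

example_model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(1),
])
# Per the docstring above, use a tf.train optimizer rather than a
# tf.keras.optimizers one, since the latter cannot be checkpointed here.
example_model.compile(optimizer=tf.train.AdamOptimizer(), loss='mse')
save_model(example_model, '/tmp/example_keras_saved_model')  # placeholder path
# The directory now holds assets/<model json> and variables/<checkpoint>.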
def testCopy(self):
  file_path = os.path.join(self._base_dir, "temp_file")
  file_io.FileIO(file_path, mode="w").write("testing")
  copy_path = os.path.join(self._base_dir, "copy_file")
  file_io.copy(file_path, copy_path)
  self.assertTrue(file_io.file_exists(copy_path))
  # Verify the contents of the copy, not the source.
  self.assertEqual(b"testing", file_io.read_file_to_string(copy_path))
def _serve_metadata(self, query_params):
  run = query_params.get('run')
  if run is None:
    self.handler.respond('query parameter "run" is required', 'text/plain',
                         400)
    return

  name = query_params.get('name')
  if name is None:
    self.handler.respond('query parameter "name" is required', 'text/plain',
                         400)
    return

  if run not in self.configs:
    self.handler.respond('Unknown run: %s' % run, 'text/plain', 400)
    return

  config = self.configs[run]
  fpath = self._get_metadata_file_for_tensor(name, config)
  if not fpath:
    self.handler.respond(
        'No metadata file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    self.handler.respond('%s is not a file' % fpath, 'text/plain', 400)
    return

  with file_io.FileIO(fpath, 'r') as f:
    lines = []
    for line in f:
      lines.append(line)
      if len(lines) >= LIMIT_NUM_POINTS:
        break
  self.handler.respond(''.join(lines), 'text/plain')
def _read_latest_config_files(self, run_path_pairs):
  """Reads and returns the projector config files in every run directory."""
  configs = {}
  config_fpaths = {}
  for run_name, assets_dir in run_path_pairs:
    config = projector_config_pb2.ProjectorConfig()
    config_fpath = os.path.join(assets_dir, PROJECTOR_FILENAME)
    if file_io.file_exists(config_fpath):
      file_content = file_io.read_file_to_string(config_fpath)
      text_format.Merge(file_content, config)
    has_tensor_files = False
    for embedding in config.embeddings:
      if embedding.tensor_path:
        has_tensor_files = True
        break

    if not config.model_checkpoint_path:
      # See if you can find a checkpoint file in the logdir.
      logdir = _assets_dir_to_logdir(assets_dir)
      ckpt_path = _find_latest_checkpoint(logdir)
      if not ckpt_path and not has_tensor_files:
        continue
      if ckpt_path:
        config.model_checkpoint_path = ckpt_path

    # Sanity check for the checkpoint file.
    if (config.model_checkpoint_path and
        not checkpoint_exists(config.model_checkpoint_path)):
      logging.warning('Checkpoint file "%s" not found',
                      config.model_checkpoint_path)
      continue
    configs[run_name] = config
    config_fpaths[run_name] = config_fpath
  return configs, config_fpaths
def _serve_bookmarks(self, query_params):
  run = query_params.get('run')
  if not run:
    self.handler.respond('query parameter "run" is required', 'text/plain',
                         400)
    return

  name = query_params.get('name')
  if name is None:
    self.handler.respond('query parameter "name" is required', 'text/plain',
                         400)
    return

  if run not in self.configs:
    self.handler.respond('Unknown run: %s' % run, 'text/plain', 400)
    return

  config = self.configs[run]
  fpath = self._get_bookmarks_file_for_tensor(name, config)
  if not fpath:
    self.handler.respond(
        'No bookmarks file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    self.handler.respond('%s is not a file' % fpath, 'text/plain', 400)
    return

  bookmarks_json = None
  with file_io.FileIO(fpath, 'r') as f:
    bookmarks_json = f.read()
  self.handler.respond(bookmarks_json, 'application/json')
def write_graph(graph_def, logdir, name, as_text=True):
  """Writes a graph proto to a file.

  The graph is written as a binary proto unless `as_text` is `True`.

  ```python
  v = tf.Variable(0, name='my_variable')
  sess = tf.Session()
  tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt')
  ```

  Args:
    graph_def: A `GraphDef` protocol buffer.
    logdir: Directory where to write the graph. This can refer to remote
      filesystems, such as Google Cloud Storage (GCS).
    name: Filename for the graph.
    as_text: If `True`, writes the graph as an ASCII proto.
  """
  # gcs does not have the concept of directory at the moment.
  if not file_io.file_exists(logdir) and not logdir.startswith("gs:"):
    file_io.recursive_create_dir(logdir)
  path = os.path.join(logdir, name)
  if as_text:
    file_io.write_string_to_file(path, str(graph_def))
  else:
    file_io.write_string_to_file(path, graph_def.SerializeToString())
def cloud_batch_predict(training_dir, prediction_input_file, output_dir,
                        mode, batch_size, shard_files, output_format):
  """See batch_predict"""
  # from . import predict as predict_module
  from .prediction import predict as predict_module

  if mode == 'evaluation':
    model_dir = os.path.join(training_dir, 'evaluation_model')
  elif mode == 'prediction':
    model_dir = os.path.join(training_dir, 'model')
  else:
    raise ValueError('mode must be evaluation or prediction')

  if not file_io.file_exists(model_dir):
    raise ValueError('Model folder %s does not exist' % model_dir)

  _assert_gcs_files([training_dir, prediction_input_file, output_dir])

  cmd = ['predict.py',
         '--cloud',
         '--project-id=%s' % _default_project(),
         '--predict-data=%s' % prediction_input_file,
         '--trained-model-dir=%s' % model_dir,
         '--output-dir=%s' % output_dir,
         '--output-format=%s' % output_format,
         '--batch-size=%s' % str(batch_size),
         '--shard-files' if shard_files else '--no-shard-files',
         '--extra-package=%s' % _TF_GS_URL,
         '--extra-package=%s' % _PROTOBUF_GS_URL,
         '--extra-package=%s' % _package_to_staging(output_dir)]

  return predict_module.main(cmd)
def _serve_bookmarks(self, request):
  run = request.args.get('run')
  if not run:
    return Respond(request, 'query parameter "run" is required', 'text/plain',
                   400)

  name = request.args.get('name')
  if name is None:
    return Respond(request, 'query parameter "name" is required',
                   'text/plain', 400)

  if run not in self.configs:
    return Respond(request, 'Unknown run: "%s"' % run, 'text/plain', 400)

  config = self.configs[run]
  fpath = self._get_bookmarks_file_for_tensor(name, config)
  if not fpath:
    return Respond(
        request,
        'No bookmarks file found for tensor "%s" in the config file "%s"' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
  fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    return Respond(request, '"%s" not found, or is not a file' % fpath,
                   'text/plain', 400)

  bookmarks_json = None
  with file_io.FileIO(fpath, 'rb') as f:
    bookmarks_json = f.read()
  return Respond(request, bookmarks_json, 'application/json')
def _GetBaseApiMap(self):
  """Get a map from graph op name to its base ApiDef.

  Returns:
    Dictionary mapping graph op name to corresponding ApiDef.
  """
  # Convert base ApiDef in Multiline format to Proto format.
  converted_base_api_dir = os.path.join(
      test.get_temp_dir(), 'temp_base_api_defs')
  subprocess.check_call(
      [os.path.join(resource_loader.get_root_dir_with_all_resources(),
                    _CONVERT_FROM_MULTILINE_SCRIPT),
       _BASE_API_DIR, converted_base_api_dir])

  name_to_base_api_def = {}
  base_api_files = file_io.get_matching_files(
      os.path.join(converted_base_api_dir, 'api_def_*.pbtxt'))
  for base_api_file in base_api_files:
    if file_io.file_exists(base_api_file):
      api_defs = api_def_pb2.ApiDefs()
      text_format.Merge(
          file_io.read_file_to_string(base_api_file), api_defs)
      for api_def in api_defs.op:
        name_to_base_api_def[api_def.graph_op_name] = api_def
  return name_to_base_api_def
def save(self, as_text=False):
  """Writes a `SavedModel` protocol buffer to disk.

  The function writes the SavedModel protocol buffer to the export directory
  in serialized format.

  Args:
    as_text: Writes the SavedModel protocol buffer in text format to disk.

  Returns:
    The path to which the SavedModel protocol buffer was written.
  """
  if not file_io.file_exists(self._export_dir):
    file_io.recursive_create_dir(self._export_dir)

  if as_text:
    path = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT))
    file_io.write_string_to_file(path, str(self._saved_model))
  else:
    path = os.path.join(
        compat.as_bytes(self._export_dir),
        compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
    file_io.write_string_to_file(path, self._saved_model.SerializeToString())
  tf_logging.info("SavedModel written to: %s", path)

  return path
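# A minimal end-to-end sketch of the SavedModelBuilder workflow that the
# save() above belongs to, assuming the TF 1.x public API surface
# (tf.saved_model.builder.SavedModelBuilder); paths and tags are placeholders.
import tensorflow as tf

builder = tf.saved_model.builder.SavedModelBuilder('/tmp/example_export')
with tf.Session(graph=tf.Graph()) as sess:
  v = tf.Variable(42, name='v')
  sess.run(tf.global_variables_initializer())
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.tag_constants.SERVING])
# Writes saved_model.pb (or saved_model.pbtxt if as_text=True) alongside the
# variables/ checkpoint under /tmp/example_export.
path = builder.save()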
def read_meta_graph_file(filename):
  """Reads a file containing `MetaGraphDef` and returns the protocol buffer.

  Args:
    filename: `meta_graph_def` filename including the path.

  Returns:
    A `MetaGraphDef` protocol buffer.

  Raises:
    IOError: If the file doesn't exist, or cannot be successfully parsed.
  """
  meta_graph_def = meta_graph_pb2.MetaGraphDef()
  if not file_io.file_exists(filename):
    raise IOError("File %s does not exist." % filename)
  # First try to read it as a binary file.
  file_content = file_io.FileIO(filename, "rb").read()
  try:
    meta_graph_def.ParseFromString(file_content)
    return meta_graph_def
  except Exception:  # pylint: disable=broad-except
    pass

  # Next try to read it as a text file.
  try:
    text_format.Merge(file_content.decode("utf-8"), meta_graph_def)
  except text_format.ParseError as e:
    raise IOError("Cannot parse file %s: %s." % (filename, str(e)))

  return meta_graph_def
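# A hedged usage sketch for read_meta_graph_file above; the .meta path is a
# placeholder, and the function is assumed importable (historically from
# tensorflow.python.framework.meta_graph).
try:
  mg = read_meta_graph_file('/tmp/model.ckpt.meta')  # placeholder path
  print(mg.meta_info_def.tensorflow_version)
except IOError as e:
  # Raised both for a missing file and for a file that parses as neither
  # binary nor text proto, mirroring the docstring.
  print('could not read meta graph: %s' % e)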
def get_experiment(output_dir):
  # Merge schema, input features, and transforms.
  train_config = util.merge_metadata(args.preprocess_output_dir,
                                     args.transforms_file)

  # Get the model to train.
  estimator = util.get_estimator(output_dir, train_config, args)

  # Save a copy of the schema and input to the model folder.
  schema_file = os.path.join(args.preprocess_output_dir, util.SCHEMA_FILE)

  # Make list of files to save with the trained model.
  additional_assets = {'features.json': args.transforms_file,
                       util.SCHEMA_FILE: schema_file}
  if util.is_classification_model(args.model_type):
    target_name = train_config['target_column']
    vocab_file_name = util.CATEGORICAL_ANALYSIS % target_name
    vocab_file_path = os.path.join(
        args.preprocess_output_dir, vocab_file_name)
    assert file_io.file_exists(vocab_file_path)
    additional_assets[vocab_file_name] = vocab_file_path

  export_strategy_target = util.make_export_strategy(
      train_config=train_config,
      args=args,
      keep_target=True,
      assets_extra=additional_assets)
  export_strategy_notarget = util.make_export_strategy(
      train_config=train_config,
      args=args,
      keep_target=False,
      assets_extra=additional_assets)

  input_reader_for_train = get_reader_input_fn(
      train_config=train_config,
      preprocess_output_dir=args.preprocess_output_dir,
      model_type=args.model_type,
      data_paths=args.train_data_paths,
      batch_size=args.train_batch_size,
      shuffle=True,
      num_epochs=args.num_epochs)
  input_reader_for_eval = get_reader_input_fn(
      train_config=train_config,
      preprocess_output_dir=args.preprocess_output_dir,
      model_type=args.model_type,
      data_paths=args.eval_data_paths,
      batch_size=args.eval_batch_size,
      shuffle=False,
      num_epochs=1)

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=input_reader_for_train,
      eval_input_fn=input_reader_for_eval,
      train_steps=args.max_steps,
      export_strategies=[export_strategy_target, export_strategy_notarget],
      min_eval_frequency=args.min_eval_frequency,
      eval_steps=None,
  )
def _save_and_write_assets(self, assets_collection_to_add=None):
  """Saves assets to the meta graph and writes asset files to disk.

  Args:
    assets_collection_to_add: The collection where the asset paths are set up.
  """
  asset_filename_map = _maybe_save_assets(assets_collection_to_add)

  # Return if there are no assets to write.
  if not asset_filename_map:
    tf_logging.info("No assets to write.")
    return

  assets_destination_dir = saved_model_utils.get_or_create_assets_dir(
      self._export_dir)

  # Copy each asset from source path to destination path.
  for asset_basename, asset_source_filepath in asset_filename_map.items():
    asset_destination_filepath = os.path.join(
        compat.as_bytes(assets_destination_dir),
        compat.as_bytes(asset_basename))

    # Only copy the asset file to the destination if it does not already
    # exist. This is to ensure that an asset with the same name defined as
    # part of multiple graphs is only copied the first time.
    if not file_io.file_exists(asset_destination_filepath):
      file_io.copy(asset_source_filepath, asset_destination_filepath)

  tf_logging.info("Assets written to: %s",
                  compat.as_text(assets_destination_dir))
def testAssets(self):
  export_dir = self._get_export_dir("test_assets")
  builder = saved_model_builder.SavedModelBuilder(export_dir)

  with self.test_session(graph=ops.Graph()) as sess:
    self._init_and_validate_variable(sess, "v", 42)

    # Build an asset collection.
    ignored_filepath = os.path.join(
        compat.as_bytes(test.get_temp_dir()),
        compat.as_bytes("ignored.txt"))
    file_io.write_string_to_file(ignored_filepath, "will be ignored")

    asset_collection = self._build_asset_collection(
        "hello42.txt", "foo bar baz", "asset_file_tensor")

    builder.add_meta_graph_and_variables(
        sess, ["foo"], assets_collection=asset_collection)

  # Save the SavedModel to disk.
  builder.save()

  with self.test_session(graph=ops.Graph()) as sess:
    foo_graph = loader.load(sess, ["foo"], export_dir)
    self._validate_asset_collection(export_dir, foo_graph.collection_def,
                                    "hello42.txt", "foo bar baz",
                                    "asset_file_tensor:0")
    ignored_asset_path = os.path.join(
        compat.as_bytes(export_dir),
        compat.as_bytes(constants.ASSETS_DIRECTORY),
        compat.as_bytes("ignored.txt"))
    self.assertFalse(file_io.file_exists(ignored_asset_path))
def _serve_sprite_image(self, request):
  run = request.args.get('run')
  if not run:
    return Respond(request, 'query parameter "run" is required', 'text/plain',
                   400)

  name = request.args.get('name')
  if name is None:
    return Respond(request, 'query parameter "name" is required',
                   'text/plain', 400)

  if run not in self.configs:
    return Respond(request, 'Unknown run: "%s"' % run, 'text/plain', 400)

  config = self.configs[run]
  embedding_info = self._get_embedding(name, config)

  if not embedding_info or not embedding_info.sprite.image_path:
    return Respond(
        request,
        'No sprite image file found for tensor "%s" in the config file "%s"' %
        (name, self.config_fpaths[run]), 'text/plain', 400)

  fpath = os.path.expanduser(embedding_info.sprite.image_path)
  fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    return Respond(request, '"%s" does not exist or is directory' % fpath,
                   'text/plain', 400)

  f = file_io.FileIO(fpath, 'rb')
  encoded_image_string = f.read()
  f.close()
  image_type = imghdr.what(None, encoded_image_string)
  mime_type = _IMGHDR_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
  return Respond(request, encoded_image_string, mime_type)
def _save_and_write_assets(self, assets_collection_to_add=None):
  """Saves assets to the meta graph and writes asset files to disk.

  Args:
    assets_collection_to_add: The collection where the asset paths are set up.
  """
  asset_source_filepath_list = self._save_assets(assets_collection_to_add)

  # Return if there are no assets to write.
  # (Use truthiness, not `len(...) is 0`: `is` compares object identity and
  # must not be used for integer comparison.)
  if not asset_source_filepath_list:
    tf_logging.info("No assets to write.")
    return

  assets_destination_dir = os.path.join(
      compat.as_bytes(self._export_dir),
      compat.as_bytes(constants.ASSETS_DIRECTORY))

  if not file_io.file_exists(assets_destination_dir):
    file_io.recursive_create_dir(assets_destination_dir)

  # Copy each asset from source path to destination path.
  for asset_source_filepath in asset_source_filepath_list:
    asset_source_filename = os.path.basename(asset_source_filepath)

    asset_destination_filepath = os.path.join(
        compat.as_bytes(assets_destination_dir),
        compat.as_bytes(asset_source_filename))
    file_io.copy(
        asset_source_filepath, asset_destination_filepath, overwrite=True)

  tf_logging.info("Assets written to: %s", assets_destination_dir)
def local_batch_predict(training_dir, prediction_input_file, output_dir,
                        mode, batch_size, shard_files, output_format):
  """See batch_predict"""
  # from . import predict as predict_module
  from .prediction import predict as predict_module

  if mode == 'evaluation':
    model_dir = os.path.join(training_dir, 'evaluation_model')
  elif mode == 'prediction':
    model_dir = os.path.join(training_dir, 'model')
  else:
    raise ValueError('mode must be evaluation or prediction')

  if not file_io.file_exists(model_dir):
    raise ValueError('Model folder %s does not exist' % model_dir)

  cmd = ['predict.py',
         '--predict-data=%s' % prediction_input_file,
         '--trained-model-dir=%s' % model_dir,
         '--output-dir=%s' % output_dir,
         '--output-format=%s' % output_format,
         '--batch-size=%s' % str(batch_size),
         '--shard-files' if shard_files else '--no-shard-files',
         '--has-target' if mode == 'evaluation' else '--no-has-target']

  # return predict_module.predict.main(cmd)
  return predict_module.main(cmd)
def _serve_sprite_image(self, request, query_params):
  run = query_params.get('run')
  if not run:
    request.respond('query parameter "run" is required', 'text/plain', 400)
    return

  name = query_params.get('name')
  if name is None:
    request.respond('query parameter "name" is required', 'text/plain', 400)
    return

  if run not in self.configs:
    request.respond('Unknown run: %s' % run, 'text/plain', 400)
    return

  config = self.configs[run]
  embedding_info = self._get_embedding(name, config)

  if not embedding_info or not embedding_info.sprite.image_path:
    request.respond(
        'No sprite image file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return

  fpath = embedding_info.sprite.image_path
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    request.respond(
        '%s does not exist or is directory' % fpath, 'text/plain', 400)
    return

  # Read in binary mode; sprite images are not text.
  f = file_io.FileIO(fpath, 'rb')
  encoded_image_string = f.read()
  f.close()
  image_type = imghdr.what(None, encoded_image_string)
  mime_type = _IMGHDR_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
  request.respond(encoded_image_string, mime_type)
def testFileWrite(self):
  file_path = os.path.join(self.get_temp_dir(), "temp_file")
  file_io.write_string_to_file(file_path, "testing")
  self.assertTrue(file_io.file_exists(file_path))
  file_contents = file_io.read_file_to_string(file_path)
  self.assertEqual(b"testing", file_contents)
  file_io.delete_file(file_path)
def add_meta_graph_and_variables(self,
                                 sess,
                                 tags,
                                 signature_def_map=None,
                                 assets_collection=None,
                                 legacy_init_op=None):
  """Adds the current meta graph to the SavedModel and saves variables.

  Creates a Saver to save the variables from the provided session. Exports the
  corresponding meta graph def. This function assumes that the variables to be
  saved have been initialized. For a given `SavedModelBuilder`, this API must
  be called exactly once and for the first meta graph to save. For subsequent
  meta graph defs to be added, the `add_meta_graph()` API must be used.

  Args:
    sess: The TensorFlow session from which to save the meta graph and
      variables.
    tags: The set of tags with which to save the meta graph.
    signature_def_map: The map of signature def map to add to the meta graph
      def.
    assets_collection: Assets collection to be saved with SavedModel.
    legacy_init_op: Op or group of ops to execute after the restore op upon a
      load.
  """
  if self._has_saved_variables:
    raise AssertionError("Variables and assets have already been saved. "
                         "Please invoke `add_meta_graph()` instead.")

  # Save asset files and write them to disk, if any.
  self._save_and_write_assets(assets_collection)

  # Create the variables sub-directory, if it does not exist.
  variables_dir = os.path.join(
      compat.as_text(self._export_dir),
      compat.as_text(constants.VARIABLES_DIRECTORY))
  if not file_io.file_exists(variables_dir):
    file_io.recursive_create_dir(variables_dir)

  variables_path = os.path.join(
      compat.as_text(variables_dir),
      compat.as_text(constants.VARIABLES_FILENAME))

  # Add legacy init op to the SavedModel.
  self._maybe_add_legacy_init_op(legacy_init_op)

  # Save the variables and export meta graph def.
  saver = tf_saver.Saver(
      variables.all_variables(),
      sharded=True,
      write_version=saver_pb2.SaverDef.V2)
  saver.save(sess, variables_path, write_meta_graph=False)
  meta_graph_def = saver.export_meta_graph()

  # Tag the meta graph def and add it to the SavedModel.
  self._tag_and_add_meta_graph(meta_graph_def, tags, signature_def_map)

  # Mark this instance of SavedModel as having saved variables, such that
  # subsequent attempts to save variables will fail.
  self._has_saved_variables = True
def testGetMatchingFiles(self):
  dir_path = file_io.join(self._base_dir, "temp_dir")
  file_io.create_dir(dir_path)
  files = ["file1.txt", "file2.txt", "file3.txt", "file*.txt"]
  for name in files:
    file_path = file_io.join(dir_path, name)
    file_io.FileIO(file_path, mode="w").write("testing")
  expected_match = [file_io.join(dir_path, name) for name in files]
  self.assertItemsEqual(
      file_io.get_matching_files(file_io.join(dir_path, "file*.txt")),
      expected_match)
  self.assertItemsEqual(file_io.get_matching_files(tuple()), [])
  files_subset = [
      file_io.join(dir_path, files[0]), file_io.join(dir_path, files[2])
  ]
  self.assertItemsEqual(
      file_io.get_matching_files(files_subset), files_subset)
  file_io.delete_recursively(dir_path)
  self.assertFalse(file_io.file_exists(file_io.join(dir_path, "file3.txt")))
def load(export_dir):
  """Load a SavedModel from `export_dir`."""
  object_graph_filename = os.path.join(
      compat.as_bytes(export_dir),
      compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY),
      compat.as_bytes("object_graph.pb"))
  if file_io.file_exists(object_graph_filename):
    # If there is an object graph associated with the SavedModel, we'll create
    # a root object from that.
    object_graph_string = file_io.FileIO(object_graph_filename, "rb").read()
    object_graph_proto = saved_object_graph_pb2.SavedObjectGraph()
    object_graph_proto.ParseFromString(object_graph_string)
    root = _recreate_object_graph(object_graph_proto)
  else:
    raise NotImplementedError(
        "Currently only SavedModels exported with `tf.saved_model.save` may "
        "be imported. Other SavedModels may eventually be supported via "
        "load().")
  # TODO(allenl): load functions from the SavedModel into the eager context
  return root
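# A hedged usage sketch for the object-based load above (an early precursor
# of tf.saved_model.load in TF 2.x). The export directory is a placeholder
# assumed to have been produced by `tf.saved_model.save`.
root = load('/tmp/object_based_export')  # placeholder path
# `root` mirrors the object graph of the originally saved object; its
# attributes (e.g. variables) are recreated from the SavedObjectGraph proto.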
def _serve_metadata(self, request):
  run = request.args.get('run')
  if run is None:
    return Respond(request, 'query parameter "run" is required', 'text/plain',
                   400)

  name = request.args.get('name')
  if name is None:
    return Respond(request, 'query parameter "name" is required',
                   'text/plain', 400)

  num_rows = _parse_positive_int_param(request, 'num_rows')
  if num_rows == -1:
    return Respond(request, 'query parameter num_rows must be integer > 0',
                   'text/plain', 400)

  if run not in self.configs:
    return Respond(request, 'Unknown run: %s' % run, 'text/plain', 400)

  config = self.configs[run]
  fpath = self._get_metadata_file_for_tensor(name, config)
  if not fpath:
    return Respond(
        request,
        'No metadata file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    return Respond(request, '%s is not a file' % fpath, 'text/plain', 400)

  num_header_rows = 0
  with file_io.FileIO(fpath, 'r') as f:
    lines = []
    # Stream reading the file with early break in case the file doesn't fit
    # in memory.
    for line in f:
      lines.append(line)
      if len(lines) == 1 and '\t' in lines[0]:
        num_header_rows = 1
      if num_rows and len(lines) >= num_rows + num_header_rows:
        break
  return Respond(request, ''.join(lines), 'text/plain')
def calibration_structure_builder(self, columns_to_tensors, hparams):
  """Returns the calibration structure of the model. See base class."""

  # Check to make sure input features are single dimensional.
  for (column, tensor) in six.iteritems(columns_to_tensors):
    if len(tensor.shape) > 1 and tensor.shape.dims[1].value > 1:
      raise ValueError(
          'Separately calibrated RTLs do not support multi dimensional '
          'features: %s with shape %s' % (column, tensor.shape))

  sorted_columns = sorted(columns_to_tensors.keys())
  n_inputs = len(columns_to_tensors)

  num_lattices = hparams.get_param('num_lattices')
  lattice_rank = hparams.get_param('lattice_rank')
  rtl_seed = hparams.get_param('rtl_seed')

  # Create and save the structure if it does not exist.
  if not file_io.file_exists(self._structure_file):
    structure = self._create_structure(n_inputs, num_lattices, lattice_rank,
                                       rtl_seed)
    self._save_structure(structure)
  structure = self._load_structure()

  # Check the structure is what we expect.
  if len(structure) != num_lattices:
    raise ValueError(
        'Expect %d number of lattices, but found %d number of lattices in '
        'structure: %s' % (num_lattices, len(structure), str(structure)))

  calibration_structure = []
  for indices in structure:
    if len(indices) != lattice_rank:
      raise ValueError(
          'Expect %d lattice rank, but found %d in structure: %s' %
          (lattice_rank, len(indices), str(structure)))
    sub_columns_to_tensors = {
        sorted_columns[i]: columns_to_tensors[sorted_columns[i]]
        for i in indices
    }
    calibration_structure.append(sub_columns_to_tensors)

  return calibration_structure
def process(self, element):
  try:
    row, label_ids = element.element
  except AttributeError:
    row, label_ids = element
  id = int(row[ID_COL])
  images_count = int(row[IMAGES_COUNT_COL])
  if images_count < 1:
    embedding = None
  else:
    emb_filepath = "%s/%s" % (self._emb_path, id_to_path(id))
    if not file_io.file_exists(emb_filepath):
      embedding = None
      logging.warn('file does not exist: %s', emb_filepath)
      empty_imgs_count.inc()
    else:
      embedding = self._fetch_embedding(emb_filepath)
  yield row, label_ids, embedding
def get_model(model_path):
  if file_io.file_exists(model_path):
    # Copy the model to a local file so Keras can load it. Use binary-safe
    # modes: HDF5 is a binary format and text mode would corrupt it.
    with file_io.FileIO(model_path, mode='rb') as input_f:
      with file_io.FileIO("model.h5", mode='wb+') as output_f:
        output_f.write(input_f.read())
    return load_model("model.h5")
  else:
    print("Building new model")
    model = Sequential([
        Dense(9, activation='relu', input_shape=(9,)),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(9, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def __init__(self, html_template_path, export_report_path):
  """Reads the HTML template content.

  Args:
    html_template_path: A string, path to the template HTML file.
    export_report_path: A string, path to the generated HTML report. This
      path should point to a '.html' file with date and time in its name.
      e.g. 2019-01-01-10:05.toco_report.html.

  Raises:
    IOError: File doesn't exist.
  """
  # Load the template HTML.
  if not _file_io.file_exists(html_template_path):
    raise IOError("File '{0}' does not exist.".format(html_template_path))
  with _file_io.FileIO(html_template_path, "r") as f:
    self.html_template = f.read()

  _file_io.recursive_create_dir(os.path.dirname(export_report_path))
  self.export_report_path = export_report_path
def __init__(self, export_dir):
  self._saved_model = saved_model_pb2.SavedModel()
  self._saved_model.saved_model_schema_version = (
      constants.SAVED_MODEL_SCHEMA_VERSION)

  self._export_dir = export_dir
  if file_io.file_exists(export_dir):
    if file_io.list_directory(export_dir):
      raise AssertionError(
          "Export directory already exists, and isn't empty. Please choose "
          "a different export directory, or delete all the contents of the "
          "specified directory: %s" % export_dir)
  else:
    file_io.recursive_create_dir(self._export_dir)

  # Boolean to track whether variables and assets corresponding to the
  # SavedModel have been saved. Specifically, the first meta graph to be
  # added MUST use the add_meta_graph_and_variables() API. Subsequent add
  # operations on the SavedModel MUST use the add_meta_graph() API which
  # does not save weights.
  self._has_saved_variables = False
def classify_run1(begin_flag):
  if begin_flag:
    init()
  # logging.set_verbosity(tf.logging.ERROR)
  if FLAGS.input_model_tgz:
    if FLAGS.train_dir:
      raise ValueError("You cannot supply --train_dir if supplying "
                       "--input_model_tgz")
    # Untar.
    if not os.path.exists(FLAGS.untar_model_dir):
      os.makedirs(FLAGS.untar_model_dir)
    tarfile.open(FLAGS.input_model_tgz).extractall(FLAGS.untar_model_dir)
    FLAGS.train_dir = FLAGS.untar_model_dir

  flags_dict_file = os.path.join(FLAGS.train_dir, "model_flags.json")
  if not file_io.file_exists(flags_dict_file):
    raise IOError("Cannot find %s. Did you run eval.py?" % flags_dict_file)
  flags_dict = json.loads(file_io.FileIO(flags_dict_file, "r").read())

  # Convert feature_names and feature_sizes to lists of values.
  feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
      flags_dict["feature_names"], flags_dict["feature_sizes"])

  if flags_dict["frame_features"]:
    reader = readers.YT8MFrameFeatureReader(feature_names=feature_names,
                                            feature_sizes=feature_sizes)
  else:
    reader = readers.YT8MAggregatedFeatureReader(
        feature_names=feature_names, feature_sizes=feature_sizes)

  if not FLAGS.output_file:
    raise ValueError("'output_file' was not specified. "
                     "Unable to continue with inference.")
  if not FLAGS.input_data_pattern:
    raise ValueError("'input_data_pattern' was not specified. "
                     "Unable to continue with inference.")

  return inference(reader, FLAGS.train_dir, FLAGS.input_data_pattern,
                   FLAGS.output_file, FLAGS.batch_size, FLAGS.top_k)
def write_graph(graph_or_graph_def, logdir, name, as_text=True):
  """Writes a graph proto to a file.

  The graph is written as a binary proto unless `as_text` is `True`.

  ```python
  v = tf.Variable(0, name='my_variable')
  sess = tf.Session()
  tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt')
  ```

  or

  ```python
  v = tf.Variable(0, name='my_variable')
  sess = tf.Session()
  tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt')
  ```

  Args:
    graph_or_graph_def: A `Graph` or a `GraphDef` protocol buffer.
    logdir: Directory where to write the graph. This can refer to remote
      filesystems, such as Google Cloud Storage (GCS).
    name: Filename for the graph.
    as_text: If `True`, writes the graph as an ASCII proto.
  """
  if isinstance(graph_or_graph_def, ops.Graph):
    graph_def = graph_or_graph_def.as_graph_def()
  else:
    graph_def = graph_or_graph_def

  # gcs does not have the concept of directory at the moment.
  if not file_io.file_exists(logdir) and not logdir.startswith('gs:'):
    file_io.recursive_create_dir(logdir)
  path = os.path.join(logdir, name)
  if as_text:
    file_io.atomic_write_string_to_file(path, str(graph_def))
  else:
    file_io.atomic_write_string_to_file(path, graph_def.SerializeToString())
def _read_latest_config_files(self, run_path_pairs):
  """Reads and returns the projector config files in every run directory."""
  configs = {}
  config_fpaths = {}
  for run_name, logdir in run_path_pairs:
    config = ProjectorConfig()
    config_fpath = os.path.join(logdir, PROJECTOR_FILENAME)
    if file_io.file_exists(config_fpath):
      file_content = file_io.read_file_to_string(config_fpath).decode('utf-8')
      text_format.Merge(file_content, config)

    has_tensor_files = False
    for embedding in config.embeddings:
      if embedding.tensor_path:
        has_tensor_files = True
        break

    if not config.model_checkpoint_path:
      # See if you can find a checkpoint file in the logdir.
      ckpt_path = latest_checkpoint(logdir)
      if not ckpt_path:
        # Or in the parent of logdir.
        ckpt_path = latest_checkpoint(os.path.join(logdir, os.pardir))
      if not ckpt_path and not has_tensor_files:
        continue
      if ckpt_path:
        config.model_checkpoint_path = ckpt_path

    # Sanity check for the checkpoint file.
    if (config.model_checkpoint_path and
        not checkpoint_exists(config.model_checkpoint_path)):
      logging.warning('Checkpoint file %s not found',
                      config.model_checkpoint_path)
      continue
    configs[run_name] = config
    config_fpaths[run_name] = config_fpath
  return configs, config_fpaths
def restore(self):
  """Restore the training state from the backed-up checkpoint file.

  Returns:
    True if the training state is successfully restored. False if the
    training state doesn't need to be restored, or if an error occurred so
    that it can't be.
  """
  self._assert_in_multi_worker_mode()
  if not dc_context.get_current_worker_context().experimental_should_init:
    # For multi-worker training, it should not restore a model in certain
    # worker settings (e.g. non-chief worker in ParameterServerStrategy).
    return False
  if file_io.file_exists(self._backup_dir):
    try:
      # Load the weights plus the CKPT_SAVED_EPOCH variable.
      self._model.load_weights(self._backup_filepath)
      return True
    except (IOError, ValueError) as e:
      raise ValueError('Error loading file from {}. Reason: {}'.format(
          self._backup_filepath, e))
  return False
def _serve_metadata(self, request, query_params):
  run = query_params.get('run')
  if run is None:
    request.respond('query parameter "run" is required', 'text/plain', 400)
    return

  name = query_params.get('name')
  if name is None:
    request.respond('query parameter "name" is required', 'text/plain', 400)
    return

  if run not in self.configs:
    request.respond('Unknown run: %s' % run, 'text/plain', 400)
    return

  config = self.configs[run]
  fpath = self._get_metadata_file_for_tensor(name, config)
  if not fpath:
    request.respond(
        'No metadata file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    request.respond('%s is not a file' % fpath, 'text/plain', 400)
    return

  num_header_rows = 0
  with file_io.FileIO(fpath, 'r') as f:
    lines = []
    # Stream reading the file with early break in case the file doesn't fit
    # in memory.
    for line in f:
      lines.append(line)
      if len(lines) == 1 and '\t' in lines[0]:
        num_header_rows = 1
      if len(lines) >= LIMIT_NUM_POINTS + num_header_rows:
        break
  request.respond(''.join(lines), 'text/plain')
def call_model_predictions_from_df(project_name,
                                   tmp_tfrecords_gcs_path,
                                   tmp_tfrecords_with_predictions_gcs_path,
                                   model_name,
                                   version_name=None):
  """Calls a prediction job.

  Args:
    project_name: gcp project name.
    tmp_tfrecords_gcs_path: gcs path to store tf_records, which will be the
      inputs to the batch prediction job.
    tmp_tfrecords_with_predictions_gcs_path: gcs path to store tf_records,
      which will be the outputs of the batch prediction job.
    model_name: Model name used to run predictions. The model must take as
      inputs TF-Records with fields $TEXT_FEATURE_NAME and $SENTENCE_KEY, and
      should return a dictionary including the field $LABEL_NAME.
    version_name: Model version to run predictions. If None, it will use the
      default version of the model.

  Returns:
    job_id: the job_id of the prediction job.

  Raises:
    ValueError: if tmp_tfrecords_gcs_path does not exist.
  """
  # Create tf-records if necessary.
  if not file_io.file_exists(tmp_tfrecords_gcs_path):
    raise ValueError('tf_records do not exist.')

  # Call batch prediction job.
  job_id = _call_batch_job(
      project_name,
      input_paths=tmp_tfrecords_gcs_path,
      output_path=tmp_tfrecords_with_predictions_gcs_path,
      model_name=model_name,
      version_name=version_name)
  return job_id
def _latest_checkpoints_changed(configs, run_path_pairs):
  """Returns true if the latest checkpoint has changed in any of the runs."""
  for run_name, logdir in run_path_pairs:
    if run_name not in configs:
      config = ProjectorConfig()
      config_fpath = os.path.join(logdir, PROJECTOR_FILENAME)
      if file_io.file_exists(config_fpath):
        file_content = file_io.read_file_to_string(config_fpath)
        text_format.Merge(file_content, config)
    else:
      config = configs[run_name]

    # See if you can find a checkpoint file in the logdir.
    ckpt_path = latest_checkpoint(logdir)
    if not ckpt_path:
      # See if you can find a checkpoint in the parent of logdir.
      ckpt_path = latest_checkpoint(os.path.join(logdir, os.pardir))
      if not ckpt_path:
        continue
    if config.model_checkpoint_path != ckpt_path:
      return True
  return False
def load_library(library_location):
  """Loads a TensorFlow plugin.

  "library_location" can be a path to a specific shared object, or a folder.
  If it is a folder, all shared objects that are named "libtfkernel*" will be
  loaded. When the library is loaded, kernels registered in the library via
  the `REGISTER_*` macros are made available in the TensorFlow process.

  Args:
    library_location: Path to the plugin or the folder of plugins. Relative
      or absolute filesystem path to a dynamic library file or folder.

  Returns:
    None

  Raises:
    OSError: When the file to be loaded is not found.
    RuntimeError: when unable to load the library.
  """
  if file_io.file_exists(library_location):
    if file_io.is_directory(library_location):
      directory_contents = file_io.list_directory(library_location)

      kernel_libraries = [
          os.path.join(library_location, f) for f in directory_contents
          if _is_shared_object(f)
      ]
    else:
      kernel_libraries = [library_location]

    for lib in kernel_libraries:
      py_tf.TF_LoadLibrary(lib)

  else:
    raise OSError(
        errno.ENOENT,
        'The file or folder to load kernel libraries from does not exist.',
        library_location)
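# A hedged usage sketch for load_library above (this mirrors the public
# tf.load_library in TF >= 1.14). The .so path below is a placeholder
# assumption, not a real plugin.
import errno

try:
  load_library('/opt/my_ops/libtfkernel_example.so')  # placeholder path
except OSError as e:
  if e.errno == errno.ENOENT:
    print('plugin not found; nothing loaded')
  else:
    raise
# Passing a folder instead would load every "libtfkernel*" shared object
# found inside it.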
def _serve_sprite_image(self, request, query_params):
  run = query_params.get('run')
  if not run:
    request.respond('query parameter "run" is required', 'text/plain', 400)
    return

  name = query_params.get('name')
  if name is None:
    request.respond('query parameter "name" is required', 'text/plain', 400)
    return

  if run not in self.configs:
    request.respond('Unknown run: %s' % run, 'text/plain', 400)
    return

  config = self.configs[run]
  embedding_info = self._get_embedding(name, config)

  if not embedding_info or not embedding_info.sprite.image_path:
    request.respond(
        'No sprite image file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return

  fpath = embedding_info.sprite.image_path
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    request.respond('%s does not exist or is directory' % fpath, 'text/plain',
                    400)
    return

  # Read in binary mode; sprite images are not text.
  f = file_io.FileIO(fpath, 'rb')
  encoded_image_string = f.read()
  f.close()
  image_type = imghdr.what(None, encoded_image_string)
  mime_type = _IMGHDR_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
  request.respond(encoded_image_string, mime_type)
def get_vocabulary(preprocess_output_dir, name):
  """Loads the vocabulary file as a list of strings.

  Args:
    preprocess_output_dir: Should contain the file CATEGORICAL_ANALYSIS % name.
    name: name of the csv column.

  Returns:
    List of strings.

  Raises:
    ValueError: if file is missing.
  """
  vocab_file = os.path.join(preprocess_output_dir, CATEGORICAL_ANALYSIS % name)
  if not file_io.file_exists(vocab_file):
    raise ValueError('File %s not found in %s' %
                     (CATEGORICAL_ANALYSIS % name, preprocess_output_dir))

  labels = file_io.read_file_to_string(vocab_file).split('\n')
  label_values = [x for x in labels if x]  # Remove empty lines.

  return label_values
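# A hedged round-trip sketch for get_vocabulary above. It assumes
# CATEGORICAL_ANALYSIS is a filename template such as 'vocab_%s.csv'; the
# actual template is defined elsewhere in this package.
import tempfile

out_dir = tempfile.mkdtemp()
file_io.write_string_to_file(
    os.path.join(out_dir, CATEGORICAL_ANALYSIS % 'color'),
    'red\ngreen\nblue\n')
print(get_vocabulary(out_dir, 'color'))  # ['red', 'green', 'blue']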
def proc_tensorboard_can_still_save_to_temp_even_if_it_exists(test_obj):
  model, _, train_ds, steps = _model_setup(test_obj, file_format='')
  num_epoch = 2

  saving_filepath = os.path.join(
      test_obj.get_temp_dir(), 'logfile_%s' % (test_base.get_task_type()))

  saving_filepath_for_temp = os.path.join(saving_filepath, 'workertemp_1')
  os.mkdir(saving_filepath)
  os.mkdir(saving_filepath_for_temp)

  # Verifies that even if `saving_filepath_for_temp` exists, tensorboard
  # can still save to temporary directory.
  test_obj.assertTrue(file_io.file_exists(saving_filepath_for_temp))

  model.fit(
      x=train_ds,
      epochs=num_epoch,
      steps_per_epoch=steps,
      callbacks=[callbacks.TensorBoard(log_dir=saving_filepath)])
def testWriteTransformFnIsIdempotent(self):
  transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

  def mock_write_metadata_expand(unused_self, unused_metadata):
    raise ArithmeticError('Some error')

  with beam.Pipeline() as pipeline:
    # Create an empty directory for the source saved model dir.
    saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
    saved_model_dir_pcoll = (
        pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))

    with mock.patch.object(transform_fn_io.beam_metadata_io.WriteMetadata,
                           'expand', mock_write_metadata_expand):
      with self.assertRaisesRegexp(ArithmeticError, 'Some error'):
        _ = ((saved_model_dir_pcoll, object())
             | transform_fn_io.WriteTransformFn(transform_output_dir))

  self.assertFalse(file_io.file_exists(transform_output_dir))
def read_vocab(args, column_name):
  """Reads a vocab file if it exists.

  Args:
    args: command line flags
    column_name: name of the column that has a vocab file.

  Returns:
    List of vocab words or [] if the vocab file is not found.
  """
  vocab_path = os.path.join(args.analysis_output_dir,
                            VOCAB_ANALYSIS_FILE % column_name)

  if not file_io.file_exists(vocab_path):
    return []

  vocab_str = file_io.read_file_to_string(vocab_path)
  vocab = pd.read_csv(
      six.StringIO(vocab_str),
      header=None,
      names=['token', 'count'],
      dtype=str)  # Prevent pd from converting numerical categories.
  return vocab['token'].tolist()
def to_savedmodel(model, export_path):
  """Convert the Keras HDF5 model into a TensorFlow SavedModel."""
  if export_path.startswith('gs://'):
    _save_oncloud(model, export_path)
  else:
    # Allow overwriting of export_path if it already exists by removing it
    # first.
    if file_io.file_exists(export_path):
      file_io.delete_recursively(export_path)
    builder = saved_model_builder.SavedModelBuilder(export_path)

    signature = predict_signature_def(inputs={'input': model.inputs[0]},
                                      outputs={'income': model.outputs[0]})

    with K.get_session() as sess:
      builder.add_meta_graph_and_variables(
          sess=sess,
          tags=[tag_constants.SERVING],
          signature_def_map={
              signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature
          })
      builder.save()
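# A hedged usage sketch for to_savedmodel above, assuming a TF 1.x graph-mode
# Keras setup where K is the Keras backend; the model architecture and the
# export path are illustrative assumptions.
from tensorflow import keras

income_model = keras.Sequential([
    keras.layers.Dense(4, activation='relu', input_shape=(10,)),
    keras.layers.Dense(1, name='income_out'),
])
income_model.compile(optimizer='adam', loss='mse')
# A local path takes the SavedModelBuilder branch; a 'gs://...' path would
# route through _save_oncloud instead.
to_savedmodel(income_model, '/tmp/census_export')  # placeholder path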
def append_index(filesets, step=False):
  index_path = os.path.join(a.output_dir, "index.html")
  if file_io.file_exists(index_path):
    index = file_io.FileIO(index_path, "a")
  else:
    index = file_io.FileIO(index_path, "w")
    index.write("<html><body><table><tr>")
    if step:
      index.write("<th>step</th>")
    index.write(
        "<th>name</th><th>input</th><th>output</th><th>target</th></tr>")

  for fileset in filesets:
    index.write("<tr>")
    if step:
      index.write("<td>%d</td>" % fileset["step"])
    index.write("<td>%s</td>" % fileset["name"])
    for kind in ["inputs", "outputs", "targets"]:
      index.write("<td><img src='images/%s'></td>" % fileset[kind])
    index.write("</tr>")
  return index_path
def _read_latest_config_files(self, run_path_pairs):
  """Reads and returns the projector config files in every run directory."""
  configs = {}
  config_fpaths = {}
  for run_name, assets_dir in run_path_pairs:
    config = projector_config_pb2.ProjectorConfig()
    config_fpath = os.path.join(assets_dir, PROJECTOR_FILENAME)
    if file_io.file_exists(config_fpath):
      file_content = file_io.read_file_to_string(config_fpath)
      text_format.Merge(file_content, config)
    has_tensor_files = False
    for embedding in config.embeddings:
      if embedding.tensor_path:
        if not embedding.tensor_name:
          embedding.tensor_name = os.path.basename(embedding.tensor_path)
        has_tensor_files = True
        break

    if not config.model_checkpoint_path:
      # See if you can find a checkpoint file in the logdir.
      logdir = _assets_dir_to_logdir(assets_dir)
      ckpt_path = _find_latest_checkpoint(logdir)
      if not ckpt_path and not has_tensor_files:
        continue
      if ckpt_path:
        config.model_checkpoint_path = ckpt_path

    # Sanity check for the checkpoint file.
    if (config.model_checkpoint_path and
        not checkpoint_exists(config.model_checkpoint_path)):
      logging.warning('Checkpoint file "%s" not found',
                      config.model_checkpoint_path)
      continue
    configs[run_name] = config
    config_fpaths[run_name] = config_fpath
  return configs, config_fpaths
def _load_xgboost_model(model_path):
  """Loads an xgboost model from GCS or local.

  Args:
    model_path: path to the directory containing the xgboost model.bst file.
      This path can be either a local path or a GCS path.

  Returns:
    A xgboost.Booster with the model at model_path loaded.

  Raises:
    PredictionError: If there is a problem while loading the file.
  """
  # TODO(b/64574886): Move this to the top once b/64574886 is resolved.
  # Before then, it would work in production since we install xgboost in the
  # Dockerfile, but the problem is the unit test that will fail to build and
  # run since xgboost can not be added as a dependency to this target.
  import xgboost as xgb  # pylint: disable=g-import-not-at-top

  model_file = os.path.join(model_path, MODEL_FILE_NAME_BST)
  if not file_io.file_exists(model_file):
    return None
  try:
    if model_file.startswith("gs://"):
      with file_io.FileIO(model_file, mode="rb") as f:
        # TODO(b/72736769): Load model in memory twice. Use readinto if/when
        # that becomes available in FileIO. Or copy model locally before
        # loading.
        model_buf = bytearray(f.read())
        return xgb.Booster(model_file=model_buf)
    else:
      return xgb.Booster(model_file=model_file)
  except xgb.core.XGBoostError as e:
    error_msg = "Could not load the model: {}. {}.".format(
        os.path.join(model_path, MODEL_FILE_NAME_BST), str(e))
    logging.critical(error_msg)
    raise PredictionError(PredictionError.FAILED_TO_LOAD_MODEL, error_msg)
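# A hedged usage sketch for _load_xgboost_model above. The directory is a
# placeholder; MODEL_FILE_NAME_BST (commonly "model.bst") is assumed to be
# defined at module scope as in the surrounding file.
booster = _load_xgboost_model('/tmp/xgb_model_dir')  # placeholder path
if booster is None:
  print('no %s in that directory; nothing loaded' % MODEL_FILE_NAME_BST)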
def evaluate():
  tf.set_random_seed(0)  # for reproducibility

  # Read the flags json written by train.py.
  model_flags_path = os.path.join(FLAGS.train_dir, "model_flags.json")
  if not file_io.file_exists(model_flags_path):
    raise IOError(("Cannot find file %s. Did you run train.py on the same "
                   "--train_dir?") % model_flags_path)
  flags_dict = json.loads(file_io.FileIO(model_flags_path, mode="r").read())

  # Load the mapping matrix.
  file_dir = ('/home/weimin/yt8m/code/youtube-8m/' +
              'mapping_from_2017_to_2018_withAddCol.npz')
  with open(file_dir, 'rb') as f:
    mapping_2017 = np.load(f)

  with tf.Graph().as_default():
    # Convert feature_names and feature_sizes to lists of values.
    feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
        flags_dict["feature_names"], flags_dict["feature_sizes"])

    if flags_dict["frame_features"]:
      reader = readers.YT8MFrameFeatureReader(
          feature_names=feature_names,
          feature_sizes=feature_sizes,
          num_classes=4716,
          mapping_mat=mapping_2017)
    else:
      reader = readers.YT8MAggregatedFeatureReader(
          feature_names=feature_names,
          feature_sizes=feature_sizes,
          num_classes=4716,
          mapping_mat=mapping_2017)

    model = find_class_by_name(flags_dict["model"],
                               [frame_level_models, video_level_models])()
    label_loss_fn = find_class_by_name(flags_dict["label_loss"], [losses])()

    # Compare with ==, not `is`: identity comparison against a string
    # literal is unreliable.
    if FLAGS.eval_data_pattern == "":
      raise IOError("'eval_data_pattern' was not specified. " +
                    "Nothing to evaluate.")

    build_graph(
        reader=reader,
        model=model,
        eval_data_pattern=FLAGS.eval_data_pattern,
        label_loss_fn=label_loss_fn,
        num_readers=FLAGS.num_readers,
        batch_size=FLAGS.batch_size)
    logging.info("built evaluation graph")

    video_id_batch = tf.get_collection("video_id_batch")[0]
    prediction_batch = tf.get_collection("predictions")[0]
    label_batch = tf.get_collection("labels")[0]
    # loss = tf.get_collection("loss")[0]
    summary_op = tf.get_collection("summary_op")[0]

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(
        FLAGS.train_dir, graph=tf.get_default_graph())

    evl_metrics = eval_util.EvaluationMetrics(reader.num_classes, FLAGS.top_k)

    last_global_step_val = -1
    while True:
      last_global_step_val = evaluation_loop(
          video_id_batch, prediction_batch, label_batch, summary_op,
          saver, summary_writer, evl_metrics, last_global_step_val,
          FLAGS.total_ens_times)
      if FLAGS.run_once:
        break
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.lib.io import file_io
import numpy as np

from model import inference

FLAGS = tf.app.flags.FLAGS
# Defaults for integer flags should be ints, not strings.
tf.app.flags.DEFINE_integer('input_height', 72, '')
tf.app.flags.DEFINE_integer('input_width', 24, '')
tf.app.flags.DEFINE_integer('input_ch', 1, '')
tf.app.flags.DEFINE_string('restore', None, '')
tf.app.flags.DEFINE_bool('as_text', False, '')

if file_io.file_exists('./model'):
  file_io.delete_recursively('./model')

builder = tf.saved_model.builder.SavedModelBuilder('./model')

with tf.Session(graph=tf.Graph()) as sess:
  ph_images = tf.placeholder(
      tf.float32,
      shape=[None, FLAGS.input_height, FLAGS.input_width, FLAGS.input_ch])
  ph_lengths = tf.placeholder(tf.float32)
  ph_widths = tf.placeholder(tf.float32)
  ph_areas = tf.placeholder(tf.float32)

  logits = inference(ph_images, ph_lengths, ph_widths, ph_areas, 1.0, False)
  accuracies = tf.nn.softmax(logits, name='accuracies')
  _, predicts = tf.nn.top_k(accuracies, k=2, name='predicts')
def add_meta_graph_and_variables(self,
                                 sess,
                                 tags,
                                 signature_def_map=None,
                                 assets_collection=None,
                                 legacy_init_op=None,
                                 clear_devices=False,
                                 main_op=None):
  """Adds the current meta graph to the SavedModel and saves variables.

  Creates a Saver to save the variables from the provided session. Exports the
  corresponding meta graph def. This function assumes that the variables to be
  saved have been initialized. For a given `SavedModelBuilder`, this API must
  be called exactly once and for the first meta graph to save. For subsequent
  meta graph defs to be added, the `add_meta_graph()` API must be used.

  Args:
    sess: The TensorFlow session from which to save the meta graph and
      variables.
    tags: The set of tags with which to save the meta graph.
    signature_def_map: The map of signature def map to add to the meta graph
      def.
    assets_collection: Assets collection to be saved with SavedModel.
    legacy_init_op: Legacy support for op or group of ops to execute after
      the restore op upon a load.
    clear_devices: Set to true if the device info on the default graph should
      be cleared.
    main_op: Op or group of ops to execute when the graph is loaded.
  """
  if self._has_saved_variables:
    raise AssertionError("Graph state including variables and assets has "
                         "already been saved. Please invoke "
                         "`add_meta_graph()` instead.")

  # Validate the signature def map to ensure all included TensorInfos are
  # properly populated.
  self._validate_signature_def_map(signature_def_map)

  # Save asset files and write them to disk, if any.
  self._save_and_write_assets(assets_collection)

  # Create the variables sub-directory, if it does not exist.
  variables_dir = os.path.join(
      compat.as_text(self._export_dir),
      compat.as_text(constants.VARIABLES_DIRECTORY))
  if not file_io.file_exists(variables_dir):
    file_io.recursive_create_dir(variables_dir)

  variables_path = os.path.join(
      compat.as_text(variables_dir),
      compat.as_text(constants.VARIABLES_FILENAME))

  if main_op is None:
    # Add legacy init op to the SavedModel.
    self._maybe_add_legacy_init_op(legacy_init_op)
  else:
    self._add_main_op(main_op)

  # Initialize a saver to generate a sharded output for all saveables in the
  # current scope.
  saver = tf_saver.Saver(
      variables._all_saveable_objects(),  # pylint: disable=protected-access
      sharded=True,
      write_version=saver_pb2.SaverDef.V2,
      allow_empty=True)

  # Save the variables. Also, disable writing the checkpoint state proto. The
  # file is not used during SavedModel loading. In addition, since a
  # SavedModel can be copied or moved, this keeps the checkpoint state from
  # becoming outdated.
  saver.save(sess, variables_path, write_meta_graph=False, write_state=False)

  # Export the meta graph def.
  # The graph almost certainly previously contained at least one Saver, and
  # possibly several (e.g. one for loading a pretrained embedding, and
  # another for the model weights). However, a *new* Saver was just created
  # that includes all of the variables. In the context of the SavedModel,
  # this new Saver is the only one that needs to be retained. The associated
  # checkpoint that was saved just above contains all of the variable values.
  # Thus, any preexisting Savers are redundant and useless at best, but worse
  # may break downstream graph-processing tools, and can be confusing during
  # debugging. It is therefore safe and wise to set `clear_extraneous_savers`
  # to `True`, since it removes both the extraneous SaverDefs and their
  # associated Save/Restore Ops from the graph.
  meta_graph_def = saver.export_meta_graph(
      clear_devices=clear_devices, clear_extraneous_savers=True)

  # Tag the meta graph def and add it to the SavedModel.
  self._tag_and_add_meta_graph(meta_graph_def, tags, signature_def_map)

  # Mark this instance of SavedModel as having saved variables, such that
  # subsequent attempts to save variables will fail.
  self._has_saved_variables = True
# Find the location of this exact file.
_current_file_location = _inspect.getfile(_inspect.currentframe())


def _running_from_pip_package():
  return any(
      _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs)


if _running_from_pip_package():
  for s in _site_packages_dirs:
    # TODO(gunan): Add sanity checks to loaded modules here.
    plugin_dir = _os.path.join(s, 'tensorflow-plugins')
    if _fi.file_exists(plugin_dir):
      _ll.load_library(plugin_dir)

# These symbols appear because we import the python package which
# in turn imports from tensorflow.core and tensorflow.python. They
# must come from this module. So python adds these symbols for the
# resolution to succeed.
# pylint: disable=undefined-variable
try:
  del python
except NameError:
  pass
try:
  del core
except NameError:
  pass