def __init__(self, file_io_module=None, bucket_name=None):
  if bucket_name is None:
    bucket_name = os.environ.get(self._test_bucket_env_key, None)
  if bucket_name is None:
    # Will mock GCS locally for tests.
    if file_io_module is None:
      raise ValueError('`file_io_module` must be provided for mocking')
    self.mock_gcs = True
    self.file_io_module = file_io_module
    self.local_objects = {}
    self.bucket_name = 'mock-bucket'
  else:
    # Will use a real bucket for tests.
    if bucket_name.startswith(self._gcs_prefix):
      bucket_name = bucket_name[len(self._gcs_prefix):]
    self.bucket_name = bucket_name
    if tf_file_io is None:
      raise ImportError(
          'tensorflow must be installed to read/write to GCS')
    try:
      # Check that the bucket exists and is accessible.
      tf_file_io.is_directory(self.bucket_path)
    except Exception:
      raise IOError(
          'could not access provided bucket {}'.format(self.bucket_path))
    self.mock_gcs = False
    self.file_io_module = None
    self.local_objects = None
  self.patched_file_io = None
  self._is_started = False
def testIsDirectory(self):
  dir_path = os.path.join(self._base_dir, "test_dir")
  # Failure for a non-existing dir.
  with self.assertRaises(errors.NotFoundError):
    file_io.is_directory(dir_path)
  file_io.create_dir(dir_path)
  self.assertTrue(file_io.is_directory(dir_path))
  file_path = os.path.join(dir_path, "test_file")
  file_io.FileIO(file_path, mode="w").write("test")
  # False for a file.
  self.assertFalse(file_io.is_directory(file_path))
def testIsDirectory(self):
  dir_path = os.path.join(self._base_dir, "test_dir")
  # False for a non-existing dir.
  self.assertFalse(file_io.is_directory(dir_path))
  file_io.create_dir(dir_path)
  self.assertTrue(file_io.is_directory(dir_path))
  file_path = os.path.join(dir_path, "test_file")
  file_io.FileIO(file_path, mode="w").write("test")
  # False for a file.
  self.assertFalse(file_io.is_directory(file_path))
  # Test that the value returned from `stat()` has `is_directory` set.
  file_statistics = file_io.stat(dir_path)
  self.assertTrue(file_statistics.is_directory)
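# A minimal sketch (not from the sources above) of file_io.is_directory
# semantics on local paths; the temp-dir location is an assumption. Note the
# two testIsDirectory variants above reflect different TF versions: the first
# expects errors.NotFoundError for a missing path, the second expects a plain
# False.
import tempfile

from tensorflow.python.lib.io import file_io

tmp_root = tempfile.mkdtemp()
nested = tmp_root + "/nested"
file_io.create_dir(nested)
print(file_io.is_directory(nested))             # True: just created
file_io.write_string_to_file(nested + "/f.txt", "data")
print(file_io.is_directory(nested + "/f.txt"))  # False: a regular file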
def test_chunked_file_dir(self, d):
  f = a.ChunkedFile("tmp_chunk")
  f.set_loc(ff(d))
  assert not f.exists()
  f.create_dirs()
  assert not f.exists()
  assert io.is_directory(d)
  f.clean()
  assert not f.exists()
  assert not io.is_directory(d)
def test_files_dir(self, d):
  f = a.Files("tmp_dir")
  f.set_loc(ff(d))
  assert not f.exists()
  f.create_dirs()
  assert not f.exists()
  assert io.is_directory(d)
  f.clean()
  assert not f.exists()
  assert not io.is_directory(d)
def _serve_metadata(self, query_params):
  run = query_params.get('run')
  if run is None:
    self.handler.respond('query parameter "run" is required', 'text/plain',
                         400)
    return
  name = query_params.get('name')
  if name is None:
    self.handler.respond('query parameter "name" is required', 'text/plain',
                         400)
    return
  if run not in self.configs:
    self.handler.respond('Unknown run: %s' % run, 'text/plain', 400)
    return
  config = self.configs[run]
  fpath = self._get_metadata_file_for_tensor(name, config)
  if not fpath:
    self.handler.respond(
        'No metadata file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    self.handler.respond('%s is not a file' % fpath, 'text/plain', 400)
    return

  with file_io.FileIO(fpath, 'r') as f:
    lines = []
    for line in f:
      lines.append(line)
      if len(lines) >= LIMIT_NUM_POINTS:
        break
  self.handler.respond(''.join(lines), 'text/plain')
def copy_file(src, dest):
  if not file_io.file_exists(src):
    raise Exception("Src file doesn't exist at %s" % src)
  if file_io.is_directory(src):
    copy_dir(src, dest)
    return
  file_io.copy(src, dest, overwrite=True)
def testWriteTransformFn(self):
  transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

  with beam.Pipeline() as pipeline:
    # Create an empty directory for the source saved model dir.
    saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
    file_io.recursive_create_dir(saved_model_dir)
    saved_model_dir_pcoll = (
        pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
    # Combine test metadata with a dict of PCollections resolving futures.
    deferred_metadata = pipeline | 'CreateDeferredMetadata' >> beam.Create(
        [test_metadata.COMPLETE_METADATA])
    metadata = beam_metadata_io.BeamDatasetMetadata(
        test_metadata.INCOMPLETE_METADATA, deferred_metadata)

    _ = ((saved_model_dir_pcoll, metadata)
         | transform_fn_io.WriteTransformFn(transform_output_dir))

  # Test reading with TFTransformOutput
  tf_transform_output = tft.TFTransformOutput(transform_output_dir)
  metadata = tf_transform_output.transformed_metadata
  self.assertEqual(metadata, test_metadata.COMPLETE_METADATA)

  transform_fn_dir = tf_transform_output.transform_savedmodel_dir
  self.assertTrue(file_io.file_exists(transform_fn_dir))
  self.assertTrue(file_io.is_directory(transform_fn_dir))
def __init__(self, dump_root):
  if not file_io.is_directory(dump_root):
    raise ValueError("Specified dump_root is not a directory: %s" % dump_root)
  metadata_paths = file_io.get_matching_files(
      os.path.join(dump_root, "*.metadata"))
  if not metadata_paths:
    raise ValueError("Cannot find any metadata file in directory: %s" %
                     dump_root)
  elif len(metadata_paths) > 1:
    raise ValueError(
        "Unexpected: Found multiple (%d) metadata files in directory: %s" %
        (len(metadata_paths), dump_root))
  self._metadata_path = compat.as_bytes(metadata_paths[0])
  self._metadata_reader = None

  prefix = metadata_paths[0][:-len(".metadata")]
  self._source_files_path = compat.as_bytes("%s.source_files" % prefix)
  self._stack_frames_path = compat.as_bytes("%s.stack_frames" % prefix)
  self._graphs_path = compat.as_bytes("%s.graphs" % prefix)
  self._execution_path = compat.as_bytes("%s.execution" % prefix)
  self._graph_execution_traces_path = compat.as_bytes(
      "%s.graph_execution_traces" % prefix)

  self._readers = dict()  # A map from file path to reader.
  # A map from file path to current reading offset.
  self._reader_offsets = dict()
  # Lock for reader creation.
  self._readers_lock = threading.Lock()
  # Locks for read operation on individual readers.
  self._reader_read_locks = dict()

  self._offsets = dict()
def testWriteTransformFn(self):
  path = os.path.join(self.get_temp_dir(), 'output')

  with beam.Pipeline() as pipeline:
    # Create an empty directory for the source saved model dir.
    saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
    file_io.recursive_create_dir(saved_model_dir)
    saved_model_dir_pcoll = (
        pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
    metadata = _TEST_METADATA
    deferred_metadata = (
        pipeline | 'CreateEmptyProperties' >> beam.Create([_FUTURES_DICT]))

    _ = ((saved_model_dir_pcoll, (metadata, deferred_metadata))
         | transform_fn_io.WriteTransformFn(path))

  transformed_metadata_dir = os.path.join(path, 'transformed_metadata')
  metadata = metadata_io.read_metadata(transformed_metadata_dir)
  self.assertEqual(metadata, _TEST_METADATA)

  transform_fn_dir = os.path.join(path, 'transform_fn')
  self.assertTrue(file_io.file_exists(transform_fn_dir))
  self.assertTrue(file_io.is_directory(transform_fn_dir))
def save_inference_model(export_dir, inference_model, session=None,
                         replace=True):
  if session is None:
    session = tf.get_default_session()
  assert session is not None

  if replace and is_directory(export_dir):
    get_logger().info('replacing %s', export_dir)
    delete_recursively(export_dir)

  prediction_signature = predict_signature_def(
      inputs={INPUTS_KEY: inference_model.inputs_tensor},
      outputs={
          k: v for k, v in {
              OUTPUTS_KEY: inference_model.outputs_tensor,
              LABELS_KEY: inference_model.labels_tensor,
              COLORS_KEY: inference_model.colors_tensor
          }.items() if v is not None
      })
  signature_def_map = {
      DEFAULT_SERVING_SIGNATURE_DEF_KEY: prediction_signature
  }

  legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
  builder = SavedModelBuilder(export_dir)
  builder.add_meta_graph_and_variables(
      session, [SERVING],
      signature_def_map=signature_def_map,
      legacy_init_op=legacy_init_op)
  builder.save()
def input_fn(files, num_epochs=None, shuffle=False, shared_name=None):
  # Get file names.
  if file_io.is_directory(files[0]):
    file_names = file_io.get_matching_files(files[0] + '/*tfrecord')
  else:
    file_names = files
  # Shuffle if required.
  if shuffle:
    shuffle_fn(file_names)
  # Queue with the file names that can be shared amongst workers during
  # training.
  filename_queue = tf.FIFOQueue(100, tf.string, shared_name=shared_name)
  enqueue_op = filename_queue.enqueue_many(
      [tf.train.limit_epochs(file_names, num_epochs)])
  close_op = filename_queue.close(cancel_pending_enqueues=True)
  # Create a queue runner and add it to the queue runners collection.
  qr = tf.train.QueueRunner(
      filename_queue, [enqueue_op], close_op,
      queue_closed_exception_types=(tf.errors.OutOfRangeError,
                                    tf.errors.CancelledError))
  tf.train.add_queue_runner(qr)
  # Read an example from file.
  reader = tf.TFRecordReader()
  _, example = reader.read(filename_queue)
  # Parse the example.
  image, ground_truth, example_name = parse_example(example)
  return image, ground_truth, example_name
def get_summary_dir(job_dir):
  n = 0
  while True:
    summary_dir = job_dir + '/train_' + str(n)
    if not file_io.is_directory(summary_dir):
      return summary_dir
    n += 1
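# Hypothetical usage of get_summary_dir above (the job_dir value is an
# assumption): each call probes train_0, train_1, ... and returns the first
# suffix that is not yet a directory, so successive runs get fresh summary
# dirs instead of clobbering earlier ones.
summary_dir = get_summary_dir('gs://my-bucket/jobs/job-0')
# e.g. 'gs://my-bucket/jobs/job-0/train_2' if train_0 and train_1 exist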
def _serve_bookmarks(self, request):
  run = request.args.get('run')
  if not run:
    return Respond(request, 'query parameter "run" is required', 'text/plain',
                   400)
  name = request.args.get('name')
  if name is None:
    return Respond(request, 'query parameter "name" is required',
                   'text/plain', 400)
  if run not in self.configs:
    return Respond(request, 'Unknown run: "%s"' % run, 'text/plain', 400)
  config = self.configs[run]
  fpath = self._get_bookmarks_file_for_tensor(name, config)
  if not fpath:
    return Respond(
        request,
        'No bookmarks file found for tensor "%s" in the config file "%s"' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
  fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    return Respond(request, '"%s" not found, or is not a file' % fpath,
                   'text/plain', 400)

  bookmarks_json = None
  with file_io.FileIO(fpath, 'rb') as f:
    bookmarks_json = f.read()
  return Respond(request, bookmarks_json, 'application/json')
def testWriteTransformFn(self):
  transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

  with beam.Pipeline() as pipeline:
    # Create an empty directory for the source saved model dir.
    saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
    file_io.recursive_create_dir(saved_model_dir)
    saved_model_dir_pcoll = (
        pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
    metadata = beam_metadata_io.BeamDatasetMetadata(
        _TEST_METADATA_WITH_FUTURES,
        {
            'a': pipeline | 'CreateA' >> beam.Create([3]),
        })

    _ = ((saved_model_dir_pcoll, metadata)
         | transform_fn_io.WriteTransformFn(transform_output_dir))

  # Test reading with TFTransformOutput
  tf_transform_output = tft.TFTransformOutput(transform_output_dir)
  metadata = tf_transform_output.transformed_metadata
  self.assertEqual(metadata, _TEST_METADATA)

  transform_fn_dir = tf_transform_output.transform_savedmodel_dir
  self.assertTrue(file_io.file_exists(transform_fn_dir))
  self.assertTrue(file_io.is_directory(transform_fn_dir))
def __init__(self, dump_root):
  if not file_io.is_directory(dump_root):
    raise ValueError("Specified dump_root is not a directory: %s" % dump_root)
  self._dump_root = dump_root
  self._metadata_paths = self._load_metadata_files()

  prefixes = [
      metadata_path[:-len(self._METADATA_SUFFIX)]
      for metadata_path in self._metadata_paths
  ]
  prefix = prefixes[0]  # This is the prefix of the main file set.
  self._source_files_path = compat.as_bytes(prefix + self._SOURCE_FILE_SUFFIX)
  self._stack_frames_path = compat.as_bytes(prefix +
                                            self._STACK_FRAMES_SUFFIX)
  self._graphs_path = compat.as_bytes(prefix + self._GRAPHS_SUFFIX)
  self._execution_path = compat.as_bytes(prefix + self._EXECUTION_SUFFIX)
  # There can be multiple .graph_execution_trace files, each belonging
  # to a file set generated on an individual host, in the case of
  # a distributed TensorFlow job.
  # This is different from the other debug event files in the file set.
  self._graph_execution_traces_paths = [
      compat.as_bytes(prefix + self._GRAPH_EXECUTION_TRACES_SUFFIX)
      for prefix in prefixes
  ]

  self._readers = dict()  # A map from file path to reader.
  # A map from file path to current reading offset.
  self._reader_offsets = dict()
  # Lock for reader creation.
  self._readers_lock = threading.Lock()
  # Locks for read operation on individual readers.
  self._reader_read_locks = dict()

  self._offsets = dict()
def create_dir_test():
  """Verifies file_io directory handling methods."""
  starttime = int(round(time.time() * 1000))
  dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime)
  print("Creating dir %s" % dir_name)
  file_io.create_dir(dir_name)
  elapsed = int(round(time.time() * 1000)) - starttime
  print("Created directory in: %d milliseconds" % elapsed)
  # Check that the directory exists.
  dir_exists = file_io.is_directory(dir_name)
  print("%s directory exists: %s" % (dir_name, dir_exists))

  # List contents of the just-created directory.
  print("Listing directory %s." % dir_name)
  starttime = int(round(time.time() * 1000))
  print(file_io.list_directory(dir_name))
  elapsed = int(round(time.time() * 1000)) - starttime
  print("Listed directory %s in %s milliseconds" % (dir_name, elapsed))

  # Delete the directory.
  print("Deleting directory %s." % dir_name)
  starttime = int(round(time.time() * 1000))
  file_io.delete_recursively(dir_name)
  elapsed = int(round(time.time() * 1000)) - starttime
  print("Deleted directory %s in %s milliseconds" % (dir_name, elapsed))
def _serve_bookmarks(self, query_params):
  run = query_params.get('run')
  if not run:
    self.handler.respond('query parameter "run" is required', 'text/plain',
                         400)
    return
  name = query_params.get('name')
  if name is None:
    self.handler.respond('query parameter "name" is required', 'text/plain',
                         400)
    return
  if run not in self.configs:
    self.handler.respond('Unknown run: %s' % run, 'text/plain', 400)
    return
  config = self.configs[run]
  fpath = self._get_bookmarks_file_for_tensor(name, config)
  if not fpath:
    self.handler.respond(
        'No bookmarks file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    self.handler.respond('%s is not a file' % fpath, 'text/plain', 400)
    return

  bookmarks_json = None
  with file_io.FileIO(fpath, 'r') as f:
    bookmarks_json = f.read()
  self.handler.respond(bookmarks_json, 'application/json')
def _serve_sprite_image(self, request):
  run = request.args.get('run')
  if not run:
    return Respond(request, 'query parameter "run" is required', 'text/plain',
                   400)
  name = request.args.get('name')
  if name is None:
    return Respond(request, 'query parameter "name" is required',
                   'text/plain', 400)
  if run not in self.configs:
    return Respond(request, 'Unknown run: "%s"' % run, 'text/plain', 400)
  config = self.configs[run]
  embedding_info = self._get_embedding(name, config)
  if not embedding_info or not embedding_info.sprite.image_path:
    return Respond(
        request,
        'No sprite image file found for tensor "%s" in the config file "%s"' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
  fpath = os.path.expanduser(embedding_info.sprite.image_path)
  fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    return Respond(request, '"%s" does not exist or is a directory' % fpath,
                   'text/plain', 400)

  f = file_io.FileIO(fpath, 'rb')
  encoded_image_string = f.read()
  f.close()
  image_type = imghdr.what(None, encoded_image_string)
  mime_type = _IMGHDR_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
  return Respond(request, encoded_image_string, mime_type)
def testWriteTransformFn(self):
  path = os.path.join(self.get_temp_dir(), 'output')

  with beam.Pipeline() as pipeline:
    # Create an empty directory for the source saved model dir.
    saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
    file_io.recursive_create_dir(saved_model_dir)
    saved_model_dir_pcoll = (
        pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
    metadata = beam_metadata_io.BeamDatasetMetadata(
        _TEST_METADATA_WITH_FUTURES,
        {
            'a': pipeline | 'CreateA' >> beam.Create([3]),
        })

    _ = ((saved_model_dir_pcoll, metadata)
         | transform_fn_io.WriteTransformFn(path))

  transformed_metadata_dir = os.path.join(
      path, transform_fn_io.TRANSFORMED_METADATA_DIR)
  metadata = metadata_io.read_metadata(transformed_metadata_dir)
  self.assertEqual(metadata, _TEST_METADATA)

  transform_fn_dir = os.path.join(path, transform_fn_io.TRANSFORM_FN_DIR)
  self.assertTrue(file_io.file_exists(transform_fn_dir))
  self.assertTrue(file_io.is_directory(transform_fn_dir))
def _delete_file_or_dir(pathname):
  if file_io.is_directory(pathname):
    file_io.delete_recursively(pathname)
    print(f'Deleted backup dir {pathname}')
  elif file_io.file_exists(pathname):
    file_io.delete_file(pathname)
    print(f'Deleted backup file {pathname}')
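# Hypothetical calls to _delete_file_or_dir above; the backup paths are
# assumptions. The helper is safe to call whether the path is a directory,
# a file, or absent (neither branch fires in the last case).
_delete_file_or_dir('/tmp/backup')        # a directory: removed recursively
_delete_file_or_dir('/tmp/backup.ckpt')   # a file: removed
_delete_file_or_dir('/tmp/missing')       # absent: no-op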
def _load_tf_custom_op(model_path):
  """Loads a custom TF OP (in .so format) from the /assets.extra directory."""
  assets_dir = os.path.join(model_path, _CUSTOM_OP_DIRECTORY_NAME)
  if file_io.is_directory(assets_dir):
    custom_ops_pattern = os.path.join(assets_dir, _CUSTOM_OP_SUFFIX)
    for custom_op_path_original in file_io.get_matching_files(
        custom_ops_pattern):
      logging.info("Found custom op file: %s", custom_op_path_original)
      if custom_op_path_original.startswith("gs://"):
        if not os.path.isdir(_CUSTOM_OP_LOCAL_DIR):
          os.makedirs(_CUSTOM_OP_LOCAL_DIR)
        custom_op_path_local = os.path.join(
            _CUSTOM_OP_LOCAL_DIR, os.path.basename(custom_op_path_original))
        logging.info("Copying custom op from: %s to: %s",
                     custom_op_path_original, custom_op_path_local)
        file_io.copy(custom_op_path_original, custom_op_path_local, True)
      else:
        custom_op_path_local = custom_op_path_original
      try:
        import tensorflow as tf  # pylint: disable=g-import-not-at-top
        logging.info("Loading custom op: %s", custom_op_path_local)
        logging.info("TF Version: %s", tf.__version__)
        tf.load_op_library(custom_op_path_local)
      except RuntimeError as e:
        logging.exception(
            "Failed to load custom op: %s with error: %s. Prediction "
            "will likely fail due to missing operations.",
            custom_op_path_local, e)
def down(id, cloud_path=None):
  shard = get_shard(id)
  to_path = "%s/%d" % (emb_path, shard)
  if not os.path.exists(to_path):
    try:
      os.mkdir(to_path)
    except OSError as e:
      if e.errno != 17:  # File exists
        raise e
  to_filepath = "%s/%d.emb" % (to_path, id)
  url = 'http://ml.daangn.com/articles/image_embeddings/%s' % id_to_path(id)
  logging.info('down: %s', url)
  result = call(
      ['curl', '-f', '--connect-timeout', '2', '-o', to_filepath, url])
  if not os.path.exists(to_filepath):
    return 0
  if os.stat(to_filepath).st_size < 1:
    os.remove(to_filepath)
    return 0
  if cloud_path:
    to_gs_filepath = '%s/%s' % (cloud_path, to_filepath)
    if file_io.file_exists(to_gs_filepath):
      return 0
    to_gs_path = '%s/%s' % (cloud_path, to_path)
    if not file_io.is_directory(to_gs_path):
      file_io.create_dir(to_gs_path)
    file_io.copy(to_filepath, to_gs_filepath)
  return 1
def _serve_sprite_image(self, request, query_params):
  run = query_params.get('run')
  if not run:
    request.respond('query parameter "run" is required', 'text/plain', 400)
    return
  name = query_params.get('name')
  if name is None:
    request.respond('query parameter "name" is required', 'text/plain', 400)
    return
  if run not in self.configs:
    request.respond('Unknown run: %s' % run, 'text/plain', 400)
    return
  config = self.configs[run]
  embedding_info = self._get_embedding(name, config)
  if not embedding_info or not embedding_info.sprite.image_path:
    request.respond(
        'No sprite image file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return
  fpath = embedding_info.sprite.image_path
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    request.respond('%s does not exist or is a directory' % fpath,
                    'text/plain', 400)
    return

  f = file_io.FileIO(fpath, 'rb')  # binary mode: sprite images are not text
  encoded_image_string = f.read()
  f.close()
  image_type = imghdr.what(None, encoded_image_string)
  mime_type = _IMGHDR_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
  request.respond(encoded_image_string, mime_type)
def _serve_bookmarks(self, request):
  run = request.args.get('run')
  if not run:
    return Respond(request, 'query parameter "run" is required', 'text/plain',
                   400)
  name = request.args.get('name')
  if name is None:
    return Respond(request, 'query parameter "name" is required',
                   'text/plain', 400)
  if run not in self.configs:
    return Respond(request, 'Unknown run: %s' % run, 'text/plain', 400)
  config = self.configs[run]
  fpath = self._get_bookmarks_file_for_tensor(name, config)
  if not fpath:
    return Respond(
        request,
        'No bookmarks file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    return Respond(request, '%s is not a file' % fpath, 'text/plain', 400)

  bookmarks_json = None
  with file_io.FileIO(fpath, 'rb') as f:
    bookmarks_json = f.read()
  return Respond(request, bookmarks_json, 'application/json')
def _serve_sprite_image(self, request):
  run = request.args.get('run')
  if not run:
    return Respond(request, 'query parameter "run" is required', 'text/plain',
                   400)
  name = request.args.get('name')
  if name is None:
    return Respond(request, 'query parameter "name" is required',
                   'text/plain', 400)
  if run not in self.configs:
    return Respond(request, 'Unknown run: %s' % run, 'text/plain', 400)
  config = self.configs[run]
  embedding_info = self._get_embedding(name, config)
  if not embedding_info or not embedding_info.sprite.image_path:
    return Respond(
        request,
        'No sprite image file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
  fpath = os.path.expanduser(embedding_info.sprite.image_path)
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    return Respond(request, '%s does not exist or is a directory' % fpath,
                   'text/plain', 400)

  f = file_io.FileIO(fpath, 'rb')
  encoded_image_string = f.read()
  f.close()
  image_type = imghdr.what(None, encoded_image_string)
  mime_type = _IMGHDR_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
  return Respond(request, encoded_image_string, mime_type)
def _get_tfrecord_filenames(dir_path):
  assert isinstance(dir_path, str), "dir_path is not a String: %r" % dir_path
  assert file_io.file_exists(dir_path), \
      "directory `%s` does not exist" % dir_path
  assert file_io.is_directory(dir_path), "`%s` is not a directory" % dir_path
  flist = file_io.list_directory(dir_path)
  input_files = [
      pjoin(dir_path, x)
      for x in filter(lambda f: not f.startswith("_"), flist)
  ]
  filenames = tf.placeholder_with_default(input_files, shape=[None])
  return filenames
def _get_checkpoint_filename(filepath):
  from tensorflow.python.lib.io.file_io import is_directory
  from tensorflow.python.training.checkpoint_management import (
      latest_checkpoint)

  dirpath = filepath
  if is_directory(filepath):
    filepath = latest_checkpoint(filepath)
  if filepath is None:
    raise ValueError("Couldn't find 'checkpoint' file or checkpoints in "
                     "given directory %s" % dirpath)
  return filepath
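# Hypothetical usage of _get_checkpoint_filename above; the paths are
# assumptions. A directory argument resolves to its latest checkpoint prefix,
# while a concrete checkpoint prefix passes through unchanged.
ckpt = _get_checkpoint_filename('/tmp/train_dir')
# e.g. '/tmp/train_dir/model.ckpt-1234'
ckpt = _get_checkpoint_filename('/tmp/train_dir/model.ckpt-1234')  # unchanged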
def copy_dir(src, dest):
  if not file_io.file_exists(src):
    raise Exception("Src dir doesn't exist at %s" % src)
  if not file_io.is_directory(src):
    copy_file(src, dest)
    return
  if not file_io.file_exists(dest):
    file_io.create_dir(dest)
  for filename in file_io.list_directory(src):
    new_src = os.path.join(src, filename)
    new_dest = os.path.join(dest, filename)
    copy_file(new_src, new_dest)
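# A small usage sketch (assumed bucket names) for the mutually recursive
# copy_file/copy_dir pair above: either entry point handles both files and
# directories, so callers need not check the path type first.
copy_dir('gs://src-bucket/model', 'gs://dst-bucket/model')  # whole tree
copy_file('gs://src-bucket/model/saved_model.pb',
          'gs://dst-bucket/model/saved_model.pb')           # single file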
def set_latest_nr(self):
  logs_dir = '{prefix}/runs/{run_id}/_logs/'.format(
      prefix=os.environ["PREFIX"],
      run_id=self.run_id,
  )
  if io.is_directory(logs_dir):
    nrs = [int(fn) for fn in io.list_directory(logs_dir) if fn.isdigit()]
    self.latest_nr = sorted(nrs)[-1] + 1 if nrs else 1
  else:
    self.latest_nr = 1
def testWriteTransformFnIsRetryable(self):
  tft.test_case.skip_if_external_environment(
      'Retries are currently not available on this environment.')
  original_copy_tree_to_unique_temp_dir = (
      transform_fn_io._copy_tree_to_unique_temp_dir)

  def mock_copy_tree_to_unique_temp_dir(source, base_temp_dir_path):
    """Mocks transform_fn_io._copy_tree to fail the first time it is called
    by this test, thus forcing a retry which should succeed."""
    global _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED
    if not _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED:
      _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED = True
      original_copy_tree_to_unique_temp_dir(source, base_temp_dir_path)
      raise ArithmeticError('Some error')
    return original_copy_tree_to_unique_temp_dir(source, base_temp_dir_path)

  with self._makeTestPipeline() as pipeline:
    transform_output_dir = os.path.join(self.get_temp_dir(), 'output')
    # Create an empty directory for the source saved model dir.
    saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
    file_io.recursive_create_dir(saved_model_dir)
    saved_model_path = os.path.join(saved_model_dir, 'saved_model')
    with file_io.FileIO(saved_model_path, mode='w') as f:
      f.write('some content')
    saved_model_dir_pcoll = (
        pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))
    # Combine test metadata with a dict of PCollections resolving futures.
    deferred_metadata = pipeline | 'CreateDeferredMetadata' >> beam.Create(
        [test_metadata.COMPLETE_METADATA])
    metadata = beam_metadata_io.BeamDatasetMetadata(
        test_metadata.INCOMPLETE_METADATA, deferred_metadata)

    with mock.patch.object(transform_fn_io, '_copy_tree_to_unique_temp_dir',
                           mock_copy_tree_to_unique_temp_dir):
      _ = ((saved_model_dir_pcoll, metadata)
           | transform_fn_io.WriteTransformFn(transform_output_dir))

  # Test reading with TFTransformOutput
  tf_transform_output = tft.TFTransformOutput(transform_output_dir)
  metadata = tf_transform_output.transformed_metadata
  self.assertEqual(metadata, test_metadata.COMPLETE_METADATA)

  transform_fn_dir = tf_transform_output.transform_savedmodel_dir
  self.assertTrue(file_io.file_exists(transform_fn_dir))
  self.assertTrue(file_io.is_directory(transform_fn_dir))
  # Check temp directory created by failed run was cleaned up.
  self.assertEqual(2, len(file_io.list_directory(transform_output_dir)))
def make_tfrecords(brats_root, year, output_directory, ids, sequential=False):
  if not file_io.is_directory(output_directory):
    logger.debug("Creating output directory: %s" % output_directory)
    try:
      file_io.create_dir(output_directory)
    except FileExistsError:
      logger.debug("Output directory exists: %s" % output_directory)

  if sequential:
    for patient_id in ids:
      make_tfrecord(brats_root, year, output_directory, patient_id)
  else:
    pool = mp.Pool(pool_size)
    arg_list = [(brats_root, year, output_directory, patient_id)
                for patient_id in ids]
    pool.map(_make_tfrecord_shell, arg_list)
def recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.

  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path.
  """
  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True)
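# Hypothetical invocation of recursive_copy above; the bucket paths are
# assumptions. Unlike the copy_dir variant earlier, this version always
# creates dest_dir up front and overwrites files that already exist there.
recursive_copy('gs://source-bucket/export', 'gs://dest-bucket/export')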
def __get_tf_record_spec_path(tf_record_desc_path, dir_path):
  if tf_record_desc_path is not None:
    assert isinstance(tf_record_desc_path, str), \
        "tf_record_desc_path is not a String: %r" % tf_record_desc_path
    assert file_io.file_exists(tf_record_desc_path), \
        "feature desc `%s` does not exist" % tf_record_desc_path
    return tf_record_desc_path
  assert isinstance(dir_path, str), "dir_path is not a String: %r" % dir_path
  assert file_io.file_exists(dir_path), \
      "directory `%s` does not exist" % dir_path
  assert file_io.is_directory(dir_path), \
      "`%s` is not a directory" % dir_path
  from os.path import join as pjoin
  default_tf_record_spec_filename = "_tf_record_spec.json"
  return pjoin(dir_path, default_tf_record_spec_filename)
def visualize_embedding(log_dir):
  ## set up directories
  model_dir = path.join(log_dir, 'w2v_model')
  tb_dir = path.join(model_dir, 'tb')
  embed_dir = path.join(tb_dir, 'embeddings')
  meta_dir = path.join(embed_dir, 'metadata.tsv')

  ## make the embedding dir if needed
  if not file_io.is_directory(embed_dir):
    file_io.create_dir(embed_dir)

  ## import the model and embedding matrix
  model = ImportGraph(model_dir)
  embedding, softmax_w, softmax_b = model.run()
  embedding = tf.Variable(embedding, name='embedding')

  ## get metadata from the BigQuery CMS and save it
  with open(log_dir + '/word_dic.pkl', 'rb') as fp:
    int_to_word, word_to_int = pickle.load(fp)
  pd.Series([x[1] for x in int_to_word.items()]).to_csv(
      meta_dir, sep='\t', index=False)

  ## start the session
  init = tf.global_variables_initializer()
  sess = tf.Session()
  sess.run(init)

  ## saver and writer
  saver = tf.train.Saver()
  writer = tf.summary.FileWriter(embed_dir, sess.graph)

  ## configure the embedding projector
  config = projector.ProjectorConfig()
  embedding_conf = config.embeddings.add()
  embedding_conf.tensor_name = embedding.name
  embedding_conf.metadata_path = meta_dir
  projector.visualize_embeddings(writer, config)
  saver.save(sess, embed_dir + '/embedding.ckpt')

  ## close the session
  sess.close()
  print('Run `tensorboard --logdir={0}` to visualize the result on '
        'TensorBoard'.format(tb_dir))
def _serve_metadata(self, request):
  run = request.args.get('run')
  if run is None:
    return Respond(request, 'query parameter "run" is required', 'text/plain',
                   400)
  name = request.args.get('name')
  if name is None:
    return Respond(request, 'query parameter "name" is required',
                   'text/plain', 400)
  num_rows = _parse_positive_int_param(request, 'num_rows')
  if num_rows == -1:
    return Respond(request, 'query parameter num_rows must be integer > 0',
                   'text/plain', 400)
  if run not in self.configs:
    return Respond(request, 'Unknown run: "%s"' % run, 'text/plain', 400)
  config = self.configs[run]
  fpath = self._get_metadata_file_for_tensor(name, config)
  if not fpath:
    return Respond(
        request,
        'No metadata file found for tensor "%s" in the config file "%s"' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
  fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    return Respond(request, '"%s" not found, or is not a file' % fpath,
                   'text/plain', 400)

  num_header_rows = 0
  with file_io.FileIO(fpath, 'r') as f:
    lines = []
    # Stream reading the file with early break in case the file doesn't
    # fit in memory.
    for line in f:
      lines.append(line)
      if len(lines) == 1 and '\t' in lines[0]:
        num_header_rows = 1
      if num_rows and len(lines) >= num_rows + num_header_rows:
        break
  return Respond(request, ''.join(lines), 'text/plain')
def _recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.

  Args:
    src_dir: gcs or local path.
    dest_dir: gcs or local path. When called, dest_dir should exist.
  """
  src_dir = python_portable_string(src_dir)
  dest_dir = python_portable_string(dest_dir)

  file_io.recursive_create_dir(dest_dir)
  for file_name in file_io.list_directory(src_dir):
    old_path = os.path.join(src_dir, file_name)
    new_path = os.path.join(dest_dir, file_name)

    if file_io.is_directory(old_path):
      _recursive_copy(old_path, new_path)
    else:
      file_io.copy(old_path, new_path, overwrite=True)
def _serve_metadata(self, request, query_params):
  run = query_params.get('run')
  if run is None:
    request.respond('query parameter "run" is required', 'text/plain', 400)
    return
  name = query_params.get('name')
  if name is None:
    request.respond('query parameter "name" is required', 'text/plain', 400)
    return
  num_rows = _parse_positive_int_param(request, query_params, 'num_rows')
  if num_rows == -1:
    return
  if run not in self.configs:
    request.respond('Unknown run: %s' % run, 'text/plain', 400)
    return
  config = self.configs[run]
  fpath = self._get_metadata_file_for_tensor(name, config)
  if not fpath:
    request.respond(
        'No metadata file found for tensor %s in the config file %s' %
        (name, self.config_fpaths[run]), 'text/plain', 400)
    return
  if not file_io.file_exists(fpath) or file_io.is_directory(fpath):
    request.respond('%s is not a file' % fpath, 'text/plain', 400)
    return

  num_header_rows = 0
  with file_io.FileIO(fpath, 'r') as f:
    lines = []
    # Stream reading the file with early break in case the file doesn't
    # fit in memory.
    for line in f:
      lines.append(line)
      if len(lines) == 1 and '\t' in lines[0]:
        num_header_rows = 1
      if num_rows and len(lines) >= num_rows + num_header_rows:
        break
  request.respond(''.join(lines), 'text/plain')
def load_library(library_location):
  """Loads a TensorFlow plugin.

  "library_location" can be a path to a specific shared object, or a folder.
  If it is a folder, all shared objects that are named "libtfkernel*" will be
  loaded. When the library is loaded, kernels registered in the library via
  the `REGISTER_*` macros are made available in the TensorFlow process.

  Args:
    library_location: Path to the plugin or the folder of plugins.
      Relative or absolute filesystem path to a dynamic library file or
      folder.

  Returns:
    None

  Raises:
    OSError: When the file to be loaded is not found.
    RuntimeError: when unable to load the library.
  """
  if file_io.file_exists(library_location):
    if file_io.is_directory(library_location):
      directory_contents = file_io.list_directory(library_location)

      kernel_libraries = [
          os.path.join(library_location, f) for f in directory_contents
          if _is_shared_object(f)
      ]
    else:
      kernel_libraries = [library_location]

    for lib in kernel_libraries:
      py_tf.TF_LoadLibrary(lib)

  else:
    raise OSError(
        errno.ENOENT,
        'The file or folder to load kernel libraries from does not exist.',
        library_location)
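# Hypothetical calls to load_library above; the plugin paths are assumptions.
# A single shared object is loaded directly, while a folder argument loads
# every shared object in it that matches the libtfkernel* naming convention.
load_library('/opt/tf_plugins/libtfkernel_my_op.so')  # one shared object
load_library('/opt/tf_plugins')                       # all libtfkernel* inside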
def export_fn(estimator, export_dir_base, checkpoint_path=None,
              eval_result=None):
  with ops.Graph().as_default() as g:
    contrib_variables.create_global_step(g)

    input_ops = serving_from_csv_input(train_config, args, keep_target)
    model_fn_ops = estimator._call_model_fn(input_ops.features,
                                            None,
                                            model_fn_lib.ModeKeys.INFER)
    output_fetch_tensors = make_output_tensors(
        train_config=train_config,
        args=args,
        input_ops=input_ops,
        model_fn_ops=model_fn_ops,
        keep_target=keep_target)

    signature_def_map = {
        'serving_default':
            signature_def_utils.predict_signature_def(
                input_ops.default_inputs, output_fetch_tensors)
    }

    if not checkpoint_path:
      # Locate the latest checkpoint
      checkpoint_path = saver.latest_checkpoint(estimator._model_dir)
    if not checkpoint_path:
      raise NotFittedError("Couldn't find trained model at %s." %
                           estimator._model_dir)

    export_dir = saved_model_export_utils.get_timestamped_export_dir(
        export_dir_base)

    with tf_session.Session('') as session:
      # variables.initialize_local_variables()
      variables.local_variables_initializer()
      data_flow_ops.tables_initializer()
      saver_for_restore = saver.Saver(
          variables.global_variables(),
          sharded=True)
      saver_for_restore.restore(session, checkpoint_path)

      init_op = control_flow_ops.group(
          variables.local_variables_initializer(),
          data_flow_ops.tables_initializer())

      # Perform the export
      builder = saved_model_builder.SavedModelBuilder(export_dir)
      builder.add_meta_graph_and_variables(
          session, [tag_constants.SERVING],
          signature_def_map=signature_def_map,
          assets_collection=ops.get_collection(
              ops.GraphKeys.ASSET_FILEPATHS),
          legacy_init_op=init_op)
      builder.save(False)

      # Add the extra assets
      if assets_extra:
        assets_extra_path = os.path.join(compat.as_bytes(export_dir),
                                         compat.as_bytes('assets.extra'))
        for dest_relative, source in assets_extra.items():
          dest_absolute = os.path.join(compat.as_bytes(assets_extra_path),
                                       compat.as_bytes(dest_relative))
          dest_path = os.path.dirname(dest_absolute)
          gfile.MakeDirs(dest_path)
          gfile.Copy(source, dest_absolute)

  # only keep the last 3 models
  saved_model_export_utils.garbage_collect_exports(
      python_portable_string(export_dir_base),
      exports_to_keep=3)

  # save the last model to the model folder.
  # export_dir_base = A/B/intermediate_models/
  if keep_target:
    final_dir = os.path.join(args.job_dir, 'evaluation_model')
  else:
    final_dir = os.path.join(args.job_dir, 'model')
  if file_io.is_directory(final_dir):
    file_io.delete_recursively(final_dir)
  file_io.recursive_create_dir(final_dir)
  _recursive_copy(export_dir, final_dir)

  return export_dir
def is_directory(cls, dirname):
  return file_io.is_directory(dirname)
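# A minimal sketch of how the bare classmethod above might sit on a wrapper
# class (the IOUtils name is an assumption); such a thin wrapper keeps
# file_io behind a seam that tests can subclass or patch.
class IOUtils(object):

  @classmethod
  def is_directory(cls, dirname):
    return file_io.is_directory(dirname)

print(IOUtils.is_directory('/tmp'))  # True on most Unix systems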
def create_dir_test():
  """Verifies file_io directory handling methods."""
  # Test directory creation.
  starttime_ms = int(round(time.time() * 1000))
  dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime_ms)
  print("Creating dir %s" % dir_name)
  file_io.create_dir(dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Created directory in: %d milliseconds" % elapsed_ms)

  # Check that the directory exists.
  dir_exists = file_io.is_directory(dir_name)
  assert dir_exists
  print("%s directory exists: %s" % (dir_name, dir_exists))

  # Test recursive directory creation.
  starttime_ms = int(round(time.time() * 1000))
  recursive_dir_name = "%s/%s/%s" % (dir_name, "nested_dir1", "nested_dir2")
  print("Creating recursive dir %s" % recursive_dir_name)
  file_io.recursive_create_dir(recursive_dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Created directory recursively in: %d milliseconds" % elapsed_ms)

  # Check that the directory exists.
  recursive_dir_exists = file_io.is_directory(recursive_dir_name)
  assert recursive_dir_exists
  print("%s directory exists: %s" % (recursive_dir_name, recursive_dir_exists))

  # Create some contents in the just-created directory and list the contents.
  num_files = 10
  files_to_create = ["file_%d.txt" % n for n in range(num_files)]
  for file_num in files_to_create:
    file_name = "%s/%s" % (dir_name, file_num)
    print("Creating file %s." % file_name)
    file_io.write_string_to_file(file_name, "test file.")

  print("Listing directory %s." % dir_name)
  starttime_ms = int(round(time.time() * 1000))
  directory_contents = file_io.list_directory(dir_name)
  print(directory_contents)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Listed directory %s in %s milliseconds" % (dir_name, elapsed_ms))
  assert set(directory_contents) == set(files_to_create + ["nested_dir1/"])

  # Test directory renaming.
  dir_to_rename = "%s/old_dir" % dir_name
  new_dir_name = "%s/new_dir" % dir_name
  file_io.create_dir(dir_to_rename)
  assert file_io.is_directory(dir_to_rename)
  assert not file_io.is_directory(new_dir_name)

  starttime_ms = int(round(time.time() * 1000))
  print("Will try renaming directory %s to %s" % (dir_to_rename,
                                                  new_dir_name))
  file_io.rename(dir_to_rename, new_dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  print("Renamed directory %s to %s in %s milliseconds" % (
      dir_to_rename, new_dir_name, elapsed_ms))
  assert not file_io.is_directory(dir_to_rename)
  assert file_io.is_directory(new_dir_name)

  # Test deleting the directory recursively.
  print("Deleting directory recursively %s." % dir_name)
  starttime_ms = int(round(time.time() * 1000))
  file_io.delete_recursively(dir_name)
  elapsed_ms = int(round(time.time() * 1000)) - starttime_ms
  dir_exists = file_io.is_directory(dir_name)
  assert not dir_exists
  print("Deleted directory recursively %s in %s milliseconds" % (
      dir_name, elapsed_ms))
def export_fn(estimator, export_dir_base, checkpoint_path=None,
              eval_result=None):
  with ops.Graph().as_default() as g:
    contrib_variables.create_global_step(g)

    input_ops = feature_transforms.build_csv_serving_tensors_for_training_step(
        args.analysis, features, schema, stats, keep_target)
    model_fn_ops = estimator._call_model_fn(input_ops.features,
                                            None,
                                            model_fn_lib.ModeKeys.INFER)
    output_fetch_tensors = make_prediction_output_tensors(
        args=args,
        features=features,
        input_ops=input_ops,
        model_fn_ops=model_fn_ops,
        keep_target=keep_target)

    # Don't use signature_def_utils.predict_signature_def as that renames
    # tensor names if there is only 1 input/output tensor!
    signature_inputs = {
        key: tf.saved_model.utils.build_tensor_info(tensor)
        for key, tensor in six.iteritems(input_ops.default_inputs)
    }
    signature_outputs = {
        key: tf.saved_model.utils.build_tensor_info(tensor)
        for key, tensor in six.iteritems(output_fetch_tensors)
    }
    signature_def_map = {
        'serving_default':
            signature_def_utils.build_signature_def(
                signature_inputs, signature_outputs,
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
    }

    if not checkpoint_path:
      # Locate the latest checkpoint
      checkpoint_path = saver.latest_checkpoint(estimator._model_dir)
    if not checkpoint_path:
      raise ValueError("Couldn't find trained model at %s." %
                       estimator._model_dir)

    export_dir = saved_model_export_utils.get_timestamped_export_dir(
        export_dir_base)

    if (model_fn_ops.scaffold is not None and
        model_fn_ops.scaffold.saver is not None):
      saver_for_restore = model_fn_ops.scaffold.saver
    else:
      saver_for_restore = saver.Saver(sharded=True)

    with tf_session.Session('') as session:
      saver_for_restore.restore(session, checkpoint_path)
      init_op = control_flow_ops.group(
          variables.local_variables_initializer(),
          resources.initialize_resources(resources.shared_resources()),
          tf.tables_initializer())

      # Perform the export
      builder = saved_model_builder.SavedModelBuilder(export_dir)
      builder.add_meta_graph_and_variables(
          session, [tag_constants.SERVING],
          signature_def_map=signature_def_map,
          assets_collection=ops.get_collection(
              ops.GraphKeys.ASSET_FILEPATHS),
          legacy_init_op=init_op)
      builder.save(False)

      # Add the extra assets
      if assets_extra:
        assets_extra_path = os.path.join(compat.as_bytes(export_dir),
                                         compat.as_bytes('assets.extra'))
        for dest_relative, source in assets_extra.items():
          dest_absolute = os.path.join(compat.as_bytes(assets_extra_path),
                                       compat.as_bytes(dest_relative))
          dest_path = os.path.dirname(dest_absolute)
          file_io.recursive_create_dir(dest_path)
          file_io.copy(source, dest_absolute)

  # only keep the last 3 models
  saved_model_export_utils.garbage_collect_exports(
      export_dir_base,
      exports_to_keep=3)

  # save the last model to the model folder.
  # export_dir_base = A/B/intermediate_models/
  if keep_target:
    final_dir = os.path.join(args.job_dir, 'evaluation_model')
  else:
    final_dir = os.path.join(args.job_dir, 'model')
  if file_io.is_directory(final_dir):
    file_io.delete_recursively(final_dir)
  file_io.recursive_create_dir(final_dir)
  recursive_copy(export_dir, final_dir)

  return export_dir