def create_dir_test():
    """Create, inspect, list and delete a timestamped directory via file_io.

    The directory lives under FLAGS.gcs_bucket_url. Every step prints what it
    does together with the elapsed wall-clock time in milliseconds.
    """

    def _now_ms():
        # Wall-clock time rounded to whole milliseconds.
        return int(round(time.time() * 1000))

    # Create the directory; the creation timestamp doubles as a unique suffix.
    began = _now_ms()
    dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, began)
    print("Creating dir %s" % dir_name)
    file_io.create_dir(dir_name)
    took = _now_ms() - began
    print("Created directory in: %d milliseconds" % took)

    # Report whether file_io sees the new directory.
    print("%s directory exists: %s" % (dir_name, file_io.is_directory(dir_name)))

    # List the (freshly created) directory.
    print("Listing directory %s." % dir_name)
    began = _now_ms()
    print(file_io.list_directory(dir_name))
    took = _now_ms() - began
    print("Listed directory %s in %s milliseconds" % (dir_name, took))

    # Remove the directory again.
    print("Deleting directory %s." % dir_name)
    began = _now_ms()
    file_io.delete_recursively(dir_name)
    took = _now_ms() - began
    print("Deleted directory %s in %s milliseconds" % (dir_name, took))
def testListDirectory(self):
    """Check that list_directory reports the direct children of a directory.

    Three files and one sub-directory (itself containing a file) are created;
    the listing is expected to contain the three files and "sub_dir" only.
    """
    dir_path = os.path.join(self._base_dir, "test_dir")
    file_io.create_dir(dir_path)
    files = ["file1.txt", "file2.txt", "file3.txt"]
    for name in files:
        file_path = os.path.join(dir_path, name)
        # Close each handle explicitly so the content is flushed before the
        # directory is listed, instead of relying on garbage collection.
        with file_io.FileIO(file_path, mode="w") as f:
            f.write("testing")
    subdir_path = os.path.join(dir_path, "sub_dir")
    file_io.create_dir(subdir_path)
    subdir_file_path = os.path.join(subdir_path, "file4.txt")
    with file_io.FileIO(subdir_file_path, mode="w") as f:
        f.write("testing")
    dir_list = file_io.list_directory(dir_path)
    self.assertItemsEqual(files + ["sub_dir"], dir_list)
def testListDirectory(self):
    """Check list_directory against byte-string names of the direct children.

    Creates three files and a sub-directory holding a fourth file, then
    expects the listing to equal the three file names plus b"sub_dir".
    """
    base = os.path.join(self._base_dir, "test_dir")
    file_io.create_dir(base)

    files = [b"file1.txt", b"file2.txt", b"file3.txt"]
    for raw_name in files:
        # Names are kept as bytes for the assertion; paths need a str.
        target = os.path.join(base, compat.as_str_any(raw_name))
        file_io.write_string_to_file(target, "testing")

    nested = os.path.join(base, "sub_dir")
    file_io.create_dir(nested)
    file_io.write_string_to_file(os.path.join(nested, "file4.txt"), "testing")

    expected = files + [b"sub_dir"]
    listed = file_io.list_directory(base)
    self.assertItemsEqual(expected, listed)
def testListDirectory(self, join):
    """Check list_directory using a caller-supplied path-joining function.

    Args:
      join: callable used to build every path; its results are passed through
        str() before being joined again.
    """
    dir_path = join(self._base_dir, "test_dir")
    file_io.create_dir(dir_path)
    files = ["file1.txt", "file2.txt", "file3.txt"]
    for name in files:
        file_path = join(str(dir_path), name)
        # Close each handle explicitly so the content is flushed before the
        # directory is listed, instead of relying on garbage collection.
        with file_io.FileIO(file_path, mode="w") as f:
            f.write("testing")
    subdir_path = join(str(dir_path), "sub_dir")
    file_io.create_dir(subdir_path)
    subdir_file_path = join(str(subdir_path), "file4.txt")
    with file_io.FileIO(subdir_file_path, mode="w") as f:
        f.write("testing")
    dir_list = file_io.list_directory(dir_path)
    self.assertItemsEqual(files + ["sub_dir"], dir_list)
def _get_tfrecord_filenames(dir_path):
    """Return a placeholder defaulting to the file paths found in dir_path.

    Entries whose name starts with "_" are skipped. The remaining names are
    joined with dir_path and wrapped in tf.placeholder_with_default with
    shape [None].

    Raises:
      AssertionError: if dir_path is not a str, does not exist, or is not a
        directory.
    """
    assert isinstance(dir_path, str), "dir_path is not a String: %r" % dir_path
    assert file_io.file_exists(dir_path), (
        "directory `%s` does not exist" % dir_path)
    assert file_io.is_directory(dir_path), (
        "`%s` is not a directory" % dir_path)

    input_files = [
        pjoin(dir_path, entry)
        for entry in file_io.list_directory(dir_path)
        if not entry.startswith("_")
    ]
    return tf.placeholder_with_default(input_files, shape=[None])
def get_record_id_map(brats_tfrecords_dir):
    """Build the mapping patient_id --> TFRecord file path.

    :param brats_tfrecords_dir: directory containing all TFRecords
    :return: dict whose keys come from get_id_of_TFRecord(file name) and whose
        values are the file names joined with brats_tfrecords_dir
    """
    return {
        get_id_of_TFRecord(record_name): os.path.join(brats_tfrecords_dir,
                                                      record_name)
        for record_name in file_io.list_directory(brats_tfrecords_dir)
    }
def testWriteTransformFnIsRetryable(self):
    """Checks that WriteTransformFn still succeeds when its copy step fails once.

    transform_fn_io._copy_tree_to_unique_temp_dir is patched with a wrapper
    that performs the real copy and then raises on its first invocation. The
    test then reads the output back and checks the metadata, the transform_fn
    directory and the number of entries left in the output directory.
    """
    tft.test_case.skip_if_external_environment(
        'Retries are currently not available on this environment.')
    # Keep a handle on the real implementation so the mock can delegate to it.
    original_copy_tree_to_unique_temp_dir = (
        transform_fn_io._copy_tree_to_unique_temp_dir)

    def mock_copy_tree_to_unique_temp_dir(source, base_temp_dir_path):
        """Mocks transform_fn_io._copy_tree to fail the first time it is called by this test, thus forcing a retry which should succeed."""
        # Module-level flag: remembers whether the first (failing) call has
        # already happened.
        global _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED
        if not _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED:
            _COPY_TREE_TO_UNIQUE_TEMP_DIR_CALLED = True
            # Do the real copy first so the failed attempt leaves a temp
            # directory behind, then fail.
            original_copy_tree_to_unique_temp_dir(source, base_temp_dir_path)
            raise ArithmeticError('Some error')
        return original_copy_tree_to_unique_temp_dir(
            source, base_temp_dir_path)

    with self._makeTestPipeline() as pipeline:
        transform_output_dir = os.path.join(self.get_temp_dir(), 'output')

        # Create an empty directory for the source saved model dir.
        saved_model_dir = os.path.join(self.get_temp_dir(), 'source')
        file_io.recursive_create_dir(saved_model_dir)
        saved_model_path = os.path.join(saved_model_dir, 'saved_model')
        with file_io.FileIO(saved_model_path, mode='w') as f:
            f.write('some content')
        saved_model_dir_pcoll = (
            pipeline | 'CreateSavedModelDir' >> beam.Create([saved_model_dir]))

        # Combine test metadata with a dict of PCollections resolving futures.
        deferred_metadata = pipeline | 'CreateDeferredMetadata' >> beam.Create(
            [test_metadata.COMPLETE_METADATA])
        metadata = beam_metadata_io.BeamDatasetMetadata(
            test_metadata.INCOMPLETE_METADATA, deferred_metadata)

        # The patch is active only while the write transform is applied.
        with mock.patch.object(transform_fn_io, '_copy_tree_to_unique_temp_dir',
                               mock_copy_tree_to_unique_temp_dir):
            _ = ((saved_model_dir_pcoll, metadata)
                 | transform_fn_io.WriteTransformFn(transform_output_dir))

    # Test reading with TFTransformOutput
    tf_transform_output = tft.TFTransformOutput(transform_output_dir)
    metadata = tf_transform_output.transformed_metadata
    self.assertEqual(metadata, test_metadata.COMPLETE_METADATA)

    transform_fn_dir = tf_transform_output.transform_savedmodel_dir
    self.assertTrue(file_io.file_exists(transform_fn_dir))
    self.assertTrue(file_io.is_directory(transform_fn_dir))
    # Check temp directory created by failed run was cleaned up.
    self.assertEqual(2, len(file_io.list_directory(transform_output_dir)))
def proc_tensorboard_works_with_same_file_path(test_obj, saving_filepath):
    """Fit a model with a TensorBoard callback and expect log output on disk.

    Args:
      test_obj: test case object providing assertFalse / assertTrue.
      saving_filepath: log directory handed to the TensorBoard callback; it
        must not exist before training.
    """
    model, _, train_ds, steps = _model_setup(test_obj, file_format='')

    # The saving_filepath shouldn't exist at the beginning (as it's unique).
    already_present = file_io.file_exists(saving_filepath)
    test_obj.assertFalse(already_present)

    tensorboard_cb = callbacks.TensorBoard(log_dir=saving_filepath)
    model.fit(
        x=train_ds,
        epochs=2,
        steps_per_epoch=steps,
        callbacks=[tensorboard_cb])

    # Training must have left at least one entry in the log directory.
    written_entries = file_io.list_directory(saving_filepath)
    test_obj.assertTrue(written_entries)
def recursive_copy(src_dir, dest_dir):
    """Copy the contents of src_dir into the folder dest_dir.

    dest_dir is created (recursively) first; existing files are overwritten.

    Args:
      src_dir: gcs or local path.
      dest_dir: gcs or local path.
    """
    file_io.recursive_create_dir(dest_dir)
    for entry in file_io.list_directory(src_dir):
        source = os.path.join(src_dir, entry)
        target = os.path.join(dest_dir, entry)
        if not file_io.is_directory(source):
            file_io.copy(source, target, overwrite=True)
            continue
        # Sub-directory: descend into it.
        recursive_copy(source, target)
def main():
    """Run the model analysis and write its outputs.

    Parses the command-line arguments, flattens the comma-separated slice
    column groups, loads the JSON schema, picks the first entry of
    `<model>/tfma_eval_model_dir` as the exported eval model, runs the
    analysis, generates the static HTML output and finally writes the output
    location to /output.txt.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    args = parse_arguments()
    # Flatten ["a,b", "c"] into ["a", "b", "c"]. The outer loop (over groups)
    # has to come first in the comprehension; with the clauses swapped,
    # `column_group` is referenced before it is bound.
    args.slice_columns = [
        column
        for column_group in args.slice_columns
        for column in column_group.split(',')
    ]
    schema = json.loads(file_io.read_file_to_string(args.schema))
    eval_model_parent_dir = os.path.join(args.model, 'tfma_eval_model_dir')
    # The first listed entry is taken as the export directory.
    model_export_dir = os.path.join(
        eval_model_parent_dir,
        file_io.list_directory(eval_model_parent_dir)[0])
    run_analysis(args.output, model_export_dir, args.eval, schema,
                 args.project, args.mode, args.slice_columns)
    generate_static_html_output(args.output, args.slice_columns)
    with open('/output.txt', 'w') as f:
        f.write(args.output)
def list_odps_table_oss_files(table_name):
    """Return the paths of all files in the data directories of a table.

    The directory names are read from the 'dirs' entry of the JSON document
    stored at `<FLAGS.buckets>/<table_name>/.odps/.meta`; every entry listed
    in each of those directories is returned joined with its directory.
    """
    table_data_meta = os.path.join(FLAGS.buckets, table_name, '.odps/.meta')
    print(table_data_meta)

    meta_str = file_io.read_file_to_string(table_data_meta)
    print('meta_str:', meta_str)

    table_data_dirs = [
        os.path.join(FLAGS.buckets, table_name, '.odps', d)
        for d in json.loads(meta_str)['dirs']
    ]

    all_files = []
    for data_dir in table_data_dirs:
        print("data_dir:", data_dir)
        all_files.extend(
            os.path.join(data_dir, fname)
            for fname in file_io.list_directory(data_dir))
    return all_files
def _recursive_copy(src_dir, dest_dir):
    """Copy the contents of src_dir into the folder dest_dir.

    Both paths are first normalised with python_portable_string. dest_dir is
    created (recursively) before copying; existing files are overwritten.

    Args:
      src_dir: gcs or local path.
      dest_dir: gcs or local path.
    """
    src_dir = python_portable_string(src_dir)
    dest_dir = python_portable_string(dest_dir)
    file_io.recursive_create_dir(dest_dir)

    for entry in file_io.list_directory(src_dir):
        source = os.path.join(src_dir, entry)
        target = os.path.join(dest_dir, entry)
        if not file_io.is_directory(source):
            file_io.copy(source, target, overwrite=True)
            continue
        # Sub-directory: descend into it.
        _recursive_copy(source, target)
def load_paths_only(path):
    """Collect the paths of .gui files and of their matching images.

    For every entry of `path` whose name contains ".gui", the entry's path is
    recorded, and the path of a same-named .png (preferred) or .npz file is
    recorded if one exists.

    Returns:
      (gui_paths, img_paths) as two lists.

    Raises:
      AssertionError: if the two lists differ in length, i.e. some .gui file
        has neither a .png nor a .npz companion.
    """
    print("Parsing data...")
    gui_paths, img_paths = [], []

    for entry in file_io.list_directory(path):
        marker = entry.find(".gui")
        if marker == -1:
            continue

        gui_paths.append("{}/{}".format(path, entry))

        # Everything before the first ".gui" is the shared base name.
        stem = entry[:marker]
        png_path = "{}/{}.png".format(path, stem)
        npz_path = "{}/{}.npz".format(path, stem)
        if file_io.file_exists(png_path):
            img_paths.append(png_path)
        elif file_io.file_exists(npz_path):
            img_paths.append(npz_path)

    assert len(gui_paths) == len(img_paths)
    return gui_paths, img_paths
def generator(subdir, batch_size):
    """Yield (batch, labels) pairs built from the images listed in subdir.

    The entries of `subdir` are shuffled once and decoded by read_image in a
    multiprocessing Pool; results arrive in arbitrary order. Passes over the
    file list are repeated indefinitely, and a partially filled batch carries
    over into the next pass.

    NOTE(review): the same two arrays are yielded every time and are
    overwritten in place by later iterations — confirm consumers do not hold
    on to a yielded batch.

    Args:
      subdir: directory whose entries are handed to read_image.
      batch_size: number of samples per yielded batch.

    Yields:
      (batch, labels): arrays of shape (batch_size, 224, 224, 3) and
      (batch_size, LABELS).
    """
    desired_size = 224
    file_names = [(desired_size, subdir, fn)
                  for fn in file_io.list_directory(subdir)]
    np.random.shuffle(file_names)

    batch = np.zeros((batch_size, desired_size, desired_size, 3))
    labels = np.zeros((batch_size, LABELS))
    i = 0

    p = Pool()
    try:
        while True:
            decoded_any = False
            for im, label in p.imap_unordered(read_image, file_names):
                # -1 is the failure marker. It has to be tested as a scalar:
                # `im == -1` on an image array is element-wise and cannot be
                # used as an `if` condition.
                if np.isscalar(im) and im == -1:
                    continue
                decoded_any = True
                batch[i], labels[i] = im, label
                if i == batch_size - 1:
                    yield batch, labels
                    i = 0
                else:
                    i += 1
            if not decoded_any:
                # No usable image in a whole pass (including an empty
                # directory): stop instead of spinning forever.
                return
    finally:
        # Runs when the generator is closed or garbage-collected, so the
        # worker processes do not outlive it.
        p.terminate()
def create_dir_test():
    """Create, inspect and list a timestamped directory via file_io.

    The directory lives under FLAGS.gcs_bucket_url. Every step prints what it
    does together with the elapsed wall-clock time in milliseconds.
    """

    def _now_ms():
        # Wall-clock time rounded to whole milliseconds.
        return int(round(time.time() * 1000))

    # Create the directory; the creation timestamp doubles as a unique suffix.
    began = _now_ms()
    dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, began)
    print("Creating dir %s" % dir_name)
    file_io.create_dir(dir_name)
    took = _now_ms() - began
    print("Created directory in: %d milliseconds" % took)

    # Report whether file_io sees the new directory.
    print("%s directory exists: %s" % (dir_name, file_io.is_directory(dir_name)))

    # List the (freshly created) directory; the timer covers the print too.
    began = _now_ms()
    print("Listing directory %s." % dir_name)
    print(file_io.list_directory(dir_name))
    took = _now_ms() - began
    print("Listed directory %s in %s milliseconds" % (dir_name, took))
def load(self) -> T5ForConditionalGeneration:
    """Load the T5 model, refreshing the local checkpoint cache if needed.

    Unless `self.flush_cache` is set, the model is first loaded straight from
    `self.model_cache_dir`. If that fails with RuntimeError/OSError (or the
    cache is being flushed), the checkpoint files named in `self.ckpt_prefix`
    are copied from `self.url` into the cache directory, a `config.json` is
    written next to them, and the model is loaded from the cache.

    Raises:
      AssertionError: if `self.ckpt_prefix` holds no `model_checkpoint_path`
        entry.
    """
    try:
        if not self.flush_cache:
            return self._fix_t5_model(
                T5ForConditionalGeneration.from_pretrained(
                    str(self.model_cache_dir),
                    from_tf=True,
                    force_download=False))
    except (RuntimeError, OSError):
        logging.info('T5 model weights not in cache.')

    m = re.search(r'model_checkpoint_path: "(.+?)"', self.ckpt_prefix)
    assert m is not None, 'checkpoint file malformed'

    # Copy over checkpoint data
    # The checkpoint name is literal text: escape it so characters such as
    # "." in it are not interpreted as regex metacharacters.
    ckpt_patt = re.compile(
        rf'^{re.escape(m.group(1))}\.(data-\d+-of-\d+|index|meta)$')
    for name in file_io.list_directory(self.url):
        if not ckpt_patt.match(name):
            continue
        url = os.path.join(self.url, name)
        url_stat = file_io.stat(url)
        # Cached files keep the suffix but get the TRANSFO_PREFIX base name.
        cache_file_path = self.model_cache_dir / ckpt_patt.sub(
            rf'{TRANSFO_PREFIX}.\1', name)
        try:
            cs = os.stat(str(cache_file_path))
            # Skip files whose cached copy has the same size and is newer
            # than the remote one, unless a flush was requested.
            if (cs.st_size == url_stat.length
                    and cs.st_mtime_ns > url_stat.mtime_nsec
                    and not self.flush_cache):
                logging.info(f'Skipping {name}...')
                continue
        except FileNotFoundError:
            # Not cached yet: fall through to the copy.
            pass
        logging.info(f'Caching {name}...')
        file_io.copy(url, str(cache_file_path), overwrite=True)

    # Transformers expects a model config.json
    config = T5Config.from_pretrained(self.model_type)
    with open(str(self.model_cache_dir / 'config.json'), 'w') as f:
        json.dump(config.__dict__, f, indent=4)
    return self._fix_t5_model(
        T5ForConditionalGeneration.from_pretrained(
            str(self.model_cache_dir), from_tf=True, force_download=False))
def dataset(data_path, batch_size=1, shuffle=False, repeat=False):
    """Build a tf.data pipeline over every entry of data_path.

    All entries of `data_path` are read as TFRecord files, parsed with
    __parse_proto and preprocessed through a tf.py_func wrapper.

    Args:
      data_path: directory whose entries are the TFRecord files.
      batch_size: batch size; incomplete final batches are dropped.
      shuffle: if true, shuffle with a buffer of 100 elements before batching.
      repeat: if true, repeat the batched dataset indefinitely.

    Returns:
      The resulting dataset of (article, abstract) string pairs.
    """
    record_paths = [
        os.path.join(data_path, name)
        for name in file_io.list_directory(data_path)
    ]

    def _preprocess(article, abstract):
        # Run the Python-side preprocessing as a graph op on both strings.
        return tuple(tf.py_func(
            __preprocess_article_and_abstract,
            [article, abstract],
            [tf.string, tf.string],
            name='preprocess_article_and_abstract'
        ))

    ds = tf.data.TFRecordDataset(record_paths)
    ds = ds.map(__parse_proto).map(_preprocess)
    if shuffle:
        ds = ds.shuffle(buffer_size=100)
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds.repeat() if repeat else ds
def load(self, path, generate_binary_sequences=False):
    """Load every .gui sample (with its image) found in `path` into the dataset.

    For each entry whose name contains ".gui", the matching image is taken
    from a same-named .png (preprocessed to IMAGE_SIZE) or, failing that, from
    the "features" array of a same-named .npz. Samples without either are
    skipped. Afterwards the vocabulary's binary representation is created, the
    labels are sparsified, the partial sequences are binarized or indexified,
    and the dataset dimensions are printed.

    Args:
      path: directory holding the .gui files and their images.
      generate_binary_sequences: binarize the partial sequences when true,
        indexify them otherwise.

    Raises:
      AssertionError: if the per-sample lists end up with different lengths or
        the vocabulary size is inconsistent.
    """
    print("Loading data...")
    for f in file_io.list_directory(path):
        if f.find(".gui") != -1:
            file_name = f[:f.find(".gui")]
            png_path = "{}/{}.png".format(path, file_name)
            npz_path = "{}/{}.npz".format(path, file_name)
            # Use the handle as a context manager so it is closed after each
            # sample instead of being left open for the rest of the run.
            # NOTE(review): assumes self.append consumes `gui` before
            # returning — confirm it does not keep the handle for later.
            with file_io.FileIO("{}/{}".format(path, f), 'r') as gui:
                if file_io.file_exists(png_path):
                    img = Utils.get_preprocessed_img(png_path, IMAGE_SIZE)
                    self.append(file_name, gui, img)
                elif file_io.file_exists(npz_path):
                    f_str = StringIO(file_io.read_file_to_string(npz_path))
                    img = np.load(f_str)["features"]
                    self.append(file_name, gui, img)

    print("Generating sparse vectors...")
    self.voc.create_binary_representation()
    self.next_words = self.sparsify_labels(self.next_words, self.voc)
    if generate_binary_sequences:
        self.partial_sequences = self.binarize(self.partial_sequences,
                                               self.voc)
    else:
        self.partial_sequences = self.indexify(self.partial_sequences,
                                               self.voc)

    # Sanity checks: one image, sequence and next word per sample id.
    self.size = len(self.ids)
    assert self.size == len(self.input_images) == len(
        self.partial_sequences) == len(self.next_words)
    assert self.voc.size == len(self.voc.vocabulary)

    print("Dataset size: {}".format(self.size))
    print("Vocabulary size: {}".format(self.voc.size))

    self.input_shape = self.input_images[0].shape
    self.output_size = self.voc.size

    print("Input shape: {}".format(self.input_shape))
    print("Output size: {}".format(self.output_size))
def __init__(self, export_dir):
    """Prepare an empty SavedModel proto and its export directory.

    Args:
      export_dir: target directory. It is created if it does not exist; an
        existing directory is accepted only when it is empty.

    Raises:
      AssertionError: if export_dir exists and contains entries.
    """
    self._saved_model = saved_model_pb2.SavedModel()
    self._saved_model.saved_model_schema_version = (
        constants.SAVED_MODEL_SCHEMA_VERSION)

    self._export_dir = export_dir
    if not file_io.file_exists(export_dir):
        file_io.recursive_create_dir(self._export_dir)
    elif file_io.list_directory(export_dir):
        raise AssertionError(
            "Export directory already exists, and isn't empty. Please choose "
            "a different export directory, or delete all the contents of the "
            "specified directory: %s" % export_dir)

    # Tracks whether variables and assets of the SavedModel have been saved.
    # The first meta graph MUST be added with add_meta_graph_and_variables();
    # later additions MUST use add_meta_graph(), which does not save weights.
    self._has_saved_variables = False
def leer_imagenes_etiquetas(ruta):
    """Read every image in a directory together with its label.

    The directory listing is sorted and then shuffled with a fixed seed (42),
    so the order is reproducible. Note that this reseeds the global `random`
    generator. Each file is read with leer_una_imagen and converted with
    img_to_array. The label is the integer found between the first "_" and the
    first "." of the file name, shifted down by one so labels start at 0.

    Args:
      ruta: directory containing the image files (without trailing slash).

    Returns:
      (x, y): list of image arrays and list of zero-based integer labels.
    """
    x = []
    y = []
    ruta = ruta + "/"
    print('LA RUTA ES...' + ruta)

    # Get the list of files for the path given as parameter.
    filelist = sorted(file_io.list_directory(ruta))
    random.seed(42)
    random.shuffle(filelist)
    # Report the first entry (index 0); guard against an empty directory so
    # this diagnostic cannot raise IndexError.
    primero = filelist[0] if filelist else ""
    print('TAMAÑO DIRECTORIO...' + str(len(filelist)) + " primero..." + primero)

    # Read the image and the label of every element of the list.
    for imgPath in filelist:
        imagen = leer_una_imagen(ruta + imgPath)
        imagen = img_to_array(imagen)  # convert to array
        x.append(imagen)

        # The label sits between "_" and "." in the file name.
        label = imgPath[imgPath.find("_") + 1:imgPath.find(".")]
        # Labels start at 1 in the file names; subtract 1 so they start at 0.
        label = int(label) - 1
        y.append(label)

    print('REGISTROS LEIDOS X...' + str(len(x)))
    print('REGISTROS LEIDOS Y...' + str(len(y)))
    return (x, y)
def load_library(library_location):
    """Loads a TensorFlow plugin.

    "library_location" can be a path to a specific shared object, or a folder.
    If it is a folder, every entry accepted by `_is_shared_object` is loaded.
    Each selected library is passed to `py_tf.TF_LoadLibrary`.

    Args:
      library_location: Path to the plugin or the folder of plugins.
        Relative or absolute filesystem path to a dynamic library file or
        folder.

    Returns:
      None

    Raises:
      OSError: When the file or folder to be loaded is not found.
    """
    if not file_io.file_exists(library_location):
        raise OSError(
            errno.ENOENT,
            'The file or folder to load kernel libraries from does not exist.',
            library_location)

    if file_io.is_directory(library_location):
        kernel_libraries = []
        for entry in file_io.list_directory(library_location):
            if _is_shared_object(entry):
                kernel_libraries.append(
                    os.path.join(library_location, entry))
    else:
        kernel_libraries = [library_location]

    for lib in kernel_libraries:
        py_tf.TF_LoadLibrary(lib)
def leer_imagenes_etiquetas(ruta):
    """Read every readable image in a directory together with its label.

    The directory listing is sorted and then shuffled with a fixed seed (42),
    so the order is reproducible. Note that this reseeds the global `random`
    generator. Images that cannot be read or converted are skipped. The label
    is the integer found between the first "_" and the first "." of the file
    name, shifted down by one so labels start at 0. Progress is shown with
    tqdm.

    Args:
      ruta: directory containing the image files (without trailing slash).

    Returns:
      (x, y): list of image arrays and list of zero-based integer labels.
    """
    x = []
    y = []

    # Get the list of files for the path given as parameter.
    ruta = ruta + "/"
    print('LA RUTA ES...' + ruta)
    filelist = sorted(file_io.list_directory(ruta))
    random.seed(42)
    random.shuffle(filelist)

    # Read the image and the label of every element of the list.
    print("EMPIEZA A LEER ", time.time())
    # Wrapping the iterable advances the bar for every file, including the
    # ones that are skipped below.
    for imgPath in tqdm(filelist):
        imagen = leer_una_imagen(ruta + imgPath)
        if not imagen:
            # Reading failed and returned something empty: skip the image.
            continue
        try:
            imagen = img_to_array(imagen)  # convert to array
        except Exception:  # pylint: disable=broad-except
            # Best effort: an image that cannot be converted is skipped.
            # Unlike a bare `except:`, this lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
        x.append(imagen)

        # The label sits between "_" and "." in the file name.
        label = imgPath[imgPath.find("_") + 1:imgPath.find(".")]
        # Labels start at 1 in the file names; subtract 1 so they start at 0.
        label = int(label) - 1
        y.append(label)

    print('REGISTROS LEIDOS X...' + str(len(x)))
    print('REGISTROS LEIDOS Y...' + str(len(y)))
    return (x, y)
def load_library(library_location):
    """Loads a TensorFlow plugin.

    "library_location" can be a path to a specific shared object, or a folder.
    If it is a folder, every entry accepted by `_is_shared_object` is loaded.
    Each selected library is passed to `py_tf.TF_LoadLibrary`.

    Args:
      library_location: Path to the plugin or the folder of plugins.
        Relative or absolute filesystem path to a dynamic library file or
        folder.

    Returns:
      None

    Raises:
      OSError: When the file or folder to be loaded is not found.
    """
    # Guard clause: nothing to load from a missing location.
    if not file_io.file_exists(library_location):
        raise OSError(
            errno.ENOENT,
            'The file or folder to load kernel libraries from does not exist.',
            library_location)

    # A folder contributes its shared objects; a file is loaded as is.
    candidates = (
        [
            os.path.join(library_location, name)
            for name in file_io.list_directory(library_location)
            if _is_shared_object(name)
        ]
        if file_io.is_directory(library_location)
        else [library_location]
    )
    for shared_object in candidates:
        py_tf.TF_LoadLibrary(shared_object)
def main():
    """Run batch prediction and publish its results and UI metadata.

    Picks the first entry of `<model>/export/export` as the exported model,
    runs the prediction, writes the result glob to /output.txt, copies the
    single result shard to /prediction-output.txt, uploads it to minio and
    writes the table metadata to /mlpipeline-ui-metadata.json.
    """
    logging.getLogger().setLevel(logging.INFO)
    args = parse_arguments()
    # Models trained with estimator are exported to
    # base/export/export/123456781 directory.
    # Our trainer export only one model.
    export_parent_dir = os.path.join(args.model, 'export', 'export')
    model_export_dir = os.path.join(
        export_parent_dir, file_io.list_directory(export_parent_dir)[0])
    schema = json.loads(file_io.read_file_to_string(args.schema))

    run_predict(args.output, args.data, schema, args.target, model_export_dir,
                args.project, args.mode, args.batchsize)
    prediction_results = os.path.join(args.output, 'prediction_results-*')
    with open('/output.txt', 'w') as f:
        f.write(prediction_results)

    tmp = os.path.join(args.output, 'prediction_results-00000-of-00001')
    # Both handles are managed by the `with` so the source file is closed too.
    with open('/prediction-output.txt', 'w') as f, open(tmp, "r") as src:
        f.write(src.read())

    # The schema written next to the results replaces the input schema and
    # provides the column names for the metadata header.
    with file_io.FileIO(os.path.join(args.output, 'schema.json'), 'r') as f:
        schema = json.load(f)

    # Drop the first three "/"-separated components of the output path.
    BUCKET_PATH = "/".join(args.output.split('/')[3:])
    upload_to_minio('/prediction-output.txt', BUCKET_PATH)

    metadata = {
        'outputs': [{
            'type': 'table',
            'storage': 'minio',
            'format': 'csv',
            'header': [x['name'] for x in schema],
            'source': "{}://{}/{}/{}".format(
                "minio", BUCKET_NAME, BUCKET_PATH, "prediction-output.txt")
        }]
    }
    with open('/mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)
def testListDirectoryFailure(self):
    """Listing a directory this test has not created raises NotFoundError."""
    missing_dir = os.path.join(self._base_dir, "test_dir")
    with self.assertRaises(errors.NotFoundError):
        file_io.list_directory(missing_dir)
def load_session_bundle_from_path(export_dir, target="", config=None):
    """Load session bundle from the given path.

    The function reads input from the export_dir, constructs the graph data to
    the default graph and restores the parameters for the session created.

    Args:
      export_dir: the directory that contains files exported by exporter.
      target: The execution engine to connect to. See target in tf.Session()
      config: A ConfigProto proto with configuration options. See config in
        tf.Session()

    Returns:
      session: a tensorflow session created from the variable files.
      meta_graph: a meta graph proto saved in the exporter directory.

    Raises:
      RuntimeError: if the required files are missing or contain unrecognizable
        fields, i.e. the exported model is invalid.
    """
    if hasattr(tf, "GIT_VERSION"):
        logging.info("tf.GIT_VERSION=%s", tf.GIT_VERSION)
    else:
        logging.info("tf.GIT_VERSION=unknown")

    meta_graph_filename = os.path.join(export_dir, META_GRAPH_DEF_FILENAME)
    if not file_io.file_exists(meta_graph_filename):
        raise RuntimeError("Expected meta graph file missing %s" %
                           meta_graph_filename)

    variables_filename = ""
    variables_filename_list = []
    additional_files_to_copy = []
    checkpoint_sharded = False

    variables_index_filename = os.path.join(export_dir,
                                            VARIABLES_INDEX_FILENAME_V2)
    checkpoint_v2 = file_io.file_exists(variables_index_filename)

    if checkpoint_v2:
        # The checkpoint is in v2 format.
        variables_filename = os.path.join(export_dir, VARIABLES_FILENAME_V2)
        # Check to see if the file "export" exists or not.
        if file_io.file_exists(variables_filename):
            variables_filename_list = [variables_filename]
        else:
            # Check to see if the sharded file named "export-?????-of-?????"
            # exists.
            # NOTE(review): the listing yields bare entry names, so the
            # "gs://" test further down never matches for sharded
            # checkpoints — confirm whether that is intended.
            variables_filename_list = fnmatch.filter(
                file_io.list_directory(export_dir),
                VARIABLES_FILENAME_PATTERN_V2)
            checkpoint_sharded = True
        # If the checkpoint is not local, we need to copy export.index locally
        # too.
        additional_files_to_copy = [variables_index_filename]
    else:
        variables_filename = os.path.join(export_dir, VARIABLES_FILENAME)
        if file_io.file_exists(variables_filename):
            variables_filename_list = [variables_filename]
        else:
            variables_filename_list = fnmatch.filter(
                file_io.list_directory(export_dir),
                VARIABLES_FILENAME_PATTERN)
            checkpoint_sharded = True

    if not variables_filename_list or not variables_filename:
        raise RuntimeError("No or bad checkpoint files found in %s" %
                           export_dir)

    # Prepare the files to restore a session.
    if checkpoint_v2 or not checkpoint_sharded:
        # For checkpoint v2 or v1 with non-sharded files, use "export" to
        # restore the session.
        restore_files = VARIABLES_FILENAME
    else:
        restore_files = VARIABLES_FILENAME_PATTERN

    # Reads meta graph file.
    # NOTE(review): the file is opened in text mode ("r") although the content
    # is parsed as a serialized proto — confirm this is right for the Python
    # version in use.
    meta_graph_def = meta_graph_pb2.MetaGraphDef()
    with file_io.FileIO(meta_graph_filename, "r") as f:
        logging.info("Reading metagraph from %s", meta_graph_filename)
        meta_graph_def.ParseFromString(f.read())

    collection_def = meta_graph_def.collection_def
    graph_def = tf.GraphDef()
    if GRAPH_KEY in collection_def:
        logging.info("Using value of collection %s for the graph.", GRAPH_KEY)
        # Use serving graph_def in MetaGraphDef collection_def if exists
        graph_def_any = collection_def[GRAPH_KEY].any_list.value
        if len(graph_def_any) != 1:
            raise RuntimeError(
                "Expected exactly one serving GraphDef in : %s" %
                meta_graph_def)
        graph_def_any[0].Unpack(graph_def)
        # Replace the graph def in meta graph proto.
        meta_graph_def.graph_def.CopyFrom(graph_def)

        # TODO(b/36055868): If we don't clear the collections then
        # import_meta_graph fails.
        #
        # We can't delete all the collections because some of them are used
        # by prediction to get the names of the input/output tensors.
        keys_to_delete = (set(meta_graph_def.collection_def.keys()) -
                          set(keys_used_for_serving()))
        for k in keys_to_delete:
            del meta_graph_def.collection_def[k]
    else:
        logging.info(
            "No %s found in metagraph. Using metagraph as serving graph",
            GRAPH_KEY)

    tf.reset_default_graph()
    sess = tf.Session(target, graph=None, config=config)
    # Import the graph.
    saver = tf.train.import_meta_graph(meta_graph_def)
    # Restore the session.
    if variables_filename_list[0].startswith("gs://"):
        # Make copy from GCS files.
        # TODO(b/36052034): Retire this once tensorflow can access GCS.
        # Bound before the try so the cleanup below never hits an unbound
        # name (and masks the real error) when mkdtemp itself fails.
        temp_dir_path = None
        try:
            temp_dir_path = tempfile.mkdtemp("local_variable_files")
            for f in variables_filename_list + additional_files_to_copy:
                file_io.copy(
                    f, os.path.join(temp_dir_path, os.path.basename(f)))

            saver.restore(sess, os.path.join(temp_dir_path, restore_files))
        finally:
            if temp_dir_path is not None:
                try:
                    shutil.rmtree(temp_dir_path)
                except OSError as e:
                    # str(e) instead of e.message: exceptions have no
                    # `.message` attribute on Python 3.
                    if str(e) == "Cannot call rmtree on a symbolic link":
                        # Interesting synthetic exception made up by
                        # shutil.rmtree. Means we received a symlink from
                        # mkdtemp. Also means must clean up the symlink
                        # instead.
                        os.unlink(temp_dir_path)
                    else:
                        raise
    else:
        saver.restore(sess, os.path.join(export_dir, restore_files))

    init_op_tensor = None
    if INIT_OP_KEY in collection_def:
        init_ops = collection_def[INIT_OP_KEY].node_list.value
        if len(init_ops) != 1:
            raise RuntimeError(
                "Expected exactly one serving init op in : %s" %
                meta_graph_def)
        init_op_tensor = tf.get_collection(INIT_OP_KEY)[0]

    if init_op_tensor:
        # Run the init op.
        sess.run(fetches=[init_op_tensor])

    return sess, meta_graph_def
def create_dir_test():
    """Exercise file_io directory creation, listing, renaming and deletion.

    Works in a timestamped directory under FLAGS.gcs_bucket_url. Every step
    prints what it does and how long it took in milliseconds, and asserts the
    expected outcome.
    """

    def _now_ms():
        # Wall-clock time rounded to whole milliseconds.
        return int(round(time.time() * 1000))

    # Test directory creation.
    began_ms = _now_ms()
    dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, began_ms)
    print("Creating dir %s" % dir_name)
    file_io.create_dir(dir_name)
    took_ms = _now_ms() - began_ms
    print("Created directory in: %d milliseconds" % took_ms)

    # Check that the directory exists.
    dir_exists = file_io.is_directory(dir_name)
    assert dir_exists
    print("%s directory exists: %s" % (dir_name, dir_exists))

    # Test recursive directory creation.
    began_ms = _now_ms()
    recursive_dir_name = "%s/%s/%s" % (dir_name, "nested_dir1", "nested_dir2")
    print("Creating recursive dir %s" % recursive_dir_name)
    file_io.recursive_create_dir(recursive_dir_name)
    took_ms = _now_ms() - began_ms
    print("Created directory recursively in: %d milliseconds" % took_ms)

    # Check that the nested directory exists.
    recursive_dir_exists = file_io.is_directory(recursive_dir_name)
    assert recursive_dir_exists
    print("%s directory exists: %s" % (recursive_dir_name,
                                        recursive_dir_exists))

    # Create some contents in the just created directory and list them.
    files_to_create = ["file_%d.txt" % n for n in range(10)]
    for short_name in files_to_create:
        full_name = "%s/%s" % (dir_name, short_name)
        print("Creating file %s." % full_name)
        file_io.write_string_to_file(full_name, "test file.")

    print("Listing directory %s." % dir_name)
    began_ms = _now_ms()
    directory_contents = file_io.list_directory(dir_name)
    print(directory_contents)
    took_ms = _now_ms() - began_ms
    print("Listed directory %s in %s milliseconds" % (dir_name, took_ms))
    expected_contents = set(files_to_create + ["nested_dir1/"])
    assert set(directory_contents) == expected_contents

    # Test directory renaming.
    dir_to_rename = "%s/old_dir" % dir_name
    new_dir_name = "%s/new_dir" % dir_name
    file_io.create_dir(dir_to_rename)
    assert file_io.is_directory(dir_to_rename)
    assert not file_io.is_directory(new_dir_name)

    began_ms = _now_ms()
    print("Will try renaming directory %s to %s" % (dir_to_rename,
                                                    new_dir_name))
    file_io.rename(dir_to_rename, new_dir_name)
    took_ms = _now_ms() - began_ms
    print("Renamed directory %s to %s in %s milliseconds" % (
        dir_to_rename, new_dir_name, took_ms))
    assert not file_io.is_directory(dir_to_rename)
    assert file_io.is_directory(new_dir_name)

    # Test Delete directory recursively.
    print("Deleting directory recursively %s." % dir_name)
    began_ms = _now_ms()
    file_io.delete_recursively(dir_name)
    took_ms = _now_ms() - began_ms
    dir_exists = file_io.is_directory(dir_name)
    assert not dir_exists
    print("Deleted directory recursively %s in %s milliseconds" % (
        dir_name, took_ms))
def run_tfma(slice_spec, eval_model_base_dir, tfma_run_dir, input_csv,
             working_dir, mode, project, setup_file,
             add_metrics_callbacks=None):
    """Does model analysis, using the given spec of how to 'slice', and returns
    an EvalResult that can be used with TFMA visualization functions.

    Args:
      slice_spec: slicing specification forwarded to TFMA.
      eval_model_base_dir: directory whose first listed entry is the exported
        eval model; polled until that entry is no longer a 'temp' directory.
      tfma_run_dir: output path for the evaluation results.
      input_csv: CSV input read by the pipeline (the header line is skipped).
      working_dir: base directory; `<working_dir>/tmp` is used as temp dir.
      mode: 'local' (DirectRunner) or 'cloud' (DataFlowRunner).
      project: project id passed in the pipeline options.
      setup_file: setup file passed in the pipeline options in cloud mode.
      add_metrics_callbacks: optional callbacks forwarded to TFMA.

    Returns:
      The result of tfma.load_eval_result for tfma_run_dir.

    Raises:
      ValueError: if mode is neither 'local' nor 'cloud'.
      RuntimeError: if no usable eval model directory shows up within the
        retry budget.
    """
    print("eval model base dir: %s" % eval_model_base_dir)

    # Make sure the model dir exists before proceeding, as sometimes it takes
    # a few seconds to become available after training completes.
    eval_model_dir = None
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            candidate = os.path.join(
                eval_model_base_dir,
                file_io.list_directory(eval_model_base_dir)[0])
            print("eval model dir: %s" % candidate)
            # A 'temp' entry means the export is still in progress.
            if 'temp' not in candidate:
                eval_model_dir = candidate
                break
        except Exception as e:  # pylint: disable=broad-except
            # Deliberately broad: any listing failure is treated as "not
            # there yet" and retried.
            print(e)
        print("Sleeping %s seconds to sync with GCS..." % sleeptime)
        time.sleep(sleeptime)
        retries += 1
        sleeptime *= 2
    if eval_model_dir is None:
        # Fail with a clear message instead of continuing with an unbound or
        # temporary model directory.
        raise RuntimeError(
            "could not find an exported eval model under %s" %
            eval_model_base_dir)

    schema = taxi.read_schema('schema.pbtxt')
    temp_dir = os.path.join(working_dir, 'tmp')

    if mode == 'local':
        print("mode == local")
        options = {'project': project}
        pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
        runner = 'DirectRunner'
    elif mode == 'cloud':
        print("mode == cloud")
        options = {
            'job_name': 'tfma-' + str(uuid.uuid4()),
            'temp_location': temp_dir,
            'project': project,
            'save_main_session': True,
            'setup_file': setup_file
        }
        pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
        runner = 'DataFlowRunner'
    else:
        raise ValueError("Invalid mode %s." % mode)

    display_only_data_location = input_csv

    with beam.Pipeline(runner, options=pipeline_options) as pipeline:
        with beam_impl.Context(temp_dir=temp_dir):
            csv_coder = taxi.make_csv_coder(schema)
            raw_data = (
                pipeline
                | 'ReadFromText' >> beam.io.ReadFromText(
                    input_csv,
                    skip_header_lines=1)
                | 'ParseCSV' >> beam.Map(csv_coder.decode))

            # Examples must be in clean tf-example format.
            coder = taxi.make_proto_coder(schema)
            raw_data = (
                raw_data
                | 'ToSerializedTFExample' >> beam.Map(coder.encode))

            _ = raw_data | 'EvaluateAndWriteResults' >> (
                tfma.EvaluateAndWriteResults(
                    eval_saved_model_path=eval_model_dir,
                    slice_spec=slice_spec,
                    output_path=tfma_run_dir,
                    add_metrics_callbacks=add_metrics_callbacks,
                    display_only_data_location=display_only_data_location))

    return tfma.load_eval_result(output_path=tfma_run_dir)
def main():
    """Render the TF-Serving manifest from its template and create it.

    Command-line flags:
      --model_name (required): substituted for MODEL_NAME in the template.
      --model_path (required): listed to confirm the model exists, and
        substituted for MODEL_PATH in the template.
      --cluster / --zone: optional; when either is missing both values are
        fetched from the metadata server instead.

    The rendered manifest is written next to this script as 'tf-serve.yaml'
    and handed to `kubectl create -f`.

    Raises:
      SystemExit: with status 1 when no model sub-directory can be listed
        within the retry budget (and with argparse's own status on bad
        flags).
    """
    parser = argparse.ArgumentParser(description='ML Trainer')
    parser.add_argument('--model_name', help='...', required=True)
    parser.add_argument('--model_path', help='...', required=True)
    parser.add_argument(
        '--cluster', type=str,
        help='GKE cluster set up for kubeflow. If set, zone must be provided. ' +
             'If not set, assuming this runs in a GKE container and current ' +
             'cluster is used.')
    parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
    args = parser.parse_args()

    KUBEFLOW_NAMESPACE = 'kubeflow'

    # Make sure model dir exists before proceeding
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            model_dir = os.path.join(
                args.model_path, file_io.list_directory(args.model_path)[-1])
            print("model subdir: %s" % model_dir)
            break
        except Exception as e:  #pylint: disable=broad-except
            print(e)
            print("Sleeping %s seconds to sync with GCS..." % sleeptime)
            time.sleep(sleeptime)
            retries += 1
            sleeptime *= 2  # exponential backoff
    if retries >= 20:
        print("could not get model subdir from %s, exiting" % args.model_path)
        # `exit` is only injected by the `site` module; raise directly.
        raise SystemExit(1)

    logging.getLogger().setLevel(logging.INFO)
    args_dict = vars(args)
    if args.cluster and args.zone:
        cluster = args_dict.pop('cluster')  #pylint: disable=unused-variable
        zone = args_dict.pop('zone')  #pylint: disable=unused-variable
    else:
        # Get cluster name and zone from metadata
        metadata_server = "http://metadata/computeMetadata/v1/instance/"
        metadata_flavor = {'Metadata-Flavor' : 'Google'}
        cluster = requests.get(metadata_server + "attributes/cluster-name",
                               headers=metadata_flavor).text
        zone = requests.get(metadata_server + "zone",
                            headers=metadata_flavor).text.split('/')[-1]

    # logging.info('Getting credentials for GKE cluster %s.' % cluster)
    # subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', cluster,
    #                  '--zone', zone])

    logging.info('Generating training template.')
    script_dir = os.path.dirname(os.path.realpath(__file__))
    template_file = os.path.join(script_dir, 'tf-serve-template.yaml')
    target_file = os.path.join(script_dir, 'tf-serve.yaml')

    # Read the template before opening the target for writing, so an
    # unreadable template does not leave a truncated manifest behind.
    with open(template_file, 'r') as f:
        data = f.read()
    rendered = (data
                .replace('MODEL_NAME', args.model_name)
                .replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE)
                .replace('MODEL_PATH', args.model_path))
    with open(target_file, "w") as target:
        target.write(rendered)

    logging.info('deploying model serving.')
    # Apply the file that was just generated instead of a hard-coded path
    # that only matches when the script is installed under /ml.
    status = subprocess.call(['kubectl', 'create', '-f', target_file])
    if status != 0:
        logging.warning('kubectl create exited with status %s', status)
def train(args):
    """Train the CNN on a zipped image set and upload the resulting files.

    Reads the class names from `args.label_file`, loads every image of the
    zip archive at `args.training_dir`, fits the model built by
    `model_helper.init_cnn_model`, then copies the best checkpoint, the last
    model and the contents of the local "logs" directory below
    `args.job_dir`.

    Args:
      args: namespace providing `training_dir`, `label_file`, `img_size`,
        `channel`, `batch_size`, `epochs`, `job_dir` and `job_name`.
    """

    def define_label_one_hot(img, label):
        """Set to 1 the slot of every class whose name occurs in the file name."""
        base_name = os.path.basename(img)
        for c in class_names:
            if c in base_name:
                label[class_names.index(c)] = 1
        return label

    def get_dataset():
        """Load the archive and return x_train, x_test, y_train, y_test."""
        images = []
        labels = []
        # NOTE(review): the local copy is only printed; the archive is still
        # read from args.training_dir below - confirm this is intended.
        local_zip = IO_utils.load_data(args.training_dir, LOCAL_TRAIN_FILE)
        print("zip path = ", local_zip)
        # A zip archive is binary data: open it in 'rb' so that reads return
        # bytes on Python 3 as well.
        with file_io.FileIO(args.training_dir, 'rb') as f:
            with ZipFile(f, 'r') as archive:
                file_list = archive.infolist()
                random.shuffle(file_list)
                print("image number", len(file_list))
                for entry in file_list:
                    with archive.open(entry) as member:
                        try:
                            pixels = np.array(Image.open(member))
                            label = define_label_one_hot(
                                member.name, np.zeros(num_classes))
                        except Exception as error:  # pylint: disable=broad-except
                            # Entries that cannot be decoded are reported
                            # and skipped.
                            print(error)
                        else:
                            # Append both only once both exist, so images
                            # and labels can never get out of step.
                            images.append(pixels)
                            labels.append(label)
        images = np.array(images)
        labels = np.array(labels)
        x_train, x_test, y_train, y_test = train_test_split(
            images, labels, test_size=0.1, random_state=42)
        # Scale pixel values by 1/255.
        x_train = x_train / 255.0
        x_test = x_test / 255.0
        return x_train, x_test, y_train, y_test

    with file_io.FileIO(args.label_file, 'r') as f:
        class_names = f.read().split(",")
    num_classes = len(class_names)
    input_shape = (args.img_size, args.img_size, args.channel)
    print("labels", class_names)
    print("num_class", num_classes)

    X_train, X_test, y_train, y_test = get_dataset()
    print("Xtrain", X_train.shape)
    print("Ytrain", y_train.shape)

    model = model_helper.init_cnn_model(input_shape, num_classes)
    model.summary()

    # checkpoint
    checkpoint = ModelCheckpoint(LOCAL_MODEL_FILE, monitor='val_acc', verbose=1,
                                 save_best_only=True, mode='max')
    model.fit(
        X_train,
        y_train,
        batch_size=args.batch_size,
        epochs=args.epochs,
        validation_data=(X_test, y_test),
        callbacks=[checkpoint, model_helper.get_tensorboard_config("logs")],
        verbose=1)

    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    # predict first 4 images in the test set
    print("Predict :", model.predict(X_test[:4]))

    model.save("last-" + LOCAL_MODEL_FILE)

    logs = file_io.list_directory("logs")
    print("logs = ", logs)
    IO_utils.save_file_in_cloud("last-" + LOCAL_MODEL_FILE,
                                args.job_dir + "/" + 'last-' + args.job_name)
    IO_utils.save_file_in_cloud(LOCAL_MODEL_FILE,
                                args.job_dir + "/" + args.job_name)
    for entry in logs:
        IO_utils.save_file_in_cloud("logs/" + entry,
                                    args.job_dir + "/logs/" + entry)
def main(argv=None):
    """Render the TF-Serving manifest from its template and create it.

    Command-line flags:
      --model_name (required): substituted for MODEL_NAME in the template.
      --model_path (required): listed to confirm the model exists, and
        substituted for MODEL_PATH in the template.
      --aws_secret: when given, the credentials template is used and the
        value is substituted for AWS_SECRET_NAME.
      --s3-endpoint, --cluster, --zone: parsed but not used below.

    BUCKET_REGION in the template is replaced with the AWS_REGION environment
    variable ('us-east-1' when unset). The rendered manifest is written next
    to this script as 'tf-serve.yaml' and handed to `kubectl create -f`.

    Args:
      argv: accepted but not used; flags are always read from `sys.argv`.

    Raises:
      SystemExit: with status 1 when no model sub-directory can be listed
        within the retry budget (and with argparse's own status on bad
        flags).
    """
    parser = argparse.ArgumentParser(description='ML Trainer')
    parser.add_argument('--model_name', help='...', required=True)
    parser.add_argument('--model_path', help='...', required=True)
    parser.add_argument('--aws_secret', help='...', required=False)
    parser.add_argument('--s3-endpoint', help="""...""", required=False)
    parser.add_argument(
        '--cluster', type=str,
        help='GKE cluster set up for kubeflow. If set, zone must be provided. ' +
             'If not set, assuming this runs in a GKE container and current ' +
             'cluster is used.')
    parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
    args = parser.parse_args()

    # KUBEFLOW_NAMESPACE = 'default'
    KUBEFLOW_NAMESPACE = 'kubeflow'

    # Make sure model dir exists before proceeding
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            model_dir = os.path.join(
                args.model_path, file_io.list_directory(args.model_path)[-1])
            print("model subdir: %s" % model_dir)
            break
        except Exception as e:  # pylint: disable=broad-except
            print(e)
            print("Sleeping %s seconds to sync with GCS..." % sleeptime)
            time.sleep(sleeptime)
            retries += 1
            sleeptime *= 2  # exponential backoff
    if retries >= 20:
        print("could not get model subdir from %s, exiting" % args.model_path)
        # `exit` is only injected by the `site` module; raise directly.
        raise SystemExit(1)

    logging.getLogger().setLevel(logging.INFO)
    logging.info('Running deploy-tf-serve.py')
    logging.info('Generating training template.')

    script_dir = os.path.dirname(os.path.realpath(__file__))
    if args.aws_secret:
        template_file = os.path.join(script_dir, 'tf-serve-creds-template.yaml')
    else:
        template_file = os.path.join(script_dir, 'tf-serve-template.yaml')
    target_file = os.path.join(script_dir, 'tf-serve.yaml')

    aws_region = os.environ.get('AWS_REGION', 'us-east-1')
    # o = urlparse(args.model_path)
    # if o.scheme == 's3':
    #   client = s3_client(args.s3_endpoint)
    #   override model s3 location region and endpoint
    #   so tensorflow could access it without troubles
    #   aws_region = client.get_bucket_location(
    #       Bucket=o.netloc
    #   ).get('LocationConstraint', 'us-east-1')

    # Read the template before opening the target for writing, so an
    # unreadable template does not leave a truncated manifest behind.
    with open(template_file, 'r') as f:
        data = f.read()
    changed = data.replace('MODEL_NAME', args.model_name)
    changed = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE)
    changed = changed.replace('MODEL_PATH', args.model_path)
    changed = changed.replace('BUCKET_REGION', aws_region)
    if args.aws_secret:
        changed = changed.replace('AWS_SECRET_NAME', args.aws_secret)
    with open(target_file, "w") as target:
        target.write(changed)

    logging.info('deploying model serving.')
    # Apply the file that was just generated instead of a hard-coded path
    # that only matches when the script is installed under /ml.
    status = subprocess.call(['kubectl', 'create', '-f', target_file])
    if status != 0:
        logging.warning('kubectl create exited with status %s', status)
def read_image_file_names(dir_path):
    """Lists `dir_path` and returns every entry joined onto `dir_path`.

    The returned paths are exactly as absolute or relative as `dir_path`
    itself; entries are not filtered.
    """
    paths = []
    for entry in file_io.list_directory(dir_path):
        paths.append(os.path.join(dir_path, entry))
    return paths
def run_tfma(slice_spec, eval_model_base_dir, tfma_run_dir, input_csv,
             working_dir, mode, project, setup_file,
             add_metrics_callbacks=None):
    """Does model analysis, using the given spec of how to 'slice'.

    Reads `input_csv` with Beam, converts each row to a serialized tf-example
    and feeds the result to `tfma.EvaluateAndWriteResults`.

    Args:
      slice_spec: forwarded to TFMA as `slice_spec`.
      eval_model_base_dir: directory that is listed to find the eval saved
        model; the first listed entry is used once its path no longer
        contains 'temp'.
      tfma_run_dir: TFMA output path; the result is loaded back from here.
      input_csv: text input to read; one header line is skipped.
      working_dir: parent of the 'tmp' directory used as temp location.
      mode: 'local' (DirectRunner) or 'cloud' (DataFlowRunner).
      project: value of the Beam 'project' option.
      setup_file: value of the Beam 'setup_file' option (cloud mode only).
      add_metrics_callbacks: forwarded to TFMA as `add_metrics_callbacks`.

    Returns:
      Whatever `tfma.load_eval_result(output_path=tfma_run_dir)` returns
      (an EvalResult usable with the TFMA visualization functions).

    Raises:
      RuntimeError: if no usable model directory shows up within the retry
        budget.
      ValueError: if `mode` is neither 'local' nor 'cloud'.
    """
    print("eval model base dir: %s" % eval_model_base_dir)

    # Make sure the model dir exists before proceeding, as sometimes it takes
    # a few seconds to become available after training completes.
    eval_model_dir = None
    attempts = 0
    delay = 5
    while attempts < 20:
        try:
            listed = file_io.list_directory(eval_model_base_dir)
            candidate = os.path.join(eval_model_base_dir, listed[0])
            print("eval model dir: %s" % candidate)
            if 'temp' not in candidate:
                eval_model_dir = candidate
                break
        except Exception as e:  # pylint: disable=broad-except
            print(e)
        # Either the listing failed or only a 'temp' entry is visible so far:
        # back off (the delay doubles every round) and look again.
        print("Sleeping %s seconds to sync with GCS..." % delay)
        time.sleep(delay)
        attempts += 1
        delay *= 2

    if eval_model_dir is None:
        # Fail here with a clear message rather than later with an unbound
        # name or with a still-temporary export directory.
        raise RuntimeError(
            "Could not find an exported eval model under %s."
            % eval_model_base_dir)

    schema = taxi.read_schema('schema.pbtxt')

    temp_dir = os.path.join(working_dir, 'tmp')

    if mode == 'local':
        print("mode == local")
        options = {'project': project}
        runner = 'DirectRunner'
    elif mode == 'cloud':
        print("mode == cloud")
        options = {
            'job_name': 'tfma-' + str(uuid.uuid4()),
            'temp_location': temp_dir,
            'project': project,
            'save_main_session': True,
            'setup_file': setup_file,
        }
        runner = 'DataFlowRunner'
    else:
        raise ValueError("Invalid mode %s." % mode)
    pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)

    with beam.Pipeline(runner, options=pipeline_options) as pipeline:
        with beam_impl.Context(temp_dir=temp_dir):
            csv_coder = taxi.make_csv_coder(schema)
            raw_data = (
                pipeline
                | 'ReadFromText' >> beam.io.ReadFromText(
                    input_csv,
                    # coder=beam.coders.BytesCoder(),
                    skip_header_lines=1)
                | 'ParseCSV' >> beam.Map(csv_coder.decode))

            # Examples must be in clean tf-example format.
            coder = taxi.make_proto_coder(schema)
            raw_data = (
                raw_data
                # | 'CleanData' >> beam.Map(taxi.clean_raw_data_dict)
                | 'ToSerializedTFExample' >> beam.Map(coder.encode))

            _ = raw_data | 'EvaluateAndWriteResults' >> tfma.EvaluateAndWriteResults(
                eval_saved_model_path=eval_model_dir,
                slice_spec=slice_spec,
                output_path=tfma_run_dir,
                add_metrics_callbacks=add_metrics_callbacks,
                display_only_data_location=input_csv)

    # Load the results only after the pipeline context has been left.
    return tfma.load_eval_result(output_path=tfma_run_dir)
def main():
    """Render the TF-Serving manifest from its template and create it.

    Command-line flags:
      --model_name (required): substituted for MODEL_NAME in the template
        and, suffixed with the current Unix timestamp, for SERVICE_NAME.
      --model_path (required): listed to confirm the model exists, and
        substituted for MODEL_PATH in the template.
      --cluster / --zone: optional; when either is missing both values are
        fetched from the metadata server instead.

    The rendered manifest is written next to this script as 'tf-serve.yaml',
    logged, and handed to `kubectl create -f`.

    Raises:
      SystemExit: with status 1 when no model sub-directory can be listed
        within the retry budget (and with argparse's own status on bad
        flags).
    """
    parser = argparse.ArgumentParser(description='ML Trainer')
    parser.add_argument('--model_name', help='...', required=True)
    parser.add_argument('--model_path', help='...', required=True)
    parser.add_argument(
        '--cluster', type=str,
        help='GKE cluster set up for kubeflow. If set, zone must be provided. ' +
             'If not set, assuming this runs in a GKE container and current ' +
             'cluster is used.')
    parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
    args = parser.parse_args()

    KUBEFLOW_NAMESPACE = 'kubeflow'
    # Timestamp suffix used to build the SERVICE_NAME substitution below.
    ts = str(int(time.time()))

    # Make sure model dir exists before proceeding
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            model_dir = os.path.join(
                args.model_path, file_io.list_directory(args.model_path)[-1])
            print("model subdir: %s" % model_dir)
            break
        except Exception as e:  #pylint: disable=broad-except
            print(e)
            print("Sleeping %s seconds to sync with GCS..." % sleeptime)
            time.sleep(sleeptime)
            retries += 1
            sleeptime *= 2  # exponential backoff
    if retries >= 20:
        print("could not get model subdir from %s, exiting" % args.model_path)
        # `exit` is only injected by the `site` module; raise directly.
        raise SystemExit(1)

    logging.getLogger().setLevel(logging.INFO)
    args_dict = vars(args)
    if args.cluster and args.zone:
        cluster = args_dict.pop('cluster')  #pylint: disable=unused-variable
        zone = args_dict.pop('zone')  #pylint: disable=unused-variable
    else:
        # Get cluster name and zone from metadata
        metadata_server = "http://metadata/computeMetadata/v1/instance/"
        metadata_flavor = {'Metadata-Flavor' : 'Google'}
        cluster = requests.get(metadata_server + "attributes/cluster-name",
                               headers=metadata_flavor).text
        zone = requests.get(metadata_server + "zone",
                            headers=metadata_flavor).text.split('/')[-1]

    # logging.info('Getting credentials for GKE cluster %s.' % cluster)
    # subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', cluster,
    #                  '--zone', zone])

    logging.info('Generating training template.')
    script_dir = os.path.dirname(os.path.realpath(__file__))
    template_file = os.path.join(script_dir, 'tf-serve-template.yaml')
    target_file = os.path.join(script_dir, 'tf-serve.yaml')

    # Read the template before opening the target for writing, so an
    # unreadable template does not leave a truncated manifest behind.
    with open(template_file, 'r') as f:
        data = f.read()
    rendered = (data
                .replace('MODEL_NAME', args.model_name)
                .replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE)
                .replace('MODEL_PATH', args.model_path)
                .replace('SERVICE_NAME', args.model_name + ts))
    with open(target_file, "w") as target:
        target.write(rendered)
    logging.info("template: %s", rendered)

    logging.info('deploying model serving.')
    # Apply the file that was just generated instead of a hard-coded path
    # that only matches when the script is installed under /ml.
    status = subprocess.call(['kubectl', 'create', '-f', target_file])
    if status != 0:
        logging.warning('kubectl create exited with status %s', status)
def main(argv=None):
    """Create an ML Engine model and deploy a version from an exported model.

    Command-line flags:
      --project (required): passed to both gcloud commands as `--project`.
      --gcs-path (required): listed to find the exported model; the last
        listed entry becomes the version's `--origin`.
      --version-name (required): version to create; '-' is replaced by '_'.
      --model-name: model to create (default 'taxifare').
      --region: region for the model (default 'us-central1').

    Both gcloud commands and their exit statuses are printed; a non-zero
    status does not stop the script.

    Args:
      argv: accepted but not used; flags are always read from `sys.argv`.

    Raises:
      SystemExit: with status 1 when nothing can be listed under --gcs-path
        within the retry budget (and with argparse's own status on bad
        flags).
    """
    parser = argparse.ArgumentParser(description='ML Trainer')
    parser.add_argument('--project', help='The GCS project to use',
                        required=True)
    parser.add_argument(
        '--gcs-path',
        help='The GCS path to the trained model. The path should end with "../export/<model-name>".',
        required=True)
    parser.add_argument('--version-name', help='The model version name.',
                        required=True)
    parser.add_argument('--model-name', help='The model name.',
                        default='taxifare')
    parser.add_argument('--region', help='The model region.',
                        default='us-central1')
    args = parser.parse_args()

    # Make sure the model dir exists before proceeding, as sometimes it takes
    # a few seconds to become available after training completes.
    retries = 0
    sleeptime = 5
    while retries < 20:
        try:
            model_location = os.path.join(
                args.gcs_path, file_io.list_directory(args.gcs_path)[-1])
            print("model location: %s" % model_location)
            break
        except Exception as e:  # pylint: disable=broad-except
            print(e)
            print("Sleeping %s seconds to wait for GCS files..." % sleeptime)
            time.sleep(sleeptime)
            retries += 1
            sleeptime *= 2  # exponential backoff
    if retries >= 20:
        print("could not get model location subdir from %s, exiting"
              % args.gcs_path)
        # `exit` is only injected by the `site` module; raise directly.
        raise SystemExit(1)

    model_create_command = [
        'gcloud', 'ml-engine', 'models', 'create', args.model_name,
        '--regions', args.region,
        '--project', args.project,
    ]
    print(model_create_command)
    result = subprocess.call(model_create_command)
    print(result)

    proper_version_name = args.version_name.replace('-', '_')
    print("using version name: %s" % proper_version_name)

    model_deploy_command = [
        'gcloud', 'ml-engine', 'versions', 'create', proper_version_name,
        '--model', args.model_name,
        '--runtime-version', '1.6',
        '--project', args.project,
        '--origin', model_location,
    ]
    print(model_deploy_command)
    result2 = subprocess.call(model_deploy_command)
    print(result2)
type=int) parser.add_argument( '--eval-steps', help='Number of steps to run evalution for at each checkpoint', default=100, type=int) args = parser.parse_args() # Set python level verbosity tf.logging.set_verbosity(args.verbosity) # Set C++ Graph Execution level verbosity os.environ['TF_CPP_MIN_LOG_LEVEL'] = str( tf.logging.__dict__[args.verbosity] / 10) train_files = [] tflist = file_io.list_directory(args.train_files_dir) for x in tflist: if args.train_files_prefix in x: train_files.append(os.path.join(args.train_files_dir, x)) print("train files list: %s" % train_files) eval_files = [] eflist = file_io.list_directory(args.eval_files_dir) for x in eflist: if args.eval_files_prefix in x: eval_files.append(os.path.join(args.eval_files_dir, x)) print("eval files list: %s" % eval_files) # Run the training job hparams = tf.contrib.training.HParams(**args.__dict__) run_experiment(train_files, eval_files, hparams)