def testBasics(self):
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), "dump")
    opts = builder(
        builder.time_and_memory()).with_file_output(outfile).build()

    x = lib.BuildFullModel()

    profile_str = None
    profile_step50 = os.path.join(test.get_temp_dir(), "profile_50")
    with profile_context.ProfileContext(test.get_temp_dir()) as pctx:
        pctx.add_auto_profiling("op", options=opts,
                                profile_steps=[15, 50, 100])
        with session.Session() as sess:
            sess.run(variables.global_variables_initializer())
            total_steps = 101 if test.is_gpu_available() else 50
            for i in range(total_steps):
                sess.run(x)
                if i == 14 or i == 99:
                    self.assertTrue(gfile.Exists(outfile))
                    gfile.Remove(outfile)
                if i == 49:
                    self.assertTrue(gfile.Exists(profile_step50))
                    with gfile.Open(outfile, "r") as f:
                        profile_str = f.read()
                    gfile.Remove(outfile)

    with lib.ProfilerFromFile(
            os.path.join(test.get_temp_dir(), "profile_50")) as profiler:
        profiler.profile_operations(options=opts)
        with gfile.Open(outfile, "r") as f:
            self.assertEqual(profile_str, f.read())

def _trainLoop(self, train_op, train_steps, time_dir, time_step,
               memory_dir, memory_step, profile_dir, dump_step):
    with session.Session() as sess:
        sess.run(variables.global_variables_initializer())
        # start from 1 because variable_initializer took one step.
        for i in range(1, train_steps + 1):
            _ = sess.run(train_op)
            if i in time_step:
                ret = gfile.ListDirectory(time_dir)
                self.assertEqual(len(ret), 1)
                self.assertTrue(
                    gfile.Open(os.path.join(time_dir, ret[0]), 'r').read()
                    .find('execution time') > 0)
                _ = [gfile.Remove(os.path.join(time_dir, x)) for x in ret]
            else:
                self.assertEqual(len(gfile.ListDirectory(time_dir)), 0)
            if i in memory_step:
                ret = gfile.ListDirectory(memory_dir)
                self.assertEqual(len(ret), 1)
                self.assertTrue(
                    gfile.Open(os.path.join(memory_dir, ret[0]), 'r').read()
                    .find('requested bytes') > 0)
                _ = [gfile.Remove(os.path.join(memory_dir, x)) for x in ret]
            else:
                self.assertEqual(len(gfile.ListDirectory(memory_dir)), 0)
            if i in dump_step:
                ret = gfile.ListDirectory(profile_dir)
                self.assertAllEqual(ret, ['profile_%d' % i])
                _ = [gfile.Remove(os.path.join(profile_dir, x)) for x in ret]
            else:
                if i < dump_step[0]:
                    self.assertFalse(gfile.Exists(profile_dir))
                else:
                    self.assertEqual(len(gfile.ListDirectory(profile_dir)), 0)

def PTB_prepare_data(data_path, vocab_path):
    train_path, train_json_path = (data_path['train'],
                                   data_path['train'].replace('txt', 'json'))
    valid_path, valid_json_path = (data_path['val'],
                                   data_path['val'].replace('txt', 'json'))
    test_path, test_json_path = (data_path['test'],
                                 data_path['test'].replace('txt', 'json'))

    data_ids = {}
    if gfile.Exists(train_json_path) and \
       gfile.Exists(valid_json_path) and \
       gfile.Exists(test_json_path):
        data_ids['train'] = read_data(train_json_path)
        data_ids['val'] = read_data(valid_json_path)
        data_ids['test'] = read_data(test_json_path)
        vocab_input, rev_vocab_input = initialize_vocabulary(
            vocab_path['input'])
        vocab_output, rev_vocab_output = initialize_vocabulary(
            vocab_path['output'])
        return (vocab_input, rev_vocab_input), \
               (vocab_output, rev_vocab_output), data_ids

    word_to_id, id_to_word = _build_vocab(train_path)

    # create vocabulary file
    with open(vocab_path, 'wb') as f:
        for word in id_to_word:
            f.write(word + b"\n")

    # sentences to ids
    data_ids['train'] = _file_to_line_ids(train_path, word_to_id)
    data_ids['val'] = _file_to_line_ids(valid_path, word_to_id)
    data_ids['test'] = _file_to_line_ids(test_path, word_to_id)

    # make input, target pairs
    data_ids['train'] = [[line[:-1], line[1:]] for line in data_ids['train']]
    data_ids['val'] = [[line[:-1], line[1:]] for line in data_ids['val']]
    data_ids['test'] = [[line[:-1], line[1:]] for line in data_ids['test']]

    # shuffle the data
    data_ids['train'] = shuffle_data(data_ids['train'], {'train': 1.0})['train']
    data_ids['val'] = shuffle_data(data_ids['val'], {'val': 1.0})['val']
    data_ids['test'] = shuffle_data(data_ids['test'], {'test': 1.0})['test']

    # dump data to json files
    if gfile.Exists(train_json_path):
        gfile.Remove(train_json_path)
    if gfile.Exists(valid_json_path):
        gfile.Remove(valid_json_path)
    if gfile.Exists(test_json_path):
        gfile.Remove(test_json_path)

    with open(train_json_path, 'wb') as f:
        json.dump(data_ids['train'], f, sort_keys=True, indent=4)
    with open(valid_json_path, 'wb') as f:
        json.dump(data_ids['val'], f, sort_keys=True, indent=4)
    with open(test_json_path, 'wb') as f:
        json.dump(data_ids['test'], f, sort_keys=True, indent=4)

    return (word_to_id, id_to_word), (word_to_id, id_to_word), data_ids

def _sync_dumped_data_block_meta(self):
    dumped_data_block_path = {}
    dumped_data_block_meta_path = {}
    dumped_data_block_meta = []
    data_block_dir = self._data_block_dir()
    if not gfile.Exists(data_block_dir):
        gfile.MakeDirs(data_block_dir)
    elif not gfile.IsDirectory(data_block_dir):
        logging.fatal("%s must be the directory of data block for "
                      "partition %d", data_block_dir, self._partition_id)
        os._exit(-1)  # pylint: disable=protected-access
    for fpath in self._list_data_block_dir():
        fname = ntpath.basename(fpath)
        if fname.endswith(DataBlockSuffix):
            ftag = fname[:-len(DataBlockSuffix)]
            dumped_data_block_path[ftag] = fpath
        elif fname.endswith(DataBlockMetaSuffix):
            ftag = fname[:-len(DataBlockMetaSuffix)]
            dumped_data_block_meta_path[ftag] = fpath
        else:
            gfile.Remove(fpath)
    for (ftag, fpath) in dumped_data_block_meta_path.items():
        if ftag not in dumped_data_block_path:
            # Orphan meta file without a matching data block file: drop it.
            gfile.Remove(fpath)
        else:
            with make_tf_record_iter(fpath) as record_iter:
                dbm = dj_pb.DataBlockMeta()
                dbm.ParseFromString(next(record_iter))
                dumped_data_block_meta.append(dbm)
    dumped_data_block_meta = sorted(dumped_data_block_meta,
                                    key=lambda meta: meta.data_block_index)
    for (idx, meta) in enumerate(dumped_data_block_meta):
        if meta.data_block_index != idx:
            logging.fatal("data_block_index is not consecutive")
            os._exit(-1)  # pylint: disable=protected-access
        if idx == 0:
            continue
        prev_meta = dumped_data_block_meta[idx - 1]
        if prev_meta.follower_restart_index > meta.follower_restart_index:
            logging.fatal("follower_restart_index is not incremental")
            os._exit(-1)  # pylint: disable=protected-access
        if prev_meta.leader_start_index >= meta.leader_start_index:
            logging.fatal("leader_start_index is not incremental")
            os._exit(-1)  # pylint: disable=protected-access
        if prev_meta.leader_end_index >= meta.leader_end_index:
            logging.fatal("leader_end_index is not incremental")
            os._exit(-1)  # pylint: disable=protected-access
    with self._lock:
        if len(dumped_data_block_meta) > len(self._dumped_data_block_meta):
            self._dumped_data_block_meta = dumped_data_block_meta

def test_create_file(self):
    """Test create file."""
    # Setup and check preconditions.
    file_name = self._path_to("testfile")
    if gfile.Exists(file_name):
        gfile.Remove(file_name)
    # Create file.
    with gfile.Open(file_name, 'w') as w:
        w.write("")
    # Check that file was created.
    self.assertTrue(gfile.Exists(file_name))

    gfile.Remove(file_name)

def testErrors(self):
    self.assertRaises(OSError,
                      lambda: gfile.RmDir(self.tmp + "dir_doesnt_exist"))
    self.assertRaises(OSError,
                      lambda: gfile.Remove(self.tmp + "file_doesnt_exist"))
    gfile.MkDir(self.tmp + "error_dir")
    with gfile.GFile(self.tmp + "error_dir/file", "w"):
        pass  # Create file
    self.assertRaises(OSError,
                      lambda: gfile.Remove(self.tmp + "error_dir"))
    self.assertRaises(OSError,
                      lambda: gfile.RmDir(self.tmp + "error_dir"))
    self.assertTrue(gfile.Exists(self.tmp + "error_dir"))
    gfile.DeleteRecursively(self.tmp + "error_dir")
    self.assertFalse(gfile.Exists(self.tmp + "error_dir"))

def shuffle_records(fname):
    """Shuffle records in a single file."""
    print("Shuffling records in file %s" % fname)

    # Rename file prior to shuffling.
    tmp_fname = fname + ".unshuffled"
    gfile.Rename(fname, tmp_fname)

    reader = python_io.tf_record_iterator(tmp_fname)
    records = []
    for record in reader:
        records.append(record)
        if len(records) % 100000 == 0:
            print("\tRead: %d" % len(records))

    random.shuffle(records)

    # Write shuffled records to the original file name.
    with python_io.TFRecordWriter(fname) as w:
        for count, record in enumerate(records):
            w.write(record)
            if count > 0 and count % 100000 == 0:
                print("\tWriting record: %d" % count)

    gfile.Remove(tmp_fname)

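# Hypothetical usage sketch for shuffle_records above: shuffle every shard of
# a sharded TFRecord dataset in place. The glob pattern is a made-up example,
# not a path from the original code.
for shard in gfile.Glob("/tmp/data/train-*-of-00100"):
    shuffle_records(shard)
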
def edit_pb_txt(old_args, export_dir):
    """Edit file path arguments in a pbtxt file.

    :param old_args: Old file paths that need to be copied and edited.
    :param export_dir: Directory of the saved model.
    """
    assets_extra_dir = os.path.join(export_dir, "./assets.extra")
    if not os.path.exists(assets_extra_dir):
        os.makedirs(assets_extra_dir)
    new_args = []
    for one_old in old_args:
        if not os.path.exists(one_old):
            raise ValueError("{} does not exist!".format(one_old))
        one_new = os.path.join(assets_extra_dir, os.path.basename(one_old))
        new_args.append(one_new)
        logging.info("Copy file: {} to: {}".format(one_old, one_new))
        gfile.Copy(one_old, one_new, overwrite=True)

    pbtxt_file = os.path.join(export_dir, "saved_model.pbtxt")
    tmp_file = pbtxt_file + ".tmp"
    logging.info("Editing pbtxt file: {}".format(pbtxt_file))
    with open(pbtxt_file, "rt") as fin, open(tmp_file, "wt") as fout:
        for line in fin:
            for one_old, one_new in zip(old_args, new_args):
                line = line.replace(one_old, one_new)
            fout.write(line)
    gfile.Copy(tmp_file, pbtxt_file, overwrite=True)
    gfile.Remove(tmp_file)

def main(unused_argv):
    master_spec = model.build_master_spec()
    try:
        gfile.Remove(FLAGS.spec_file)
    except errors.OpError as err:
        tf.logging.error('Unable to delete prior files: %s', err)
    model.write_master_spec(master_spec, FLAGS.spec_file)

def testBinaryAndTextFormat(self):
    test_dir = self._TestDir("binary_and_text")
    filename = os.path.join(test_dir, "metafile")
    with self.test_session(graph=tf.Graph()):
        # Creates a graph.
        tf.Variable(10.0, name="v0")
        # Exports the graph as binary format.
        tf.train.export_meta_graph(filename, as_text=False)
    with self.test_session(graph=tf.Graph()):
        # Imports the binary format graph.
        saver = tf.train.import_meta_graph(filename)
        # Exports the graph as text format.
        saver.export_meta_graph(filename, as_text=True)
    with self.test_session(graph=tf.Graph()):
        # Imports the text format graph.
        tf.train.import_meta_graph(filename)
        # Writes wrong contents to the file.
        tf.train.write_graph(saver.as_saver_def(), os.path.dirname(filename),
                             os.path.basename(filename))
    with self.test_session(graph=tf.Graph()):
        # Import should fail.
        with self.assertRaisesWithPredicateMatch(
                IOError, lambda e: "Cannot parse file"):
            tf.train.import_meta_graph(filename)
        # Deletes the file.
        gfile.Remove(filename)
        with self.assertRaisesWithPredicateMatch(
                IOError, lambda e: "does not exist"):
            tf.train.import_meta_graph(filename)

def gfile_copy_callback(files_to_copy, export_dir_path):
    """Callback to copy files using `gfile.Copy` to an export directory.

    This method is used as the default `assets_callback` in `Exporter.init`
    to copy assets from the `assets_collection`. It can also be invoked
    directly to copy additional supplementary files into the export directory
    (in which case it is not a callback).

    Args:
      files_to_copy: A dictionary that maps original file paths to desired
        basename in the export directory.
      export_dir_path: Directory to copy the files to.
    """
    logging.info("Write assets into: %s using gfile_copy.", export_dir_path)
    gfile.MakeDirs(export_dir_path)
    for source_filepath, basename in files_to_copy.items():
        new_path = os.path.join(
            compat.as_bytes(export_dir_path), compat.as_bytes(basename))
        logging.info("Copying asset %s to path %s.", source_filepath, new_path)

        if gfile.Exists(new_path):
            # Guard against being restarted while copying assets, and the file
            # existing and being in an unknown state.
            # TODO(b/28676216): Do some file checks before deleting.
            logging.info("Removing file %s.", new_path)
            gfile.Remove(new_path)
        gfile.Copy(source_filepath, new_path)

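# Hypothetical usage sketch for gfile_copy_callback above, invoked directly
# as the docstring describes (not as a callback). The source files and the
# export directory below are made-up examples.
extra_assets = {
    "/tmp/vocab.txt": "vocab.txt",            # source path -> basename in export dir
    "/tmp/label_map.pbtxt": "label_map.pbtxt",
}
gfile_copy_callback(extra_assets, "/tmp/export/assets.extra")
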
def main(unused_argv):
    if len(sys.argv) == 1:
        flags._global_parser.print_help()
        sys.exit(0)

    logging.set_verbosity(logging.INFO)
    check.IsTrue(FLAGS.training_corpus_path)
    check.IsTrue(FLAGS.tune_corpus_path)
    check.IsTrue(FLAGS.resource_path)
    check.IsTrue(FLAGS.checkpoint_filename)

    if not gfile.IsDirectory(FLAGS.resource_path):
        gfile.MakeDirs(FLAGS.resource_path)

    training_corpus_path = gfile.Glob(FLAGS.training_corpus_path)[0]
    tune_corpus_path = gfile.Glob(FLAGS.tune_corpus_path)[0]

    # SummaryWriter for TensorBoard.
    tf.logging.info('TensorBoard directory: "%s"', FLAGS.tensorboard_dir)
    tf.logging.info('Deleting prior data if it exists...')

    stats_file = '%s.stats' % FLAGS.checkpoint_filename
    try:
        stats = gfile.GFile(stats_file, 'r').readlines()[0].split(',')
        stats = [int(x) for x in stats]
    except errors.OpError:
        stats = [-1, 0, 0]
    tf.logging.info('Read ckpt stats: %s', str(stats))

    do_restore = True
    if stats[0] < FLAGS.job_id:
        do_restore = False
        tf.logging.info('Deleting last job: %d', stats[0])
        try:
            gfile.DeleteRecursively(FLAGS.tensorboard_dir)
            gfile.Remove(FLAGS.checkpoint_filename)
        except errors.OpError as err:
            tf.logging.error('Unable to delete prior files: %s', err)
        stats = [FLAGS.job_id, 0, 0]

    tf.logging.info('Creating the directory again...')
    gfile.MakeDirs(FLAGS.tensorboard_dir)
    tf.logging.info('Created! Instantiating SummaryWriter...')
    summary_writer = trainer_lib.get_summary_writer(FLAGS.tensorboard_dir)

    tf.logging.info('Creating TensorFlow checkpoint dir...')
    gfile.MakeDirs(os.path.dirname(FLAGS.checkpoint_filename))

    # Constructs lexical resources for SyntaxNet in the given resource path,
    # from the training data.
    if FLAGS.compute_lexicon:
        logging.info('Computing lexicon...')
        lexicon.build_lexicon(FLAGS.resource_path, training_corpus_path,
                              morph_to_pos=True)

    # Load master spec.
    master_spec = model.load_master_spec(FLAGS.dragnn_spec, FLAGS.resource_path)

    # Build graph.
    graph, builder, trainers, annotator = model.build_train_graph(master_spec)

    # Train.
    train(graph, builder, trainers, annotator, summary_writer, do_restore,
          stats)

def generate_raw_data(self, data_source, partition_id, block_size,
                      shuffle_win_size, feat_key_fmt, feat_val_fmt):
    dbm = data_block_manager.DataBlockManager(data_source, partition_id)
    raw_data_dir = os.path.join(data_source.raw_data_dir,
                                'partition_{}'.format(partition_id))
    if gfile.Exists(raw_data_dir):
        gfile.DeleteRecursively(raw_data_dir)
    gfile.MakeDirs(raw_data_dir)

    useless_index = 0
    for block_index in range(self.total_index // block_size):
        builder = data_block_manager.DataBlockBuilder(
            data_source.raw_data_dir, partition_id, block_index, None)
        cands = list(range(block_index * block_size,
                           (block_index + 1) * block_size))
        start_index = cands[0]
        for i in range(len(cands)):
            if random.randint(1, 4) > 2:
                continue
            a = random.randint(i - shuffle_win_size, i + shuffle_win_size)
            b = random.randint(i - shuffle_win_size, i + shuffle_win_size)
            if a < 0:
                a = 0
            if a >= len(cands):
                a = len(cands) - 1
            if b < 0:
                b = 0
            if b >= len(cands):
                b = len(cands) - 1
            if (abs(cands[a] - i - start_index) <= shuffle_win_size and
                    abs(cands[b] - i - start_index) <= shuffle_win_size):
                cands[a], cands[b] = cands[b], cands[a]
        for example_idx in cands:
            feat = {}
            example_id = '{}'.format(example_idx).encode()
            feat['example_id'] = tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[example_id]))
            event_time = 150000000 + example_idx
            feat['event_time'] = tf.train.Feature(
                int64_list=tf.train.Int64List(value=[event_time]))
            feat[feat_key_fmt.format(example_idx)] = tf.train.Feature(
                bytes_list=tf.train.BytesList(
                    value=[feat_val_fmt.format(example_idx).encode()]))
            example = tf.train.Example(features=tf.train.Features(
                feature=feat))
            builder.append(example.SerializeToString(), example_id,
                           event_time, useless_index, useless_index)
            useless_index += 1
        builder.finish_data_block()

    fpaths = [os.path.join(raw_data_dir, f)
              for f in gfile.ListDirectory(raw_data_dir)
              if not gfile.IsDirectory(os.path.join(raw_data_dir, f))]
    for fpath in fpaths:
        if not fpath.endswith(common.DataBlockSuffix):
            gfile.Remove(fpath)

def testGetNoneShapeFromEmptyExamplesPath(self, file_name_to_write,
                                          tfrecord_path_to_match):
    output_file = test_utils.test_tmpfile(file_name_to_write)
    tfrecord.write_tfrecords([], output_file)
    self.assertIsNone(
        tf_utils.get_shape_from_examples_path(
            test_utils.test_tmpfile(tfrecord_path_to_match)))

    # Clean up.
    gfile.Remove(output_file)

def testBasics(self):
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), "dump")
    opts = builder(
        builder.time_and_memory()).with_file_output(outfile).build()

    x = lib.BuildFullModel()

    profile_str = None
    profile_step100 = os.path.join(test.get_temp_dir(), "profile_100")
    with profile_context.ProfileContext(test.get_temp_dir()) as pctx:
        pctx.add_auto_profiling("op", options=opts,
                                profile_steps=[15, 50, 100])
        with session.Session() as sess:
            sess.run(variables.global_variables_initializer())
            total_steps = 101
            for i in range(total_steps):
                sess.run(x)
                if i == 14 or i == 49:
                    self.assertTrue(gfile.Exists(outfile))
                    gfile.Remove(outfile)
                if i == 99:
                    self.assertTrue(gfile.Exists(profile_step100))
                    with gfile.Open(outfile, "r") as f:
                        profile_str = f.read()
                    gfile.Remove(outfile)

        self.assertEqual(set([15, 50, 100]),
                         set(pctx.get_profiles("op").keys()))

    with lib.ProfilerFromFile(
            os.path.join(test.get_temp_dir(), "profile_100")) as profiler:
        profiler.profile_operations(options=opts)
        with gfile.Open(outfile, "r") as f:
            if test.is_built_with_rocm():
                # The profiler output for ROCm mode includes an extra warning
                # related to the lack of stream tracing in ROCm mode. Need to
                # skip this warning when doing the diff in ROCm mode.
                profile_str = "\n".join(profile_str.split("\n")[7:])
            self.assertEqual(profile_str, f.read())

def prepareNLUMessage(cls, message, test_seq_in_path):
    new_message = "BOS " + message + " EOS"
    count = 0
    if gfile.Exists(test_seq_in_path):  # remove file if it exists
        gfile.Remove(test_seq_in_path)
    with gfile.GFile(test_seq_in_path, mode="w") as test_seq_in:
        while count < 5:
            test_seq_in.write(new_message + "\n")
            count += 1

def finish_example_id_dumper(self):
    self._tf_record_writer.close()
    self._tf_record_writer = None
    if self.dumped_example_number() > 0:
        fpath = self._get_dumped_fpath()
        gfile.Rename(self._tmp_fpath, fpath)
        return ExampleIdMeta(self._start_index, self._end_index, fpath)
    assert self._start_index == self._end_index
    gfile.Remove(self._tmp_fpath)
    return None

def test_remove(self):
    """Test remove."""
    # Setup and check preconditions.
    file_name = self._path_to("1")
    self.assertFalse(gfile.Exists(file_name))
    with gfile.Open(file_name, 'w') as w:
        w.write("")
    self.assertTrue(gfile.Exists(file_name))
    # Remove file.
    gfile.Remove(file_name)
    # Check that file was removed.
    self.assertFalse(gfile.Exists(file_name))

def testGetShapeFromExamplesPath(self, file_name_to_write,
                                 tfrecord_path_to_match):
    example = example_pb2.Example()
    valid_shape = [1, 2, 3]
    example.features.feature['image/shape'].int64_list.value.extend(valid_shape)
    output_file = test_utils.test_tmpfile(file_name_to_write)
    tfrecord.write_tfrecords([example], output_file)
    self.assertEqual(
        valid_shape,
        tf_utils.get_shape_from_examples_path(
            test_utils.test_tmpfile(tfrecord_path_to_match)))

    # Clean up.
    gfile.Remove(output_file)

def testWriteToFileSucceeds(self):
    screen_output = debugger_cli_common.RichTextLines(
        ["Roses are red", "Violets are blue"],
        font_attr_segs={0: [(0, 5, "red")], 1: [(0, 7, "blue")]})

    file_path = tempfile.mktemp()
    screen_output.write_to_file(file_path)

    with gfile.Open(file_path, "r") as f:
        self.assertEqual("Roses are red\nViolets are blue\n", f.read())

    # Clean up.
    gfile.Remove(file_path)

def test_remove(self):
    """Test remove."""
    # Setup and check preconditions.
    file_name = "igfs:///test_remove/1"
    self.assertFalse(gfile.Exists(file_name))
    with gfile.Open(file_name, mode="w") as w:
        w.write("")
    self.assertTrue(gfile.Exists(file_name))
    # Remove file.
    gfile.Remove(file_name)
    # Check that file was removed.
    self.assertFalse(gfile.Exists(file_name))

def test_write_read_file(self):
    """Test write/read file."""
    # Setup and check preconditions.
    file_name = self._path_to("writereadfile")
    if gfile.Exists(file_name):
        gfile.Remove(file_name)

    # Write data.
    with gfile.Open(file_name, 'w') as w:
        w.write("Hello\n, world!")

    # Read data.
    with gfile.Open(file_name, 'r') as r:
        file_read = r.read()
        self.assertEqual(file_read, "Hello\n, world!")

def testWriteToFileSucceeds(self):
    screen_output = debugger_cli_common.RichTextLines(
        ["Roses are red", "Violets are blue"],
        font_attr_segs={
            0: [(0, 5, "red")],
            1: [(0, 7, "blue")]
        })

    fd, file_path = tempfile.mkstemp()
    os.close(fd)  # file opened exclusively, so we need to close this
    # a better fix would be to make the API take a fd
    screen_output.write_to_file(file_path)

    with gfile.Open(file_path, "r") as f:
        self.assertEqual("Roses are red\nViolets are blue\n", f.read())

    # Clean up.
    gfile.Remove(file_path)

def testWriteScreenOutputToFileWorks(self):
    output_path = tempfile.mktemp()

    ui = MockCursesUI(
        40,
        80,
        command_sequence=[
            string_to_codes("babble -n 2>%s\n" % output_path),
            self._EXIT
        ])

    ui.register_command_handler("babble", self._babble, "")
    ui.run_ui()

    self.assertEqual(1, len(ui.unwrapped_outputs))

    with gfile.Open(output_path, "r") as f:
        self.assertEqual(b"bar\nbar\n", f.read())

    # Clean up output file.
    gfile.Remove(output_path)

def testAppendingRedirectErrors(self):
    output_path = tempfile.mktemp()

    ui = MockCursesUI(
        40,
        80,
        command_sequence=[
            string_to_codes("babble -n 2 >> %s\n" % output_path),
            self._EXIT
        ])

    ui.register_command_handler("babble", self._babble, "")
    ui.run_ui()

    self.assertEqual(1, len(ui.unwrapped_outputs))
    self.assertEqual([
        "Syntax error for command: babble", "For help, do \"help babble\""
    ], ui.unwrapped_outputs[0].lines)

    # Clean up output file.
    gfile.Remove(output_path)

def finish_data_block(self):
    assert self._example_num == len(self._data_block_meta.example_ids)
    self._tf_record_writer.close()
    self._tf_record_writer = None
    if len(self._data_block_meta.example_ids) > 0:
        data_block_id = self._generate_data_block_id()
        data_block_path = os.path.join(self._get_data_block_dir(),
                                       data_block_id + DataBlockSuffix)
        gfile.Rename(self._tmp_fpath, data_block_path)
        self._data_block_meta.start_time = self._start_time
        self._data_block_meta.end_time = self._end_time
        self._data_block_meta.block_id = data_block_id
        meta_tmp_fpath = self._get_tmp_fpath()
        with tf.io.TFRecordWriter(meta_tmp_fpath) as meta_writer:
            meta_writer.write(self._data_block_meta.SerializeToString())
        meta_path = os.path.join(self._get_data_block_dir(),
                                 data_block_id + DataBlockMetaSuffix)
        gfile.Rename(meta_tmp_fpath, meta_path)
    else:
        gfile.Remove(self._tmp_fpath)

def testAutoTracingInDebugMode(self):
    ops.reset_default_graph()
    x = lib.BuildFullModel()

    with profile_context.ProfileContext(test.get_temp_dir(), debug=True):
        with session.Session() as sess:
            self.evaluate(variables.global_variables_initializer())
            for _ in range(10):
                self.evaluate(x)
            for f in gfile.ListDirectory(test.get_temp_dir()):
                # Warm up, no tracing.
                self.assertFalse("run_meta" in f)
            self.evaluate(x)
            self.assertTrue(
                gfile.Exists(os.path.join(test.get_temp_dir(), "run_meta_11")))
            gfile.Remove(os.path.join(test.get_temp_dir(), "run_meta_11"))
            # fetched already.
            self.evaluate(x)
            for f in gfile.ListDirectory(test.get_temp_dir()):
                self.assertFalse("run_meta" in f)

def generate_leader_raw_data(self):
    dbm = data_block_manager.DataBlockManager(self.data_source_l, 0)
    raw_data_dir = os.path.join(self.data_source_l.raw_data_dir,
                                'partition_{}'.format(0))
    if gfile.Exists(raw_data_dir):
        gfile.DeleteRecursively(raw_data_dir)
    gfile.MakeDirs(raw_data_dir)

    block_index = 0
    builder = data_block_manager.DataBlockBuilder(
        self.data_source_l.raw_data_dir, 0, block_index, None)
    for i in range(0, self.leader_end_index + 3):
        if i > 0 and i % 2048 == 0:
            builder.finish_data_block()
            block_index += 1
            builder = data_block_manager.DataBlockBuilder(
                self.data_source_l.raw_data_dir, 0, block_index, None)
        feat = {}
        pt = i + 1 << 30
        if i % 3 == 0:
            pt = i // 3
        example_id = '{}'.format(pt).encode()
        feat['example_id'] = tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[example_id]))
        event_time = 150000000 + pt
        feat['event_time'] = tf.train.Feature(
            int64_list=tf.train.Int64List(value=[event_time]))
        example = tf.train.Example(features=tf.train.Features(feature=feat))
        builder.append(example.SerializeToString(), example_id,
                       event_time, i, i)
    builder.finish_data_block()

    fpaths = [os.path.join(raw_data_dir, f)
              for f in gfile.ListDirectory(raw_data_dir)
              if not gfile.IsDirectory(os.path.join(raw_data_dir, f))]
    for fpath in fpaths:
        if not fpath.endswith(common.DataBlockSuffix):
            gfile.Remove(fpath)

    self.manifest_manager = raw_data_manifest_manager.RawDataManifestManager(
        self.etcd, self.data_source_l)

def _test_read_file_offset_and_dataset(self):
    """Test read file with dataset."""
    # Note: disabled for now. Will enable once
    # all moved to eager mode.
    # Setup and check preconditions.
    file_name = self._path_to("readfiledataset")
    if gfile.Exists(file_name):
        gfile.Remove(file_name)

    # Write data.
    with gfile.Open(file_name, 'w') as w:
        w.write("Hello1,world1!\nHello2,world2!")

    dataset = tf.data.experimental.CsvDataset(file_name,
                                              [tf.string, tf.string])
    expected = [[b"Hello1", b"world1!"], [b"Hello2", b"world2!"]]
    i = 0
    for v in dataset:
        v0, v1 = v
        assert v0.numpy() == expected[i][0]
        assert v1.numpy() == expected[i][1]
        i += 1
    assert i == 2

def _MaybeDeleteOldCheckpoints(self, latest_save_path):
    """Deletes old checkpoints if necessary.

    Always keep the last `max_to_keep` checkpoints. If
    `keep_checkpoint_every_n_hours` was specified, keep an additional
    checkpoint every `N` hours. For example, if `N` is 0.5, an additional
    checkpoint is kept for every 0.5 hours of training; if `N` is 10, an
    additional checkpoint is kept for every 10 hours of training.

    Args:
      latest_save_path: Name including path of checkpoint file to save.
    """
    if not self._max_to_keep:
        return
    # Remove first from list if the same name was used before.
    for p in self._last_checkpoints:
        if latest_save_path == self._CheckpointFilename(p):
            self._last_checkpoints.remove(p)
    # Append new path to list.
    self._last_checkpoints.append((latest_save_path, time.time()))
    # If more than max_to_keep, remove oldest.
    if len(self._last_checkpoints) > self._max_to_keep:
        p = self._last_checkpoints.pop(0)
        # Do not delete the file if keep_checkpoint_every_n_hours is set and
        # we have reached N hours of training.
        should_keep = p[1] > self._next_checkpoint_time
        if should_keep:
            self._next_checkpoint_time += (
                self._keep_checkpoint_every_n_hours * 3600)
            return
        # Otherwise delete the files.
        for f in gfile.Glob(self._CheckpointFilename(p)):
            try:
                gfile.Remove(f)
            except OSError as e:
                logging.warning("Ignoring: %s", str(e))

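# A standalone sketch (not part of the original class) illustrating the
# retention rule described in _MaybeDeleteOldCheckpoints above, assuming
# max_to_keep=2 and keep_checkpoint_every_n_hours=10. Names and paths here
# are hypothetical; the real code deletes via gfile.Remove.
import time

max_to_keep = 2
keep_checkpoint_every_n_hours = 10
next_checkpoint_time = time.time() + keep_checkpoint_every_n_hours * 3600
last_checkpoints = []  # list of (path, timestamp) tuples, oldest first

def record_checkpoint(path):
    """Track a new checkpoint and decide whether the oldest surplus one survives."""
    global next_checkpoint_time
    last_checkpoints.append((path, time.time()))
    if len(last_checkpoints) > max_to_keep:
        old_path, saved_at = last_checkpoints.pop(0)
        if saved_at > next_checkpoint_time:
            # Crossed an N-hour boundary: keep this checkpoint, move the boundary.
            next_checkpoint_time += keep_checkpoint_every_n_hours * 3600
        else:
            print("would delete %s" % old_path)  # real code: gfile.Remove(old_path)

for step in range(4):
    record_checkpoint("/tmp/ckpt-%d" % step)  # made-up checkpoint paths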